-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE #98968
Changes from 1 commit
d1b3855
5666ebd
84c3295
a252b0d
7f8f4f3
f66ce52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -148,3 +148,129 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) { | |
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret <2 x half> %val | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_i8_dead | ||
define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.u8 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -NOT checks are fragile. Can you precommit generated checks? Also, why would deleting these dead intrinsics need to be done in codegen and not already done in the middle end? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously these weren't getting deleted before codegen since they weren't |
||
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_i16_dead | ||
define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.u16 | ||
%val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_i32_dead | ||
define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.u32 | ||
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_i64_dead | ||
define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.u64 | ||
%val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_p_dead | ||
define void @test_ldu_p_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.u64 | ||
%val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_f32_dead | ||
define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.f32 | ||
%val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_f64_dead | ||
define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.f64 | ||
%val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_f16_dead | ||
define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.u16 | ||
%val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldu_v2f16_dead | ||
define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ldu.global.u32 | ||
%val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_i8_dead | ||
define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.u8 | ||
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_i16_dead | ||
define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.u16 | ||
%val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_i32_dead | ||
define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.u32 | ||
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_i64_dead | ||
define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.u64 | ||
%val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_p_dead | ||
define void @test_ldg_p_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.u64 | ||
%val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_f32_dead | ||
define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.f32 | ||
%val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_f64_dead | ||
define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.f64 | ||
%val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_f16_dead | ||
define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.u16 | ||
%val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: test_ldg_v2f16_dead | ||
define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) { | ||
; CHECK-NOT: ld.global.nc.u32 | ||
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You shouldn't need to touch this. This is also a behavior change for callsite attributes, which will cause legalization failures for readnone callsites
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the comment Matt. This particular change is the main motivation for this patch, so I'd like to better understand potential issues with it. Could you elaborate on the behavior change and the failures it would cause? I haven't noticed any failures of that nature reflected in tests, but maybe there isn't coverage for it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The IR does not have a notion of non-memory side effects. The code previously only considers the attributes of the underlying function definition, which is what all other intrinsic code expects (and changes the opcode between INTRINSIC_WO_CHAIN/INTRINSIC_W_CHAIN/INTRINSIC_VOID). This now will swap the opcode and operand structure depending on whether the callsite has a memory(none) attribute.
So TLDR this change is just wrong, you can only consider the underlying function declaration and not the instruction / call site instance
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for explaining!
I understand that it will affect the operands, but I don't see where
OnlyLoad
affects the opcode. That seems like something determined byHasChain
.If the issue is with using the instruction, would it then be reasonable to use
F->willReturn()
instead of!I.mayHaveSideEffects()
? It still appears to me incorrect to treat an intrinsic as only a load when it may not return, as the previous code is doing.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand why willreturn matters in this context. But also, we don't need to repeat every optimization in codegen.
In any case, a patch touching the intrinsic attributes shouldn't be in the same patch as one changing the behavior here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The point of the
willReturn
check is to avoid deleting a!willReturn
intrinsic. In their non-updated form, the nvvm intrinsics demonstrate the problem of not checking forwillReturn
- the calls are not deleted at the LLVM IR level but are deleted when building the SDAG. This seems incorrect. These calls were unsafe to remove from the IR because the functions are!willReturn
and should be unsafe to remove from the SDAG for the same reason.Since these nvvm intrinsics are just loads they should actually be removable, so we update them with
IntrWillReturn
. I don't have an example of a public intrinsic that is intended to behave the same way these intrinsics did before updating them, but it should be possible to make one and keep it from being removed by being!willReturn
.That being said, I will remove the SDAG change from this patch and make another PR for that.