-
Notifications
You must be signed in to change notification settings - Fork 12.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CUDA] Add support for __grid_constant__ attribute (#114589)
LLVM support for the attribute has been implemented already, so it just plumbs it through to the CUDA front-end. One notable difference from NVCC is that the attribute can be used regardless of the targeted GPU. On the older GPUs it will just be ignored. The attribute is a performance hint, and does not warrant a hard error if compiler can't benefit from it on a particular GPU variant.
- Loading branch information
Showing
13 changed files
with
145 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 | ||
// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s | ||
|
||
#include "Inputs/cuda.h" | ||
|
||
struct S {}; | ||
|
||
__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {} | ||
|
||
// dependent arguments get diagnosed after instantiation. | ||
template <typename T> | ||
__global__ void tkernel_const(__grid_constant__ const T arg) {} | ||
|
||
template <typename T> | ||
__global__ void tkernel(int dummy, __grid_constant__ T arg) {} | ||
|
||
void foo() { | ||
tkernel_const<const S><<<1,1>>>({}); | ||
tkernel_const<S><<<1,1>>>({}); | ||
tkernel<const S><<<1,1>>>(1, {}); | ||
} | ||
//. | ||
//. | ||
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]} | ||
// CHECK: [[META1]] = !{i32 1, i32 3} | ||
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]} | ||
// CHECK: [[META3]] = !{i32 1} | ||
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]} | ||
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]} | ||
// CHECK: [[META6]] = !{i32 2} | ||
//. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// RUN: %clang_cc1 -fsyntax-only -verify %s | ||
// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s | ||
#include "Inputs/cuda.h" | ||
|
||
struct S {}; | ||
|
||
__global__ void kernel_struct(__grid_constant__ const S arg) {} | ||
__global__ void kernel_scalar(__grid_constant__ const int arg) {} | ||
|
||
__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} | ||
|
||
void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} | ||
|
||
// templates w/ non-dependent argument types get diagnosed right | ||
// away, without instantiation. | ||
template <typename T> | ||
__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {} | ||
template <typename T> | ||
__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} | ||
|
||
// dependent arguments get diagnosed after instantiation. | ||
template <typename T> | ||
__global__ void tkernel_const(__grid_constant__ const T arg) {} | ||
|
||
template <typename T> | ||
__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} | ||
|
||
void foo() { | ||
tkernel_const<const S><<<1,1>>>({}); | ||
tkernel_const<S><<<1,1>>>({}); | ||
tkernel<const S><<<1,1>>>({}); | ||
tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}} | ||
} |