Skip to content

Commit

Permalink
merge main into amd-staging
Browse files Browse the repository at this point in the history
Change-Id: I920e3c6518a78e762ce6b770d7e8c701e41593dc
  • Loading branch information
Jenkins committed Oct 5, 2024
2 parents 9548705 + e6549b8 commit eb1a4a6
Show file tree
Hide file tree
Showing 36 changed files with 741 additions and 690 deletions.
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4824,6 +4824,12 @@ def HLSLStep: LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}

// HLSL-only builtin spelled __builtin_hlsl_elementwise_radians. It is marked
// NoThrow and Const and uses the generic "void(...)" prototype.
// NOTE(review): with a generic prototype, argument count/type checking has to
// happen outside this record (presumably in Sema) - confirm.
def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_radians"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

// Builtins for XRay.
def XRayCustomEvent : Builtin {
let Spellings = ["__xray_customevent"];
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18896,6 +18896,15 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
retType, CGM.getHLSLRuntime().getSignIntrinsic(),
ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
}
// Lowering for __builtin_hlsl_elementwise_radians: emit a call to the radians
// intrinsic supplied by the HLSL runtime, passing the single argument through.
case Builtin::BI__builtin_hlsl_elementwise_radians: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
// Sanity check on the argument's AST type: it must have a floating-point
// representation.
assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
"radians operand must have a float representation");
// The result has the same LLVM type as the operand.
return Builder.CreateIntrinsic(
/*ReturnType=*/Op0->getType(),
CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
nullptr, "hlsl.radians");
}
}
return nullptr;
}
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign)
GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians)
GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Format/TokenAnnotator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3738,6 +3738,13 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,

const auto *Prev = Current.getPreviousNonComment();
assert(Prev);

if (Prev->is(tok::coloncolon))
Prev = Prev->Previous;

if (!Prev)
return false;

const auto &Previous = *Prev;

if (const auto *PrevPrev = Previous.getPreviousNonComment();
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Format/UnwrappedLineParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2131,6 +2131,11 @@ void UnwrappedLineParser::parseStructuralElement(
return;
}
break;
case tok::greater:
nextToken();
if (FormatTok->is(tok::l_brace))
FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
break;
default:
nextToken();
break;
Expand Down
30 changes: 30 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -2138,5 +2138,35 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
int3 sign(double3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
int4 sign(double4);

//===----------------------------------------------------------------------===//
// radians builtins
//===----------------------------------------------------------------------===//

/// \fn T radians(T Val)
/// \brief Converts the specified value from degrees to radians.
/// \param Val The input value, in degrees.
/// \returns \a Val converted to radians; the result type matches the argument
/// type.
///
/// Overloads are declared for half and float, each as a scalar and as 2-, 3-
/// and 4-element vectors. Every overload is an alias of
/// __builtin_hlsl_elementwise_radians.

// half overloads (carry the 16-bit availability annotation).
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
half radians(half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
half2 radians(half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
half3 radians(half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
half4 radians(half4);

// float overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
float radians(float);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
float2 radians(float2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
float3 radians(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
float4 radians(float4);

} // namespace hlsl
#endif //_HLSL_HLSL_INTRINSICS_H_
1 change: 1 addition & 0 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
case Builtin::BI__builtin_hlsl_elementwise_radians:
case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
case Builtin::BI__builtin_hlsl_elementwise_frac: {
if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
Expand Down
66 changes: 66 additions & 0 deletions clang/test/CodeGenHLSL/builtins/radians.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF \
// RUN: -DTARGET=dx -DFNATTRS=noundef
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
// RUN: -DTARGET=dx -DFNATTRS=noundef
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF \
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"


// NATIVE_HALF: define [[FNATTRS]] half @
// NATIVE_HALF: %{{.*}} = call half @llvm.[[TARGET]].radians.f16(
// NATIVE_HALF: ret half %{{.*}}
// NO_HALF: define [[FNATTRS]] float @
// NO_HALF: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
// NO_HALF: ret float %{{.*}}
half test_radians_half(half p0) { return radians(p0); }
// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
// NATIVE_HALF: %{{.*}} = call <2 x half> @llvm.[[TARGET]].radians.v2f16
// NATIVE_HALF: ret <2 x half> %{{.*}}
// NO_HALF: define [[FNATTRS]] <2 x float> @
// NO_HALF: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32(
// NO_HALF: ret <2 x float> %{{.*}}
half2 test_radians_half2(half2 p0) { return radians(p0); }
// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
// NATIVE_HALF: %{{.*}} = call <3 x half> @llvm.[[TARGET]].radians.v3f16
// NATIVE_HALF: ret <3 x half> %{{.*}}
// NO_HALF: define [[FNATTRS]] <3 x float> @
// NO_HALF: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32(
// NO_HALF: ret <3 x float> %{{.*}}
half3 test_radians_half3(half3 p0) { return radians(p0); }
// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
// NATIVE_HALF: %{{.*}} = call <4 x half> @llvm.[[TARGET]].radians.v4f16
// NATIVE_HALF: ret <4 x half> %{{.*}}
// NO_HALF: define [[FNATTRS]] <4 x float> @
// NO_HALF: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32(
// NO_HALF: ret <4 x float> %{{.*}}
half4 test_radians_half4(half4 p0) { return radians(p0); }

// CHECK: define [[FNATTRS]] float @
// CHECK: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
// CHECK: ret float %{{.*}}
float test_radians_float(float p0) { return radians(p0); }
// CHECK: define [[FNATTRS]] <2 x float> @
// CHECK: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32
// CHECK: ret <2 x float> %{{.*}}
float2 test_radians_float2(float2 p0) { return radians(p0); }
// CHECK: define [[FNATTRS]] <3 x float> @
// CHECK: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32
// CHECK: ret <3 x float> %{{.*}}
float3 test_radians_float3(float3 p0) { return radians(p0); }
// CHECK: define [[FNATTRS]] <4 x float> @
// CHECK: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32
// CHECK: ret <4 x float> %{{.*}}
float4 test_radians_float4(float4 p0) { return radians(p0); }

1 change: 1 addition & 0 deletions clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tan
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tanh
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_trunc
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_hlsl_elementwise_radians

double test_double_builtin(double p0) {
return TEST_FUNC(p0);
Expand Down
27 changes: 27 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/radians-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected

float test_too_few_arg() {
return __builtin_hlsl_elementwise_radians();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
}

float2 test_too_many_arg(float2 p0) {
return __builtin_hlsl_elementwise_radians(p0, p0);
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
}

// A bool argument must be diagnosed as an incompatible type instead of being
// implicitly promoted to float.
float builtin_bool_to_float_type_promotion(bool p1) {
  return __builtin_hlsl_elementwise_radians(p1);
  // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
}

float builtin_radians_int_to_float_promotion(int p1) {
return __builtin_hlsl_elementwise_radians(p1);
// expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
}

float2 builtin_radians_int2_to_float2_promotion(int2 p1) {
return __builtin_hlsl_elementwise_radians(p1);
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
}

13 changes: 13 additions & 0 deletions clang/unittests/Format/TokenAnnotatorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,14 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) {
EXPECT_TOKEN(Tokens[6], tok::r_paren, TT_OverloadedOperator);
EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_OverloadedOperatorLParen);
EXPECT_TOKEN(Tokens[9], tok::amp, TT_PointerOrReference);

Tokens = annotate("friend ostream& ::operator<<(ostream& lhs, foo& rhs);");
ASSERT_EQ(Tokens.size(), 17u) << Tokens;
EXPECT_TOKEN(Tokens[4], tok::kw_operator, TT_FunctionDeclarationName);
EXPECT_TOKEN(Tokens[5], tok::lessless, TT_OverloadedOperator);
EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_OverloadedOperatorLParen);
EXPECT_TOKEN(Tokens[8], tok::amp, TT_PointerOrReference);
EXPECT_TOKEN(Tokens[12], tok::amp, TT_PointerOrReference);
}

TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) {
Expand Down Expand Up @@ -3546,6 +3554,11 @@ TEST_F(TokenAnnotatorTest, TemplateInstantiation) {
ASSERT_EQ(Tokens.size(), 11u) << Tokens;
EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);

Tokens = annotate("return std::conditional_t<T::value == U::value, T, U>{};");
ASSERT_EQ(Tokens.size(), 21u) << Tokens;
EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
EXPECT_TOKEN(Tokens[16], tok::greater, TT_TemplateCloser);
}

} // namespace
Expand Down
13 changes: 0 additions & 13 deletions flang/include/flang/Runtime/CUDA/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#ifndef FORTRAN_RUNTIME_CUDA_COMMON_H_
#define FORTRAN_RUNTIME_CUDA_COMMON_H_

#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Runtime/descriptor.h"
#include "flang/Runtime/entry-names.h"

Expand All @@ -35,16 +34,4 @@ static constexpr unsigned kDeviceToDevice = 2;
terminator.Crash("'%s' failed with '%s'", #expr, name); \
}(expr)

/// Maps a cuf::DataAttribute to the matching kMemType* constant.
/// Device, Managed, Unified and Pinned are handled; any other attribute ends
/// in llvm::report_fatal_error.
static inline unsigned getMemType(cuf::DataAttribute attr) {
if (attr == cuf::DataAttribute::Device)
return kMemTypeDevice;
if (attr == cuf::DataAttribute::Managed)
return kMemTypeManaged;
if (attr == cuf::DataAttribute::Unified)
return kMemTypeUnified;
if (attr == cuf::DataAttribute::Pinned)
return kMemTypePinned;
// No kMemType* value is provided for the remaining attributes.
llvm::report_fatal_error("unsupported memory type");
}

#endif // FORTRAN_RUNTIME_CUDA_COMMON_H_
12 changes: 12 additions & 0 deletions flang/lib/Optimizer/Transforms/CufOpConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ using namespace Fortran::runtime::cuda;

namespace {

/// Translate a cuf::DataAttribute into the corresponding kMemType* value.
/// Only Device, Managed, Unified and Pinned have a mapping; every other
/// attribute is reported as a fatal error.
static inline unsigned getMemType(cuf::DataAttribute attr) {
  switch (attr) {
  case cuf::DataAttribute::Device:
    return kMemTypeDevice;
  case cuf::DataAttribute::Managed:
    return kMemTypeManaged;
  case cuf::DataAttribute::Unified:
    return kMemTypeUnified;
  case cuf::DataAttribute::Pinned:
    return kMemTypePinned;
  default:
    break;
  }
  llvm::report_fatal_error("unsupported memory type");
}

template <typename OpTy>
static bool isPinned(OpTy op) {
if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)
Expand Down
24 changes: 11 additions & 13 deletions libc/docs/gpu/using.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ described in the `clang documentation
by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
through the ``--offload-new-driver`` and ``-fgpu-rdc`` flags.

In order to link the GPU runtime, we simply pass this library to the embedded
device linker job. This can be done using the ``-Xoffload-linker`` option, which
forwards an argument to a ``clang`` job used to create the final GPU executable.
The toolchain should pick up the C libraries automatically in most cases, so
In order to link the GPU runtime, we simply pass this library to the embedded
device linker job. This can be done using the ``-Xoffload-linker`` option, which
forwards an argument to a ``clang`` job used to create the final GPU executable.
The toolchain should pick up the C libraries automatically in most cases, so
this shouldn't be necessary.

.. code-block:: sh
Expand Down Expand Up @@ -189,7 +189,7 @@ final executable.

#include <stdio.h>

int main() { fputs("Hello from AMDGPU!\n", stdout); }
int main() { printf("Hello from AMDGPU!\n"); }

This program can then be compiled using the ``clang`` compiler. Note that
``-flto`` and ``-mcpu=`` should be defined. This is because the GPU
Expand Down Expand Up @@ -227,28 +227,26 @@ Building for NVPTX targets
^^^^^^^^^^^^^^^^^^^^^^^^^^

The infrastructure is the same as the AMDGPU example. However, the NVPTX binary
utilities are very limited and must be targeted directly. There is no linker
support for static libraries so we need to link in the ``libc.bc`` bitcode and
inform the compiler driver of the file's contents.
utilities are very limited and must be targeted directly. A utility called
``clang-nvlink-wrapper`` instead wraps around the standard link job to give the
illusion that ``nvlink`` is a functional linker.

.. code-block:: c++

#include <stdio.h>

int main(int argc, char **argv, char **envp) {
fputs("Hello from NVPTX!\n", stdout);
printf("Hello from NVPTX!\n");
}

Additionally, the NVPTX ABI requires that every function signature matches. This
requires us to pass the full prototype from ``main``. The installation will
contain the ``nvptx-loader`` utility if the CUDA driver was found during
compilation.
compilation. Using link time optimization will help hide this.

.. code-block:: sh

$> clang hello.c --target=nvptx64-nvidia-cuda -march=native \
-x ir <install>/lib/nvptx64-nvidia-cuda/libc.bc \
-x ir <install>/lib/nvptx64-nvidia-cuda/crt1.o
$> clang hello.c --target=nvptx64-nvidia-cuda -mcpu=native -flto -lc <install>/lib/nvptx64-nvidia-cuda/crt1.o
$> nvptx-loader --threads 2 --blocks 2 a.out
Hello from NVPTX!
Hello from NVPTX!
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,5 @@ def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]
def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
}
Loading

0 comments on commit eb1a4a6

Please sign in to comment.