merge main into amd-staging

Change-Id: I920e3c6518a78e762ce6b770d7e8c701e41593dc
ROCm · Oct 5, 2024 · eb1a4a6 · eb1a4a6
2 parents 9548705 + e6549b8
commit eb1a4a6
Show file tree

Hide file tree

Showing 36 changed files with 741 additions and 690 deletions.
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
@@ -4824,6 +4824,12 @@ def HLSLStep: LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_radians"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18896,6 +18896,15 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         retType, CGM.getHLSLRuntime().getSignIntrinsic(),
         ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_radians: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
+           "radians operand must have a float representation");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/Op0->getType(),
+        CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
+        nullptr, "hlsl.radians");
+  }
   }
   return nullptr;
 }

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -83,6 +83,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians)
   GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
   GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
@@ -3738,6 +3738,13 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
 
   const auto *Prev = Current.getPreviousNonComment();
   assert(Prev);
+
+  if (Prev->is(tok::coloncolon))
+    Prev = Prev->Previous;
+
+  if (!Prev)
+    return false;
+
   const auto &Previous = *Prev;
 
   if (const auto *PrevPrev = Previous.getPreviousNonComment();

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2131,6 +2131,11 @@ void UnwrappedLineParser::parseStructuralElement(
         return;
       }
       break;
+    case tok::greater:
+      nextToken();
+      if (FormatTok->is(tok::l_brace))
+        FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
+      break;
     default:
       nextToken();
       break;

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -2138,5 +2138,35 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
 int3 sign(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_sign)
 int4 sign(double4);
+
+//===----------------------------------------------------------------------===//
+// radians builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T radians(T Val)
+/// \brief Converts the specified value from degrees to radians.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+half radians(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+half2 radians(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+half3 radians(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+half4 radians(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+float radians(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+float2 radians(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+float3 radians(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
+float4 radians(float4);
+
 } // namespace hlsl
 #endif //_HLSL_HLSL_INTRINSICS_H_
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
@@ -1896,6 +1896,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_radians:
   case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))

diff --git a/clang/test/CodeGenHLSL/builtins/radians.hlsl b/clang/test/CodeGenHLSL/builtins/radians.hlsl
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DTARGET=dx -DFNATTRS=noundef
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DTARGET=dx -DFNATTRS=noundef
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF \
+// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef"
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN:   -DTARGET=spv -DFNATTRS="spir_func noundef"
+
+
+// NATIVE_HALF: define [[FNATTRS]] half @
+// NATIVE_HALF: %{{.*}} = call half @llvm.[[TARGET]].radians.f16(
+// NATIVE_HALF: ret half %{{.*}}
+// NO_HALF: define [[FNATTRS]] float @
+// NO_HALF: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
+// NO_HALF: ret float %{{.*}}
+half test_radians_half(half p0) { return radians(p0); }
+// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
+// NATIVE_HALF: %{{.*}} = call <2 x half> @llvm.[[TARGET]].radians.v2f16
+// NATIVE_HALF: ret <2 x half> %{{.*}}
+// NO_HALF: define [[FNATTRS]] <2 x float> @
+// NO_HALF: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32(
+// NO_HALF: ret <2 x float> %{{.*}}
+half2 test_radians_half2(half2 p0) { return radians(p0); }
+// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
+// NATIVE_HALF: %{{.*}} = call <3 x half> @llvm.[[TARGET]].radians.v3f16
+// NATIVE_HALF: ret <3 x half> %{{.*}}
+// NO_HALF: define [[FNATTRS]] <3 x float> @
+// NO_HALF: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32(
+// NO_HALF: ret <3 x float> %{{.*}}
+half3 test_radians_half3(half3 p0) { return radians(p0); }
+// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
+// NATIVE_HALF: %{{.*}} = call <4 x half> @llvm.[[TARGET]].radians.v4f16
+// NATIVE_HALF: ret <4 x half> %{{.*}}
+// NO_HALF: define [[FNATTRS]] <4 x float> @
+// NO_HALF: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32(
+// NO_HALF: ret <4 x float> %{{.*}}
+half4 test_radians_half4(half4 p0) { return radians(p0); }
+
+// CHECK: define [[FNATTRS]] float @
+// CHECK: %{{.*}} = call float @llvm.[[TARGET]].radians.f32(
+// CHECK: ret float %{{.*}}
+float test_radians_float(float p0) { return radians(p0); }
+// CHECK: define [[FNATTRS]] <2 x float> @
+// CHECK: %{{.*}} = call <2 x float> @llvm.[[TARGET]].radians.v2f32
+// CHECK: ret <2 x float> %{{.*}}
+float2 test_radians_float2(float2 p0) { return radians(p0); }
+// CHECK: define [[FNATTRS]] <3 x float> @
+// CHECK: %{{.*}} = call <3 x float> @llvm.[[TARGET]].radians.v3f32
+// CHECK: ret <3 x float> %{{.*}}
+float3 test_radians_float3(float3 p0) { return radians(p0); }
+// CHECK: define [[FNATTRS]] <4 x float> @
+// CHECK: %{{.*}} = call <4 x float> @llvm.[[TARGET]].radians.v4f32
+// CHECK: ret <4 x float> %{{.*}}
+float4 test_radians_float4(float4 p0) { return radians(p0); }
+
diff --git a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
@@ -17,6 +17,7 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tan
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tanh
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_trunc
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_hlsl_elementwise_radians
 
 double test_double_builtin(double p0) {
     return TEST_FUNC(p0);

diff --git a/clang/test/SemaHLSL/BuiltIns/radians-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/radians-errors.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
+
+float test_too_few_arg() {
+  return __builtin_hlsl_elementwise_radians();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_radians(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float builtin_bool_to_float_type_promotion(bool p1) {
+  return __builtin_hlsl_elementwise_radians(p1);
+  // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+float builtin_radians_int_to_float_promotion(int p1) {
+  return __builtin_hlsl_elementwise_radians(p1);
+  // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+float2 builtin_radians_int2_to_float2_promotion(int2 p1) {
+  return __builtin_hlsl_elementwise_radians(p1);
+  // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
+
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1007,6 +1007,14 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) {
   EXPECT_TOKEN(Tokens[6], tok::r_paren, TT_OverloadedOperator);
   EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_OverloadedOperatorLParen);
   EXPECT_TOKEN(Tokens[9], tok::amp, TT_PointerOrReference);
+
+  Tokens = annotate("friend ostream& ::operator<<(ostream& lhs, foo& rhs);");
+  ASSERT_EQ(Tokens.size(), 17u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::kw_operator, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[5], tok::lessless, TT_OverloadedOperator);
+  EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_OverloadedOperatorLParen);
+  EXPECT_TOKEN(Tokens[8], tok::amp, TT_PointerOrReference);
+  EXPECT_TOKEN(Tokens[12], tok::amp, TT_PointerOrReference);
 }
 
 TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) {
@@ -3546,6 +3554,11 @@ TEST_F(TokenAnnotatorTest, TemplateInstantiation) {
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
   EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);
+
+  Tokens = annotate("return std::conditional_t<T::value == U::value, T, U>{};");
+  ASSERT_EQ(Tokens.size(), 21u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
+  EXPECT_TOKEN(Tokens[16], tok::greater, TT_TemplateCloser);
 }
 
 } // namespace

diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h
@@ -9,7 +9,6 @@
 #ifndef FORTRAN_RUNTIME_CUDA_COMMON_H_
 #define FORTRAN_RUNTIME_CUDA_COMMON_H_
 
-#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
 #include "flang/Runtime/descriptor.h"
 #include "flang/Runtime/entry-names.h"
 
@@ -35,16 +34,4 @@ static constexpr unsigned kDeviceToDevice = 2;
     terminator.Crash("'%s' failed with '%s'", #expr, name); \
   }(expr)
 
-static inline unsigned getMemType(cuf::DataAttribute attr) {
-  if (attr == cuf::DataAttribute::Device)
-    return kMemTypeDevice;
-  if (attr == cuf::DataAttribute::Managed)
-    return kMemTypeManaged;
-  if (attr == cuf::DataAttribute::Unified)
-    return kMemTypeUnified;
-  if (attr == cuf::DataAttribute::Pinned)
-    return kMemTypePinned;
-  llvm::report_fatal_error("unsupported memory type");
-}
-
 #endif // FORTRAN_RUNTIME_CUDA_COMMON_H_
diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -36,6 +36,18 @@ using namespace Fortran::runtime::cuda;
 
 namespace {
 
+static inline unsigned getMemType(cuf::DataAttribute attr) {
+  if (attr == cuf::DataAttribute::Device)
+    return kMemTypeDevice;
+  if (attr == cuf::DataAttribute::Managed)
+    return kMemTypeManaged;
+  if (attr == cuf::DataAttribute::Unified)
+    return kMemTypeUnified;
+  if (attr == cuf::DataAttribute::Pinned)
+    return kMemTypePinned;
+  llvm::report_fatal_error("unsupported memory type");
+}
+
 template <typename OpTy>
 static bool isPinned(OpTy op) {
   if (op.getDataAttr() && *op.getDataAttr() == cuf::DataAttribute::Pinned)

diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
@@ -34,10 +34,10 @@ described in the `clang documentation
 by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
 through the ``--offload-new-driver``` and ``-fgpu-rdc`` flags.
 
-In order or link the GPU runtime, we simply pass this library to the embedded 
-device linker job. This can be done using the ``-Xoffload-linker`` option, which 
-forwards an argument to a ``clang`` job used to create the final GPU executable. 
-The toolchain should pick up the C libraries automatically in most cases, so 
+In order or link the GPU runtime, we simply pass this library to the embedded
+device linker job. This can be done using the ``-Xoffload-linker`` option, which
+forwards an argument to a ``clang`` job used to create the final GPU executable.
+The toolchain should pick up the C libraries automatically in most cases, so
 this shouldn't be necessary.
 
 .. code-block:: sh
@@ -189,7 +189,7 @@ final executable.
 
   #include <stdio.h>
 
-  int main() { fputs("Hello from AMDGPU!\n", stdout); }
+  int main() { printf("Hello from AMDGPU!\n"); }
 
 This program can then be compiled using the ``clang`` compiler. Note that
 ``-flto`` and ``-mcpu=`` should be defined. This is because the GPU
@@ -227,28 +227,26 @@ Building for NVPTX targets
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The infrastructure is the same as the AMDGPU example. However, the NVPTX binary
-utilities are very limited and must be targeted directly. There is no linker
-support for static libraries so we need to link in the ``libc.bc`` bitcode and
-inform the compiler driver of the file's contents.
+utilities are very limited and must be targeted directly. A utility called
+``clang-nvlink-wrapper`` instead wraps around the standard link job to give the
+illusion that ``nvlink`` is a functional linker.
 
 .. code-block:: c++
 
   #include <stdio.h>
 
   int main(int argc, char **argv, char **envp) {
-    fputs("Hello from NVPTX!\n", stdout);
+    printf("Hello from NVPTX!\n");
   }
 
 Additionally, the NVPTX ABI requires that every function signature matches. This
 requires us to pass the full prototype from ``main``. The installation will
 contain the ``nvptx-loader`` utility if the CUDA driver was found during
-compilation.
+compilation. Using link time optimization will help hide this.
 
 .. code-block:: sh
 
-  $> clang hello.c --target=nvptx64-nvidia-cuda -march=native \
-       -x ir <install>/lib/nvptx64-nvidia-cuda/libc.bc \
-       -x ir <install>/lib/nvptx64-nvidia-cuda/crt1.o
+  $> clang hello.c --target=nvptx64-nvidia-cuda -mcpu=native -flto -lc <install>/lib/nvptx64-nvidia-cuda/crt1.o
   $> nvptx-loader --threads 2 --blocks 2 a.out
   Hello from NVPTX!
   Hello from NVPTX!

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -86,4 +86,5 @@ def int_dx_rsqrt  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]
 def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
 def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
 def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
+def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 }