From d56e0d07cc5ee8e334fd1ad403eef0b1a771384f Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Fri, 1 Dec 2023 08:34:44 +0100 Subject: [PATCH 01/72] clang/OpenCL: set sqrt fp accuracy on call to Z4sqrt (#66651) This is reverting the previous implementation to avoid adding inline functions to the OpenCL headers. This was breaking clspv flow google/clspv#1231, while https://reviews.llvm.org/D156743 mentioned that just decorating the call node with `!fpmath` was enough. This PR is implementing this idea. The test has been updated with this implementation. --- clang/lib/CodeGen/CGCall.cpp | 4 + clang/lib/Headers/opencl-c-base.h | 58 ----------- clang/lib/Headers/opencl-c.h | 26 +++++ clang/lib/Sema/OpenCLBuiltins.td | 5 +- clang/test/CodeGenOpenCL/sqrt-fpmath.cl | 124 ++++++++++-------------- 5 files changed, 82 insertions(+), 135 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 4c2577126e48b3..a24aeea7ae32bf 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5608,6 +5608,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, BundleList); EmitBlock(Cont); } + if (CI->getCalledFunction() && CI->getCalledFunction()->hasName() && + CI->getCalledFunction()->getName().startswith("_Z4sqrt")) { + SetSqrtFPAccuracy(CI); + } if (callOrInvoke) *callOrInvoke = CI; diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index d56e5ceae652ad..2494f6213fc569 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h @@ -819,64 +819,6 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) #endif // cl_intel_device_side_avc_motion_estimation -/** - * Compute square root. - * - * Provide inline implementations using the builtin so that we get appropriate - * !fpmath based on -cl-fp32-correctly-rounded-divide-sqrt, attached to - * llvm.sqrt. 
The implementation should still provide an external definition. - */ -#define __ovld __attribute__((overloadable)) -#define __cnfn __attribute__((const)) - -inline float __ovld __cnfn sqrt(float __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float2 __ovld __cnfn sqrt(float2 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float3 __ovld __cnfn sqrt(float3 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float4 __ovld __cnfn sqrt(float4 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float8 __ovld __cnfn sqrt(float8 __x) { - return __builtin_elementwise_sqrt(__x); -} - -inline float16 __ovld __cnfn sqrt(float16 __x) { - return __builtin_elementwise_sqrt(__x); -} - -// We only really want to define the float variants here. However -// -fdeclare-opencl-builtins will not work if some overloads are already - // provided in the base header, so provide all overloads here. - -#ifdef cl_khr_fp64 -double __ovld __cnfn sqrt(double); -double2 __ovld __cnfn sqrt(double2); -double3 __ovld __cnfn sqrt(double3); -double4 __ovld __cnfn sqrt(double4); -double8 __ovld __cnfn sqrt(double8); -double16 __ovld __cnfn sqrt(double16); -#endif //cl_khr_fp64 -#ifdef cl_khr_fp16 -half __ovld __cnfn sqrt(half); -half2 __ovld __cnfn sqrt(half2); -half3 __ovld __cnfn sqrt(half3); -half4 __ovld __cnfn sqrt(half4); -half8 __ovld __cnfn sqrt(half8); -half16 __ovld __cnfn sqrt(half16); -#endif //cl_khr_fp16 - -#undef __cnfn -#undef __ovld - // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 1efbbf8f8ee6a0..288bb18bc654eb 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -8496,6 +8496,32 @@ half8 __ovld __cnfn sinpi(half8); half16 __ovld __cnfn sinpi(half16); #endif //cl_khr_fp16 +/** + * Compute square root. 
+ */ +float __ovld __cnfn sqrt(float); +float2 __ovld __cnfn sqrt(float2); +float3 __ovld __cnfn sqrt(float3); +float4 __ovld __cnfn sqrt(float4); +float8 __ovld __cnfn sqrt(float8); +float16 __ovld __cnfn sqrt(float16); +#ifdef cl_khr_fp64 +double __ovld __cnfn sqrt(double); +double2 __ovld __cnfn sqrt(double2); +double3 __ovld __cnfn sqrt(double3); +double4 __ovld __cnfn sqrt(double4); +double8 __ovld __cnfn sqrt(double8); +double16 __ovld __cnfn sqrt(double16); +#endif //cl_khr_fp64 +#ifdef cl_khr_fp16 +half __ovld __cnfn sqrt(half); +half2 __ovld __cnfn sqrt(half2); +half3 __ovld __cnfn sqrt(half3); +half4 __ovld __cnfn sqrt(half4); +half8 __ovld __cnfn sqrt(half8); +half16 __ovld __cnfn sqrt(half16); +#endif //cl_khr_fp16 + /** * Compute tangent. */ diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td index 9db450281912d2..0cceba090bd8f2 100644 --- a/clang/lib/Sema/OpenCLBuiltins.td +++ b/clang/lib/Sema/OpenCLBuiltins.td @@ -563,15 +563,12 @@ foreach name = ["acos", "acosh", "acospi", "log", "log2", "log10", "log1p", "logb", "rint", "round", "rsqrt", "sin", "sinh", "sinpi", + "sqrt", "tan", "tanh", "tanpi", "tgamma", "trunc", "lgamma"] in { def : Builtin; } - -// sqrt is handled in opencl-c-base.h to handle -// -cl-fp32-correctly-rounded-divide-sqrt. 
- foreach name = ["nan"] in { def : Builtin; def : Builtin; diff --git a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl index df30085cba2e7d..7afde7f91bdfeb 100644 --- a/clang/test/CodeGenOpenCL/sqrt-fpmath.cl +++ b/clang/test/CodeGenOpenCL/sqrt-fpmath.cl @@ -3,11 +3,15 @@ // depending on -cl-fp32-correctly-rounded-divide-sqrt // Test with -fdeclare-opencl-builtins -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o %t.ll %s +// RUN: FileCheck -check-prefixes=CHECK,DEFAULT %s < %t.ll +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o %t.ll %s +// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s < %t.ll -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s -// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s +// RUN: FileCheck 
-check-prefixes=CHECK,DEFAULT-UNSAFE %s < %t.ll +// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o %t.ll %s +// RUN: FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s < %t.ll // Test without -fdeclare-opencl-builtins // RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s @@ -19,183 +23,157 @@ #pragma OPENCL EXTENSION cl_khr_fp16 : enable // CHECK-LABEL: define {{.*}} float @call_sqrt_f32( -// CHECK: call {{.*}} float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+$}} +// DEFAULT: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}} +// CORRECTLYROUNDED: call float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH:\![0-9]+]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+}}{{$}} float call_sqrt_f32(float x) { return sqrt(x); } -// CHECK-LABEL: define available_externally float @_Z4sqrtf(float noundef %__x) -// DEFAULT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call float @llvm.sqrt.f32(float %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}){{$}} - // CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32( -// CHECK: call {{.*}} <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.*}}) #{{[0-9]+$}} +// DEFAULT: call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call 
<2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} float2 call_sqrt_v2f32(float2 x) { return sqrt(x); } -// CHECK-LABEL: define available_externally <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %__x) -// DEFAULT: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}} // CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32( -// CHECK: call {{.*}} <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.*}}) #{{[0-9]+$}} +// DEFAULT: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} float3 call_sqrt_v3f32(float3 x) { return sqrt(x); } -// CHECK-LABEL: define available_externally <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %__x) -// DEFAULT: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> 
@llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}} // CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32( -// CHECK: call {{.*}} <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.*}}) #{{[0-9]+$}} +// DEFAULT: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} float4 call_sqrt_v4f32(float4 x) { return sqrt(x); } -// CHECK-LABEL: define available_externally <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %__x) -// DEFAULT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}} // CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32( -// CHECK: call {{.*}} <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.*}}) #{{[0-9]+$}} +// DEFAULT: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 
x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} float8 call_sqrt_v8f32(float8 x) { return sqrt(x); } -// CHECK-LABEL: define available_externally <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %__x) -// DEFAULT: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}} // CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32( -// CHECK: call {{.*}} <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.*}}) #{{[0-9]+$}} +// DEFAULT: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED: call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} + +// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[FPMATH]]{{$}} +// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.+}}) #{{[0-9]+}}{{$}} float16 call_sqrt_v16f32(float16 x) { return sqrt(x); } -// CHECK-LABEL: define available_externally <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %__x) -// DEFAULT: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}} - -// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}} -// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}} // Not for f64 // CHECK-LABEL: define {{.*}} double @call_sqrt_f64( 
-// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}{{$}} double call_sqrt_f64(double x) { return sqrt(x); } -// CHECK-NOT: define // Not for f64 // CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64( -// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} double2 call_sqrt_v2f64(double2 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64( -// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} double3 call_sqrt_v3f64(double3 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64( -// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} double4 call_sqrt_v4f64(double4 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64( -// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} double8 call_sqrt_v8f64(double8 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64( -// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}{{$}} double16 call_sqrt_v16f64(double16 x) { return sqrt(x); } -// CHECK-NOT: define // Not for f16 // CHECK-LABEL: define {{.*}} half @call_sqrt_f16( -// CHECK: call {{.*}} half 
@_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}} half call_sqrt_f16(half x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16( -// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half2 call_sqrt_v2f16(half2 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16( -// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half3 call_sqrt_v3f16(half3 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16( -// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half4 call_sqrt_v4f16(half4 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16( -// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half8 call_sqrt_v8f16(half8 x) { return sqrt(x); } -// CHECK-NOT: define // CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16( -// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}} +// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}} half16 call_sqrt_v16f16(half16 x) { return sqrt(x); } -// CHECK-NOT: define - -// DEFAULT: [[$FPMATH]] = !{float 3.000000e+00} +// DEFAULT: [[FPMATH]] = !{float 3.000000e+00} From 5a9354832695d878e86f90010d2b043a9551b072 Mon Sep 17 00:00:00 2001 From: paperchalice Date: Fri, 1 Dec 
2023 15:43:48 +0800 Subject: [PATCH 02/72] [CodeGen][NFC] Sort and format MachinePassRegistry.def (#74044) Same as #73762. --- .../llvm/CodeGen/MachinePassRegistry.def | 188 ++++++++++-------- 1 file changed, 105 insertions(+), 83 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def index 47dd8f2cc46483..fc2d07fd6616fc 100644 --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -29,29 +29,30 @@ MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ()) #define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) #endif FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) -FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, (std::move(TM.getTargetIRAnalysis()))) +FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, + (std::move(TM.getTargetIRAnalysis()))) #undef FUNCTION_ANALYSIS #ifndef FUNCTION_PASS #define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) #endif FUNCTION_PASS("callbrprepare", CallBrPreparePass, ()) -FUNCTION_PASS("safe-stack", SafeStackPass, (TM)) -FUNCTION_PASS("mergeicmps", MergeICmpsPass, ()) -FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ()) -FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) FUNCTION_PASS("consthoist", ConstantHoistingPass, ()) -FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ()) -FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) -FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ()) FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) +FUNCTION_PASS("lower-constant-intrinsics", 
LowerConstantIntrinsicsPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) +FUNCTION_PASS("mergeicmps", MergeICmpsPass, ()) +FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) +FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) +FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ()) +FUNCTION_PASS("safe-stack", SafeStackPass, (TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ()) FUNCTION_PASS("tlshoist", TLSVariableHoistPass, ()) +FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) FUNCTION_PASS("verify", VerifierPass, ()) #undef FUNCTION_PASS @@ -69,7 +70,8 @@ LOOP_PASS("loop-reduce", LoopStrengthReducePass, ()) #ifndef MACHINE_FUNCTION_ANALYSIS #define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) #endif -MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) +MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, + (PIC)) // LiveVariables currently requires pure SSA form. 
// FIXME: Once TwoAddressInstruction pass no longer uses kill flags, // LiveVariables can be removed completely, and LiveIntervals can be directly @@ -80,18 +82,24 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, ( // MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksPass()) // MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis()) // MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", LazyMachineBlockFrequencyInfoAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", +// LazyMachineBlockFrequencyInfoAnalysis()) // MACHINE_FUNCTION_ANALYSIS("machine-bfi", MachineBlockFrequencyInfoAnalysis()) // MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopInfoAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-dom-frontier", MachineDominanceFrontierAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-dom-frontier", +// MachineDominanceFrontierAnalysis()) // MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-ore", MachineOptimizationRemarkEmitterPassAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", MachinePostDominatorTreeAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-region-info", MachineRegionInfoPassAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysisAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) -// MACHINE_FUNCTION_ANALYSIS("gc-analysis", GCMachineCodeAnalysisPass()) +// MACHINE_FUNCTION_ANALYSIS("machine-ore", +// MachineOptimizationRemarkEmitterPassAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", +// MachinePostDominatorTreeAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-region-info", +// MachineRegionInfoPassAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", +// 
MachineTraceMetricsAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysisAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("gc-analysis", GCMachineCodeAnalysisPass()) #undef MACHINE_FUNCTION_ANALYSIS #ifndef MACHINE_FUNCTION_PASS @@ -108,22 +116,22 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, ( #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) #endif +DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ()) +DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ()) +DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ()) +DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ()) +DUMMY_FUNCTION_PASS("dwarfehprepare", DwarfEHPass, ()) DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ()) +DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ()) DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ()) +DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ()) +DUMMY_FUNCTION_PASS("interleaved-access", InterleavedAccessPass, ()) +DUMMY_FUNCTION_PASS("select-optimize", SelectOptimizePass, ()) DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ()) DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ()) -DUMMY_FUNCTION_PASS("dwarfehprepare", DwarfEHPass, ()) -DUMMY_FUNCTION_PASS("winehprepare", WinEHPass, ()) -DUMMY_FUNCTION_PASS("wasmehprepare", WasmEHPass, ()) -DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ()) DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ()) -DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ()) -DUMMY_FUNCTION_PASS("interleaved-access", InterleavedAccessPass, ()) -DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ()) -DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ()) -DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ()) -DUMMY_FUNCTION_PASS("gc-info-printer", 
GCInfoPrinterPass, ()) -DUMMY_FUNCTION_PASS("select-optimize", SelectOptimizePass, ()) +DUMMY_FUNCTION_PASS("wasmehprepare", WasmEHPass, ()) +DUMMY_FUNCTION_PASS("winehprepare", WinEHPass, ()) #undef DUMMY_FUNCTION_PASS #ifndef DUMMY_MODULE_PASS @@ -141,71 +149,85 @@ DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass, ()) #ifndef DUMMY_MACHINE_FUNCTION_PASS #define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) #endif -DUMMY_MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", + MachineBlockPlacementStatsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("dead-mi-elimination", + DeadMachineInstructionElimPass, ()) 
+DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter, ()) +DUMMY_MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ()) DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, + ()) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("removeredundantdebugvalues", RemoveRedundantDebugValuesPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter, ()) +DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ()) -DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass, ()) 
-DUMMY_MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("liveintervals", LiveIntervalsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, + ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machine-sink", MachineSinkingPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-uniformity", + MachineUniformityInfoWrapperPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass, ()) DUMMY_MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass, ()) DUMMY_MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("twoaddressinstruction", TwoAddressInstructionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ()) 
+DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass, + ()) +DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", + MachineUniformityInfoPrinterPass, ()) DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("liveintervals", LiveIntervalsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass, ()) -DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass, ()) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass, ()) DUMMY_MACHINE_FUNCTION_PASS("ra-fast", RAFastPass, ()) DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass, ()) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, + ()) +DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", + 
RegUsageInfoPropagationPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass, ()) DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata, ()) -DUMMY_MACHINE_FUNCTION_PASS("machine-uniformity", MachineUniformityInfoWrapperPass, ()) -DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", MachineUniformityInfoPrinterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("removeredundantdebugvalues", + RemoveRedundantDebugValuesPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", + RenameIndependentSubregsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, + ()) +DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass, + ()) +DUMMY_MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass, ()) +DUMMY_MACHINE_FUNCTION_PASS("twoaddressinstruction", TwoAddressInstructionPass, + ()) +DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ()) #undef DUMMY_MACHINE_FUNCTION_PASS From 520c3b82db7199c1dcd24520f3c0ac573c191791 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 30 Nov 2023 23:45:12 -0800 Subject: [PATCH 03/72] [llvm] Stop including llvm/ADT/StringSet.h (NFC) Identified with clangd. 
--- llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h | 1 - llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp | 1 - llvm/lib/Transforms/Utils/MoveAutoInit.cpp | 1 - llvm/tools/llvm-dwarfutil/Error.h | 1 - llvm/tools/llvm-exegesis/lib/BenchmarkResult.h | 1 - llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 1 - llvm/tools/llvm-objcopy/ObjcopyOptions.cpp | 1 - 7 files changed, 7 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h index 0c79aecdd2457f..f5dce01c34e7aa 100644 --- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h +++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h @@ -15,7 +15,6 @@ #ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H #define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H -#include "llvm/ADT/StringSet.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp index cdc1158ce1c4ca..c6ffd9f7c2e3c5 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" diff --git a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp index 6f5f34461beaf7..a977ad87b79f51 100644 --- a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp +++ b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp @@ -14,7 +14,6 @@ #include "llvm/Transforms/Utils/MoveAutoInit.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include 
"llvm/ADT/StringSet.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/llvm/tools/llvm-dwarfutil/Error.h b/llvm/tools/llvm-dwarfutil/Error.h index b92c50ca5a452a..fff5978a9d1ad4 100644 --- a/llvm/tools/llvm-dwarfutil/Error.h +++ b/llvm/tools/llvm-dwarfutil/Error.h @@ -11,7 +11,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h index 8a7faa0176e324..38111519a2c898 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -18,7 +18,6 @@ #include "LlvmState.h" #include "RegisterValue.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/YAMLTraits.h" diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 46ec4bdc28709f..094dca22f77b03 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSet.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/Archive.h" diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index d33adb0b6a3e47..57129025394437 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -10,7 +10,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include 
"llvm/BinaryFormat/COFF.h" #include "llvm/ObjCopy/CommonConfig.h" #include "llvm/ObjCopy/ConfigManager.h" From bc265bd663233c4bfa222f1cc93ec472075a53ff Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 30 Nov 2023 23:52:26 -0800 Subject: [PATCH 04/72] [llvm-reduce] Stop including llvm/ADT/SetVector.h (NFC) Identified with clangd. --- llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp index 05127ec7b9c808..fdac4a3bf708e2 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp @@ -16,7 +16,6 @@ #include "Delta.h" #include "Utils.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include From 604c29e9340c4a18eab1e53dd5cc4c05d46db2f7 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Fri, 1 Dec 2023 09:10:29 +0100 Subject: [PATCH 05/72] [AMDGPU] NFC. Add test for debug info on CFG annotation instructions. 
(#73959) --- .../CodeGen/AMDGPU/si-annotate-dbg-info.ll | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll new file mode 100644 index 00000000000000..703eeb5df86e50 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s + +define amdgpu_ps i32 @if_else(i32 %0) !dbg !5 { +; OPT-LABEL: define amdgpu_ps i32 @if_else( +; OPT-SAME: i32 [[TMP0:%.*]]) !dbg [[DBG5:![0-9]+]] { +; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0, !dbg [[DBG13:![0-9]+]] +; OPT-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13]] +; OPT-NEXT: [[TMP2:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]]) +; OPT-NEXT: [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP2]], 0 +; OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP2]], 1 +; OPT-NEXT: br i1 [[TMP3]], label [[FALSE:%.*]], label [[FLOW:%.*]], !dbg [[DBG14:![0-9]+]] +; OPT: Flow: +; OPT-NEXT: [[TMP5:%.*]] = phi i32 [ 33, [[FALSE]] ], [ undef, [[TMP1:%.*]] ] +; OPT-NEXT: [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 [[TMP4]]) +; OPT-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0 +; OPT-NEXT: [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1 +; OPT-NEXT: br i1 [[TMP7]], label [[TRUE:%.*]], label [[EXIT:%.*]], !dbg [[DBG14]] +; OPT: true: +; OPT-NEXT: br label [[EXIT]], !dbg [[DBG15:![0-9]+]] +; OPT: false: +; OPT-NEXT: br label [[FLOW]], !dbg [[DBG16:![0-9]+]] +; OPT: exit: +; OPT-NEXT: [[RET:%.*]] = phi i32 [ [[TMP5]], [[FLOW]] ], [ 42, [[TRUE]] ], !dbg [[DBG17:![0-9]+]] +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 
[[TMP8]]) +; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[RET]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; OPT-NEXT: ret i32 [[RET]], !dbg [[DBG18:![0-9]+]] +; + %c = icmp eq i32 %0, 0, !dbg !13 + tail call void @llvm.dbg.value(metadata i1 %c, metadata !9, metadata !DIExpression()), !dbg !13 + br i1 %c, label %true, label %false, !dbg !14 + +true: ; preds = %1 + br label %exit, !dbg !15 + +false: ; preds = %1 + br label %exit, !dbg !16 + +exit: ; preds = %false, %true + %ret = phi i32 [ 42, %true ], [ 33, %false ], !dbg !17 + tail call void @llvm.dbg.value(metadata i32 %ret, metadata !11, metadata !DIExpression()), !dbg !17 + ret i32 %ret, !dbg !18 +} + +define amdgpu_ps void @loop_if_break(i32 %n) !dbg !19 { +; OPT-LABEL: define amdgpu_ps void @loop_if_break( +; OPT-SAME: i32 [[N:%.*]]) !dbg [[DBG19:![0-9]+]] { +; OPT-NEXT: entry: +; OPT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG24:![0-9]+]] +; OPT: loop: +; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP5:%.*]], [[FLOW:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FLOW]] ], !dbg [[DBG25:![0-9]+]] +; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0, !dbg [[DBG26:![0-9]+]] +; OPT-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26]] +; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]]) +; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0 +; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1 +; OPT-NEXT: br i1 [[TMP1]], label [[LOOP_BODY:%.*]], label [[FLOW]], !dbg [[DBG27:![0-9]+]] +; OPT: loop_body: +; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1, !dbg [[DBG28:![0-9]+]] +; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I_NEXT]], metadata [[META23:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG28]] +; OPT-NEXT: br label [[FLOW]], !dbg [[DBG29:![0-9]+]] +; OPT: Flow: +; OPT-NEXT: [[TMP3]] = phi i32 [ [[I_NEXT]], [[LOOP_BODY]] ], [ undef, [[LOOP]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[LOOP_BODY]] ], [ true, [[LOOP]] ] +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]) +; OPT-NEXT: [[TMP5]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP6:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP5]]) +; OPT-NEXT: br i1 [[TMP6]], label [[EXIT:%.*]], label [[LOOP]], !dbg [[DBG27]] +; OPT: exit: +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP5]]) +; OPT-NEXT: ret void, !dbg [[DBG30:![0-9]+]] +; +entry: + br label %loop, !dbg !24 + +loop: ; preds = %loop_body, %entry + %i = phi i32 [ %n, %entry ], [ %i.next, %loop_body ], !dbg !25 + tail call void @llvm.dbg.value(metadata i32 %i, metadata !21, metadata !DIExpression()), !dbg !25 + %c = icmp ugt i32 %i, 0, !dbg !26 + tail call void @llvm.dbg.value(metadata i1 %c, metadata !22, metadata !DIExpression()), !dbg !26 + br i1 %c, label %loop_body, label %exit, !dbg !27 + +loop_body: ; preds = %loop + %i.next = sub i32 %i, 1, !dbg !28 + tail call void @llvm.dbg.value(metadata i32 %i.next, metadata !23, metadata !DIExpression()), !dbg !28 + br label %loop, !dbg !29 + +exit: ; preds = %loop + ret void, !dbg !30 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!2, !3} +!llvm.module.flags = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "../../../test/CodeGen/AMDGPU/si-annotate-dbg-info.ll", directory: "/") +!2 = !{i32 13} +!3 = !{i32 5} +!4 = !{i32 2, !"Debug Info 
Version", i32 3} +!5 = distinct !DISubprogram(name: "if_else", linkageName: "if_else", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8) +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !{!9, !11} +!9 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !10) +!10 = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_unsigned) +!11 = !DILocalVariable(name: "2", scope: !5, file: !1, line: 5, type: !12) +!12 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +!13 = !DILocation(line: 1, column: 1, scope: !5) +!14 = !DILocation(line: 2, column: 1, scope: !5) +!15 = !DILocation(line: 3, column: 1, scope: !5) +!16 = !DILocation(line: 4, column: 1, scope: !5) +!17 = !DILocation(line: 5, column: 1, scope: !5) +!18 = !DILocation(line: 6, column: 1, scope: !5) +!19 = distinct !DISubprogram(name: "loop_if_break", linkageName: "loop_if_break", scope: null, file: !1, line: 7, type: !6, scopeLine: 7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20) +!20 = !{!21, !22, !23} +!21 = !DILocalVariable(name: "3", scope: !19, file: !1, line: 8, type: !12) +!22 = !DILocalVariable(name: "4", scope: !19, file: !1, line: 9, type: !10) +!23 = !DILocalVariable(name: "5", scope: !19, file: !1, line: 11, type: !12) +!24 = !DILocation(line: 7, column: 1, scope: !19) +!25 = !DILocation(line: 8, column: 1, scope: !19) +!26 = !DILocation(line: 9, column: 1, scope: !19) +!27 = !DILocation(line: 10, column: 1, scope: !19) +!28 = !DILocation(line: 11, column: 1, scope: !19) +!29 = !DILocation(line: 12, column: 1, scope: !19) +!30 = !DILocation(line: 13, column: 1, scope: !19) +;. 
+; OPT: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +; OPT: [[META1]] = !DIFile(filename: "../../../test/CodeGen/AMDGPU/si-annotate-dbg-info.ll", directory: {{.*}}) +; OPT: [[DBG5]] = distinct !DISubprogram(name: "if_else", linkageName: "if_else", scope: null, file: [[META1]], line: 1, type: [[META6:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META8:![0-9]+]]) +; OPT: [[META6]] = !DISubroutineType(types: [[META7:![0-9]+]]) +; OPT: [[META7]] = !{} +; OPT: [[META8]] = !{[[META9]], [[META11]]} +; OPT: [[META9]] = !DILocalVariable(name: "1", scope: [[DBG5]], file: [[META1]], line: 1, type: [[META10:![0-9]+]]) +; OPT: [[META10]] = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_unsigned) +; OPT: [[META11]] = !DILocalVariable(name: "2", scope: [[DBG5]], file: [[META1]], line: 5, type: [[META12:![0-9]+]]) +; OPT: [[META12]] = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +; OPT: [[DBG13]] = !DILocation(line: 1, column: 1, scope: [[DBG5]]) +; OPT: [[DBG14]] = !DILocation(line: 2, column: 1, scope: [[DBG5]]) +; OPT: [[DBG15]] = !DILocation(line: 3, column: 1, scope: [[DBG5]]) +; OPT: [[DBG16]] = !DILocation(line: 4, column: 1, scope: [[DBG5]]) +; OPT: [[DBG17]] = !DILocation(line: 5, column: 1, scope: [[DBG5]]) +; OPT: [[DBG18]] = !DILocation(line: 6, column: 1, scope: [[DBG5]]) +; OPT: [[DBG19]] = distinct !DISubprogram(name: "loop_if_break", linkageName: "loop_if_break", scope: null, file: [[META1]], line: 7, type: [[META6]], scopeLine: 7, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META20:![0-9]+]]) +; OPT: [[META20]] = !{[[META21]], [[META22]], [[META23]]} +; OPT: [[META21]] = !DILocalVariable(name: "3", scope: [[DBG19]], file: [[META1]], line: 8, type: [[META12]]) +; OPT: [[META22]] = !DILocalVariable(name: "4", scope: 
[[DBG19]], file: [[META1]], line: 9, type: [[META10]]) +; OPT: [[META23]] = !DILocalVariable(name: "5", scope: [[DBG19]], file: [[META1]], line: 11, type: [[META12]]) +; OPT: [[DBG24]] = !DILocation(line: 7, column: 1, scope: [[DBG19]]) +; OPT: [[DBG25]] = !DILocation(line: 8, column: 1, scope: [[DBG19]]) +; OPT: [[DBG26]] = !DILocation(line: 9, column: 1, scope: [[DBG19]]) +; OPT: [[DBG27]] = !DILocation(line: 10, column: 1, scope: [[DBG19]]) +; OPT: [[DBG28]] = !DILocation(line: 11, column: 1, scope: [[DBG19]]) +; OPT: [[DBG29]] = !DILocation(line: 12, column: 1, scope: [[DBG19]]) +; OPT: [[DBG30]] = !DILocation(line: 13, column: 1, scope: [[DBG19]]) +;. From 0e163e75d44cfa024092cda5099bd41af2218215 Mon Sep 17 00:00:00 2001 From: Shengchen Kan Date: Fri, 1 Dec 2023 16:18:33 +0800 Subject: [PATCH 06/72] [X86][MC] Not emit {evex} for VEX-promoted instructions with GPR operands (#74039) To align with 1. GNU binutils's behavior for APX instructions 2. LLVM's behaviour for EVEX instructions with VEX variant --- llvm/lib/Target/X86/X86InstrAVX512.td | 7 ++--- llvm/test/MC/Disassembler/X86/apx/kmov.txt | 35 ++++++++++++---------- llvm/test/MC/X86/apx/kmov-att.s | 28 +++++++++-------- llvm/test/MC/X86/apx/kmov-intel.s | 28 +++++++++-------- 4 files changed, 52 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0514f0d1950670..77b359e84fbd2d 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2855,8 +2855,8 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, string OpcodeStr, RegisterClass KRC, ValueType vvt, X86MemOperand x86memop, string Suffix = ""> { - let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in { - let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in + let isMoveReg = 1, hasSideEffects = 0, SchedRW = 
[WriteMove], + explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in def kk#Suffix : I, Sched<[WriteMove]>; @@ -2868,13 +2868,12 @@ multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store KRC:$src, addr:$dst)]>, Sched<[WriteStore]>; - } } multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, string OpcodeStr, RegisterClass KRC, RegisterClass GRC, string Suffix = ""> { - let hasSideEffects = 0, explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in { + let hasSideEffects = 0 in { def kr#Suffix : I, Sched<[WriteMove]>; diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt b/llvm/test/MC/Disassembler/X86/apx/kmov.txt index d089ef192230a5..5d947ff39f2314 100644 --- a/llvm/test/MC/Disassembler/X86/apx/kmov.txt +++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt @@ -1,6 +1,25 @@ # RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT # RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL +# ATT: {evex} kmovb %k1, %k2 +# INTEL: {evex} kmovb k2, k1 +0x62,0xf1,0x7d,0x08,0x90,0xd1 + +# ATT: {evex} kmovw %k1, %k2 +# INTEL: {evex} kmovw k2, k1 +0x62,0xf1,0x7c,0x08,0x90,0xd1 + +# ATT: {evex} kmovd %k1, %k2 +# INTEL: {evex} kmovd k2, k1 +0x62,0xf1,0xfd,0x08,0x90,0xd1 + +# ATT: {evex} kmovq %k1, %k2 +# INTEL: {evex} kmovq k2, k1 +0x62,0xf1,0xfc,0x08,0x90,0xd1 + +# ATT-NOT: {evex} +# INTEL-NOT: {evex} + # ATT: kmovb %r16d, %k1 # INTEL: kmovb k1, r16d 0x62,0xf9,0x7d,0x08,0x92,0xc8 @@ -64,19 +83,3 @@ # ATT: kmovq %k1, (%r16,%r17) # INTEL: kmovq qword ptr [r16 + r17], k1 0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08 - -# ATT: {evex} kmovb %k1, %k2 -# INTEL: {evex} kmovb k2, k1 -0x62,0xf1,0x7d,0x08,0x90,0xd1 - -# ATT: {evex} kmovw %k1, %k2 -# INTEL: {evex} kmovw k2, k1 -0x62,0xf1,0x7c,0x08,0x90,0xd1 - -# ATT: {evex} kmovd %k1, %k2 -# INTEL: {evex} kmovd k2, k1 -0x62,0xf1,0xfd,0x08,0x90,0xd1 - -# ATT: {evex} 
kmovq %k1, %k2 -# INTEL: {evex} kmovq k2, k1 -0x62,0xf1,0xfc,0x08,0x90,0xd1 diff --git a/llvm/test/MC/X86/apx/kmov-att.s b/llvm/test/MC/X86/apx/kmov-att.s index be5042cf0a30c8..949ef65be98d4c 100644 --- a/llvm/test/MC/X86/apx/kmov-att.s +++ b/llvm/test/MC/X86/apx/kmov-att.s @@ -3,6 +3,21 @@ # ERROR-COUNT-20: error: # ERROR-NOT: error: +# CHECK: {evex} kmovb %k1, %k2 +# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1] + {evex} kmovb %k1, %k2 +# CHECK: {evex} kmovw %k1, %k2 +# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1] + {evex} kmovw %k1, %k2 +# CHECK: {evex} kmovd %k1, %k2 +# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1] + {evex} kmovd %k1, %k2 +# CHECK: {evex} kmovq %k1, %k2 +# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1] + {evex} kmovq %k1, %k2 + +# CHECK-NOT: {evex} + # CHECK: kmovb %r16d, %k1 # CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8] kmovb %r16d, %k1 @@ -54,16 +69,3 @@ # CHECK: kmovq %k1, (%r16,%r17) # CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08] kmovq %k1, (%r16,%r17) - -# CHECK: {evex} kmovb %k1, %k2 -# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1] - {evex} kmovb %k1, %k2 -# CHECK: {evex} kmovw %k1, %k2 -# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1] - {evex} kmovw %k1, %k2 -# CHECK: {evex} kmovd %k1, %k2 -# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1] - {evex} kmovd %k1, %k2 -# CHECK: {evex} kmovq %k1, %k2 -# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1] - {evex} kmovq %k1, %k2 diff --git a/llvm/test/MC/X86/apx/kmov-intel.s b/llvm/test/MC/X86/apx/kmov-intel.s index 8ceb29d32dba6c..0cdbd310062eba 100644 --- a/llvm/test/MC/X86/apx/kmov-intel.s +++ b/llvm/test/MC/X86/apx/kmov-intel.s @@ -1,5 +1,20 @@ # RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s +# CHECK: {evex} kmovb k2, k1 +# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1] + {evex} kmovb k2, k1 +# CHECK: {evex} kmovw k2, k1 +# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1] + {evex} kmovw k2, k1 +# 
CHECK: {evex} kmovd k2, k1 +# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1] + {evex} kmovd k2, k1 +# CHECK: {evex} kmovq k2, k1 +# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1] + {evex} kmovq k2, k1 + +# CHECK-NOT: {evex} + # CHECK: kmovb k1, r16d # CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8] kmovb k1, r16d @@ -51,16 +66,3 @@ # CHECK: kmovq qword ptr [r16 + r17], k1 # CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08] kmovq qword ptr [r16 + r17], k1 - -# CHECK: {evex} kmovb k2, k1 -# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1] - {evex} kmovb k2, k1 -# CHECK: {evex} kmovw k2, k1 -# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1] - {evex} kmovw k2, k1 -# CHECK: {evex} kmovd k2, k1 -# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1] - {evex} kmovd k2, k1 -# CHECK: {evex} kmovq k2, k1 -# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1] - {evex} kmovq k2, k1 From ab3fdbdfbe7edc62049c602d87be91c3ad3f5e3b Mon Sep 17 00:00:00 2001 From: Allen Date: Fri, 1 Dec 2023 16:20:38 +0800 Subject: [PATCH 07/72] [ValueTracking] Support srem/urem for isKnownNonNullFromDominatingCondition (#74021) Similar to div, an executed rem also proves that its second operand is non-zero, because a zero divisor would be UB. 
Fix https://github.com/llvm/llvm-project/issues/71782 --- llvm/lib/Analysis/ValueTracking.cpp | 3 +- .../ValueTracking/select-known-non-zero.ll | 56 ++++++++++++++++++- .../Transforms/InstCombine/zext-or-icmp.ll | 4 +- 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 250ce739ea5147..ef8fa5826deb94 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2186,7 +2186,8 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, return true; } - if (match(U, m_IDiv(m_Value(), m_Specific(V))) && + if ((match(U, m_IDiv(m_Value(), m_Specific(V))) || + match(U, m_IRem(m_Value(), m_Specific(V)))) && isValidAssumeForContext(cast(U), CtxI, DT)) return true; diff --git a/llvm/test/Analysis/ValueTracking/select-known-non-zero.ll b/llvm/test/Analysis/ValueTracking/select-known-non-zero.ll index 1dc88412041d34..53ed4485c94f08 100644 --- a/llvm/test/Analysis/ValueTracking/select-known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/select-known-non-zero.ll @@ -2,6 +2,8 @@ ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s declare void @llvm.assume(i1) +declare void @use(i64) +declare void @use4(i4) define i1 @select_v_ne_fail(i8 %v, i8 %C, i8 %y) { ; CHECK-LABEL: @select_v_ne_fail( @@ -446,4 +448,56 @@ define i64 @incorrect_safe_div_call_2(i64 %n, i64 %d) { ret i64 %3 } -declare void @use(i64) +; https://alive2.llvm.org/ce/z/Si_B7b +define i4 @icmp_urem(i4 %n, i4 %d) { +; CHECK-LABEL: @icmp_urem( +; CHECK-NEXT: [[TMP1:%.*]] = urem i4 [[N:%.*]], [[D:%.*]] +; CHECK-NEXT: ret i4 [[TMP1]] +; + %1 = icmp eq i4 %d, 0 + %2 = urem i4 %n, %d + %3 = select i1 %1, i4 -1, i4 %2 + ret i4 %3 +} + +define i4 @icmp_urem_clobber_by_call(i4 %n, i4 %d) { +; CHECK-LABEL: @icmp_urem_clobber_by_call( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i4 [[D:%.*]], 0 +; CHECK-NEXT: tail call void @use4(i4 [[D]]) +; CHECK-NEXT: [[TMP2:%.*]] = urem i4 [[N:%.*]], [[D]] +; 
CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i4 -1, i4 [[TMP2]] +; CHECK-NEXT: ret i4 [[TMP3]] +; + %1 = icmp eq i4 %d, 0 + tail call void @use4(i4 %d) + %2 = urem i4 %n, %d + %3 = select i1 %1, i4 -1, i4 %2 + ret i4 %3 +} + +; https://alive2.llvm.org/ce/z/Fn3Wac +define i4 @icmp_srem(i4 %n, i4 %d) { +; CHECK-LABEL: @icmp_srem( +; CHECK-NEXT: [[TMP1:%.*]] = srem i4 [[N:%.*]], [[D:%.*]] +; CHECK-NEXT: ret i4 [[TMP1]] +; + %1 = icmp eq i4 %d, 0 + %2 = srem i4 %n, %d + %3 = select i1 %1, i4 -1, i4 %2 + ret i4 %3 +} + +define i4 @icmp_srem_clobber_by_call(i4 %n, i4 %d) { +; CHECK-LABEL: @icmp_srem_clobber_by_call( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i4 [[D:%.*]], 0 +; CHECK-NEXT: tail call void @use4(i4 [[D]]) +; CHECK-NEXT: [[TMP2:%.*]] = srem i4 [[N:%.*]], [[D]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i4 -1, i4 [[TMP2]] +; CHECK-NEXT: ret i4 [[TMP3]] +; + %1 = icmp eq i4 %d, 0 + tail call void @use4(i4 %d) + %2 = srem i4 %n, %d + %3 = select i1 %1, i4 -1, i4 %2 + ret i4 %3 +} diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index bc0e4bdce29b59..9ec3ddc80c57f7 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -231,9 +231,7 @@ define i1 @PR51762(ptr %i, i32 %t0, i16 %t1, ptr %p, ptr %d, ptr %f, i32 %p2, i1 ; CHECK-NEXT: [[INSERT_INSERT41:%.*]] = or i64 [[INSERT_SHIFT52]], [[INSERT_EXT39]] ; CHECK-NEXT: [[REM:%.*]] = urem i64 [[S1]], [[INSERT_INSERT41]] ; CHECK-NEXT: [[NE:%.*]] = icmp ne i64 [[REM]], 0 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INSERT_INSERT41]], 0 -; CHECK-NEXT: [[SPEC_SELECT57:%.*]] = or i1 [[NE]], [[CMP]] -; CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[SPEC_SELECT57]] to i32 +; CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[NE]] to i32 ; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[D:%.*]], align 4 ; CHECK-NEXT: [[CONV15:%.*]] = sext i16 [[T1]] to i32 ; CHECK-NEXT: [[CMP16:%.*]] = icmp sge i32 [[T2]], [[CONV15]] From 
d48d1edcf3ed0c1352b3c2864feb873f01d6f9da Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 1 Dec 2023 08:22:18 +0000 Subject: [PATCH 08/72] PowerPC/aix-cc-abi: regenerate test using UTC (NFC) (#73963) Split out the parts of aix-cc-abi.ll that need to be regenerated by utils/update_mir_test_checks.py into aix-cc-abi-mir.ll, and regenerate it using the script. Regenerate aix-cc-abi.ll using utils/update_llc_test_checks.py. --- llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll | 2068 +++++++++++ llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 3635 +++++++++---------- 2 files changed, 3811 insertions(+), 1892 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll new file mode 100644 index 00000000000000..ccc36530c7957b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll @@ -0,0 +1,2068 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefix=32BIT %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefix=64BIT %s + +define void @call_test_chars() { + ; 32BIT-LABEL: name: call_test_chars + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 97 + ; 32BIT-NEXT: $r4 = LI 97 + ; 32BIT-NEXT: $r5 = LI 97 + ; 32BIT-NEXT: $r6 = LI 97 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1, implicit-def dead $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_chars + ; 64BIT: 
bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 97 + ; 64BIT-NEXT: $x4 = LI8 97 + ; 64BIT-NEXT: $x5 = LI8 97 + ; 64BIT-NEXT: $x6 = LI8 97 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + call i8 @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) + ret void +} + +define signext i8 @test_chars(i8 signext %c1, i8 signext %c2, i8 signext %c3, i8 signext %c4) { + ; 32BIT-LABEL: name: test_chars + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r3 = EXTSB killed renamable $r3 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; + ; 64BIT-LABEL: name: test_chars + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $r3 = ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r6, implicit killed $x6, implicit-def $x3 + ; 64BIT-NEXT: renamable $x3 = EXTSB8 killed renamable $x3 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %conv = sext i8 %c1 to i32 + %conv1 = sext i8 %c2 to i32 + %add = add nsw i32 %conv, %conv1 + %conv2 = sext i8 %c3 to i32 + %add3 = add nsw i32 %add, %conv2 + %conv4 = sext i8 %c4 to i32 + %add5 
= add nsw i32 %add3, %conv4 + %conv6 = trunc i32 %add5 to i8 + ret i8 %conv6 +} + +define void @call_test_chars_mix() { + ; 32BIT-LABEL: name: call_test_chars_mix + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 97 + ; 32BIT-NEXT: $r4 = LI 225 + ; 32BIT-NEXT: $r5 = LI 97 + ; 32BIT-NEXT: $r6 = LI -31 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1, implicit-def dead $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_chars_mix + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 97 + ; 64BIT-NEXT: $x4 = LI8 225 + ; 64BIT-NEXT: $x5 = LI8 97 + ; 64BIT-NEXT: $x6 = LI8 -31 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + call i8 @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) + ret void +} + +define signext i8 @test_chars_mix(i8 signext %c1, i8 zeroext %c2, i8 zeroext %c3, i8 signext %c4) { + ; 32BIT-LABEL: name: test_chars_mix + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r3 = EXTSB killed renamable $r3 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, 
implicit $r3 + ; + ; 64BIT-LABEL: name: test_chars_mix + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $r3 = ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r6, implicit killed $x6, implicit-def $x3 + ; 64BIT-NEXT: renamable $x3 = EXTSB8 killed renamable $x3 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %conv = sext i8 %c1 to i32 + %conv1 = zext i8 %c2 to i32 + %add = add nsw i32 %conv, %conv1 + %conv2 = zext i8 %c3 to i32 + %add3 = add nsw i32 %add, %conv2 + %conv4 = sext i8 %c4 to i32 + %add5 = add nsw i32 %add3, %conv4 + %conv6 = trunc i32 %add5 to i8 + ret i8 %conv6 +} + +@global_i1 = global i8 0, align 1 + +define void @test_i1(i1 %b) { + ; 32BIT-LABEL: name: test_i1 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r4 = LWZtoc @global_i1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 0, 31, 31 + ; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store (s8) into @global_i1) + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: test_i1 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $x4 = LDtoc @global_i1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $r3 = RLWINM renamable $r3, 0, 31, 31, implicit killed $x3 + ; 64BIT-NEXT: STB killed renamable $r3, 0, killed renamable $x4 :: (store (s8) into @global_i1) + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm + entry: + %frombool = zext i1 %b to i8 + store i8 %frombool, ptr @global_i1, align 1 + ret void +} + +define void @call_test_i1() { + ; 32BIT-LABEL: name: call_test_i1 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, 
implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_i1 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + call void @test_i1(i1 1) + ret void +} + +define void @test_i1zext(i1 zeroext %b) { + ; 32BIT-LABEL: name: test_i1zext + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r4 = LWZtoc @global_i1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store (s8) into @global_i1) + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: test_i1zext + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $x4 = LDtoc @global_i1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: STB8 killed renamable $x3, 0, killed renamable $x4 :: (store (s8) into @global_i1) + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm + entry: + %frombool = zext i1 %b to i8 + store i8 %frombool, ptr @global_i1, align 1 + ret void + } + +define i32 @test_ints(i32 signext %a, i32 zeroext %b, i32 zeroext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) { + ; 32BIT-LABEL: name: test_ints + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: 
renamable $r3 = ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r7 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r8 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r9 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r10 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; + ; 64BIT-LABEL: name: test_ints + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $r3 = ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r6, implicit killed $x6 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r7, implicit killed $x7 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r8, implicit killed $x8 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r9, implicit killed $x9 + ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r10, implicit killed $x10, implicit-def $x3 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %add = add i32 %a, %b + %add1 = add i32 %add, %c + %add2 = add i32 %add1, %d + %add3 = add i32 %add2, %e + %add4 = add i32 %add3, %f + %add5 = add i32 %add4, %g + %add6 = add i32 %add5, %h + ret i32 %add6 +} + +define void @call_test_ints() { + ; 32BIT-LABEL: name: call_test_ints + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 1 + ; 32BIT-NEXT: $r5 = LIS 32768 + ; 32BIT-NEXT: $r6 = LIS 32768 + ; 32BIT-NEXT: $r7 = LI 1 + ; 32BIT-NEXT: $r8 = LI 1 + ; 
32BIT-NEXT: $r9 = LI 1 + ; 32BIT-NEXT: $r10 = LI 1 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1, implicit-def dead $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_ints + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: renamable $x3 = LI8 1 + ; 64BIT-NEXT: renamable $x5 = RLDIC killed renamable $x3, 31, 32 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 1 + ; 64BIT-NEXT: $x6 = LIS8 32768 + ; 64BIT-NEXT: $x7 = LI8 1 + ; 64BIT-NEXT: $x8 = LI8 1 + ; 64BIT-NEXT: $x9 = LI8 1 + ; 64BIT-NEXT: $x10 = LI8 1 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + call i32 @test_ints(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) + ret void +} + +define void @call_test_i64() { + ; 32BIT-LABEL: name: call_test_i64 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 0 + ; 32BIT-NEXT: $r4 = LI 1 + ; 32BIT-NEXT: $r5 = LI 0 + ; 32BIT-NEXT: $r6 = LI 2 + ; 32BIT-NEXT: $r7 = LI 0 + ; 32BIT-NEXT: $r8 = LI 3 + ; 32BIT-NEXT: $r9 = LI 0 + ; 32BIT-NEXT: $r10 = LI 4 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit 
killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1, implicit-def dead $r3, implicit-def dead $r4 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_i64 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + call i64 @test_i64(i64 1, i64 2, i64 3, i64 4) + ret void +} + +define i64 @test_i64(i64 %a, i64 %b, i64 %c, i64 %d) { + ; 32BIT-LABEL: name: test_i64 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r4 = ADDC killed renamable $r4, killed renamable $r6, implicit-def $carry + ; 32BIT-NEXT: renamable $r3 = ADDE killed renamable $r3, killed renamable $r5, implicit-def dead $carry, implicit killed $carry + ; 32BIT-NEXT: renamable $r4 = ADDC killed renamable $r4, killed renamable $r8, implicit-def $carry + ; 32BIT-NEXT: renamable $r3 = ADDE killed renamable $r3, killed renamable $r7, implicit-def dead $carry, implicit killed $carry + ; 32BIT-NEXT: renamable $r4 = ADDC killed renamable $r4, killed renamable $r10, implicit-def $carry + ; 32BIT-NEXT: renamable $r3 = ADDE killed renamable $r3, killed renamable $r9, implicit-def dead $carry, implicit killed $carry + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3, implicit $r4 + ; + ; 64BIT-LABEL: name: 
test_i64 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x4 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x5 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x6 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %add = add nsw i64 %a, %b + %add1 = add nsw i64 %add, %c + %add2 = add nsw i64 %add1, %d + ret i64 %add2 +} + +define void @call_test_int_ptr() { + ; 32BIT-LABEL: name: call_test_int_ptr + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LI 0 + ; 32BIT-NEXT: STW killed renamable $r3, 0, %stack.0.b :: (store (s32) into %ir.b) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: renamable $r3 = ADDI %stack.0.b, 0 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_int_ptr + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LI8 0 + ; 64BIT-NEXT: STW8 killed renamable $x3, 0, %stack.0.b :: (store (s32) into %ir.b) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: renamable $x3 = ADDI8 %stack.0.b, 0 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %b = alloca i32, align 4 + store i32 0, ptr %b, align 4 + call void @test_int_ptr(ptr %b) + ret void +} + +define void @test_int_ptr(ptr %a) { + ; 32BIT-LABEL: name: test_int_ptr + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: STW killed renamable $r3, 0, 
%stack.0.a.addr :: (store (s32) into %ir.a.addr, align 8) + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: test_int_ptr + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store (s64) into %ir.a.addr) + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %a.addr = alloca ptr, align 8 + store ptr %a, ptr %a.addr, align 8 + ret void +} + +define i32 @caller(i32 %i) { + ; 32BIT-LABEL: name: caller + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: STW renamable $r3, 0, %stack.0.i.addr :: (store (s32) into %ir.i.addr) + ; 32BIT-NEXT: renamable $r3 = CNTLZW killed renamable $r3 + ; 32BIT-NEXT: renamable $r3 = NOR killed renamable $r3, renamable $r3 + ; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 27, 31, 31 + ; 32BIT-NEXT: STB renamable $r3, 0, %stack.1.b :: (store (s8) into %ir.b) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1, implicit-def $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; + ; 64BIT-LABEL: name: caller + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: STW renamable $r3, 0, %stack.0.i.addr :: (store (s32) into %ir.i.addr) + ; 64BIT-NEXT: renamable $r3 = CNTLZW renamable $r3, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 27, 5, 31 + ; 64BIT-NEXT: renamable $r3 = XORI killed renamable $r3, 1, implicit-def $x3 + ; 64BIT-NEXT: STB renamable $r3, 0, %stack.1.b :: (store (s8) into %ir.b) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1, 
implicit-def $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %i.addr = alloca i32, align 4 + %b = alloca i8, align 1 + store i32 %i, ptr %i.addr, align 4 + %0 = load i32, ptr %i.addr, align 4 + %cmp = icmp ne i32 %0, 0 + %frombool = zext i1 %cmp to i8 + store i8 %frombool, ptr %b, align 1 + %1 = load i8, ptr %b, align 1 + %tobool = trunc i8 %1 to i1 + %call = call i32 @call_test_bool(i1 zeroext %tobool) + ret i32 %call +} + +declare i32 @call_test_bool(i1 zeroext) + +@f1 = global float 0.000000e+00, align 4 +@d1 = global double 0.000000e+00, align 8 + +define void @call_test_floats() { + ; 32BIT-LABEL: name: call_test_floats + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f1) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $f2 = COPY renamable $f1 + ; 32BIT-NEXT: $f3 = COPY renamable $f1 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $r2, implicit-def $r1, implicit-def dead $f1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_floats + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $f2 = COPY renamable $f1 + ; 64BIT-NEXT: $f3 = COPY renamable $f1 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $x2, implicit-def $r1, implicit-def dead $f1 + ; 64BIT-NEXT: 
ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f1, align 4 + call float @test_floats(float %0, float %0, float %0) + ret void +} + +define float @test_floats(float %f1, float %f2, float %f3) { + ; 32BIT-LABEL: name: test_floats + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $f1, $f2, $f3 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADDS killed renamable $f1, killed renamable $f2, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADDS killed renamable $f0, killed renamable $f3, implicit $rm + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 + ; + ; 64BIT-LABEL: name: test_floats + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $f1, $f2, $f3 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADDS killed renamable $f1, killed renamable $f2, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADDS killed renamable $f0, killed renamable $f3, implicit $rm + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 +entry: + %add = fadd float %f1, %f2 + %add1 = fadd float %add, %f3 + ret float %add1 +} + +define void @call_test_fpr_max() { + ; 32BIT-LABEL: name: call_test_fpr_max + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d1) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: STFD renamable $f1, 120, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 112, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 104, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 96, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 88, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 80, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 72, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 64, $r1 :: 
(store (s64)) + ; 32BIT-NEXT: $f2 = COPY renamable $f1 + ; 32BIT-NEXT: $f3 = COPY renamable $f1 + ; 32BIT-NEXT: $f4 = COPY renamable $f1 + ; 32BIT-NEXT: $f5 = COPY renamable $f1 + ; 32BIT-NEXT: $f6 = COPY renamable $f1 + ; 32BIT-NEXT: $f7 = COPY renamable $f1 + ; 32BIT-NEXT: $f8 = COPY renamable $f1 + ; 32BIT-NEXT: $f9 = COPY renamable $f1 + ; 32BIT-NEXT: $f10 = COPY renamable $f1 + ; 32BIT-NEXT: $f11 = COPY renamable $f1 + ; 32BIT-NEXT: $f12 = COPY renamable $f1 + ; 32BIT-NEXT: $f13 = COPY renamable $f1 + ; 32BIT-NEXT: STFD renamable $f1, 56, $r1 :: (store (s64)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 + ; 32BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_fpr_max + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @d1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load (s64) from @d1) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: STFD renamable $f1, 144, $x1 :: (store (s64)) + ; 64BIT-NEXT: STFD renamable $f1, 136, $x1 :: (store (s64)) + ; 64BIT-NEXT: STFD renamable $f1, 128, $x1 :: (store (s64)) + ; 64BIT-NEXT: STFD renamable $f1, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: $f2 = COPY renamable $f1 + ; 64BIT-NEXT: $f3 = COPY renamable $f1 + ; 64BIT-NEXT: $f4 = COPY renamable $f1 + ; 64BIT-NEXT: $f5 = COPY renamable $f1 + ; 64BIT-NEXT: $f6 = COPY renamable $f1 + ; 64BIT-NEXT: $f7 = COPY renamable $f1 + ; 64BIT-NEXT: $f8 = COPY renamable $f1 + ; 64BIT-NEXT: $f9 = COPY renamable $f1 + ; 64BIT-NEXT: 
$f10 = COPY renamable $f1 + ; 64BIT-NEXT: $f11 = COPY renamable $f1 + ; 64BIT-NEXT: $f12 = COPY renamable $f1 + ; 64BIT-NEXT: $f13 = COPY renamable $f1 + ; 64BIT-NEXT: STFD renamable $f1, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1, implicit-def dead $f1 + ; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load double, ptr @d1, align 8 + call double @test_fpr_max(double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0) + ret void +} + +define double @test_fpr_max(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13) { + ; 32BIT-LABEL: name: test_fpr_max + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f4, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f5, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f6, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f7, implicit $rm + ; 32BIT-NEXT: renamable $f0 = 
nofpexcept FADD killed renamable $f0, killed renamable $f8, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f9, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f10, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f11, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 + ; + ; 64BIT-LABEL: name: test_fpr_max + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f4, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f5, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f6, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f7, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f8, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f9, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f10, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f11, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm 
+ ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 +entry: + %add = fadd double %d1, %d2 + %add1 = fadd double %add, %d3 + %add2 = fadd double %add1, %d4 + %add3 = fadd double %add2, %d5 + %add4 = fadd double %add3, %d6 + %add5 = fadd double %add4, %d7 + %add6 = fadd double %add5, %d8 + %add7 = fadd double %add6, %d9 + %add8 = fadd double %add7, %d10 + %add9 = fadd double %add8, %d11 + %add10 = fadd double %add9, %d12 + %add11 = fadd double %add10, %d13 + ret double %add11 +} + +define void @call_test_mix() { + ; 32BIT-LABEL: name: call_test_mix + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LWZtoc @d1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f1) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r4 :: (dereferenceable load (s64) from @d1) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r4 = LI 1 + ; 32BIT-NEXT: $r7 = LI 97 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r4, implicit $f2, implicit killed $r7, implicit $r2, implicit-def $r1, implicit-def dead $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_mix + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x4 = LDtoc @d1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load (s64) from @d1) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x4 = LI8 1 + 
; 64BIT-NEXT: $x6 = LI8 97 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x4, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f1, align 4 + %1 = load double, ptr @d1, align 8 + call i32 @test_mix(float %0, i32 1, double %1, i8 signext 97) + ret void +} + +define i32 @test_mix(float %f, i32 signext %i, double %d, i8 signext %c) { + ; 32BIT-LABEL: name: test_mix + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $f1, $f2, $r4, $r7 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r3 = LIS 17200 + ; 32BIT-NEXT: STW killed renamable $r3, 0, %stack.1 :: (store (s32) into %stack.1, align 8) + ; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r7, 0, 24, 31 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r3 = XORIS killed renamable $r3, 32768 + ; 32BIT-NEXT: STW killed renamable $r3, 4, %stack.1 :: (store (s32) into %stack.1 + 4) + ; 32BIT-NEXT: renamable $f0 = LFS 0, killed renamable $r4 :: (load (s32) from constant-pool) + ; 32BIT-NEXT: renamable $f3 = LFD 0, %stack.1 :: (load (s64) from %stack.1) + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FRSP killed renamable $f1, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FSUB killed renamable $f3, killed renamable $f0, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FRSP killed renamable $f0, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADDS killed renamable $f0, killed renamable $f1, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm + ; 32BIT-NEXT: STFD killed 
renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; + ; 64BIT-LABEL: name: test_mix + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $f1, $f2, $x4, $x6 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $r3 = RLWINM renamable $r6, 0, 24, 31, implicit killed $x6 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r4, killed renamable $r3, implicit killed $x4 + ; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r3 + ; 64BIT-NEXT: STD killed renamable $x3, 0, %stack.1 :: (store (s64) into %stack.1) + ; 64BIT-NEXT: renamable $f0 = LFD 0, %stack.1 :: (load (s64) from %stack.1) + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FRSP killed renamable $f1, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FCFID killed renamable $f0, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FRSP killed renamable $f0, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADDS killed renamable $f0, killed renamable $f1, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm + ; 64BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %conv = fpext float %f to double + %add = fadd double %conv, %d + %conv1 = fptrunc double %add to float + %conv2 = zext i8 %c to i32 + %add3 = add nsw i32 %i, %conv2 + %conv4 = sitofp i32 %add3 to float + %add5 = fadd float %conv4, %conv1 + %conv6 = fptosi float %add5 to i32 + ret i32 %conv6 +} + +define i64 @callee_mixed_ints(i32 %a, i8 signext %b, i32 %c, i16 signext %d, i64 %e) { + ; 32BIT-LABEL: name: callee_mixed_ints + ; 32BIT: 
bb.0.entry: + ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r4 = RLWINM killed renamable $r4, 0, 24, 31 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r5 = SRAWI renamable $r3, 31, implicit-def dead $carry + ; 32BIT-NEXT: renamable $r4 = ADDC killed renamable $r3, killed renamable $r8, implicit-def $carry + ; 32BIT-NEXT: renamable $r3 = ADDE killed renamable $r5, killed renamable $r7, implicit-def dead $carry, implicit killed $carry + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3, implicit $r4 + ; + ; 64BIT-LABEL: name: callee_mixed_ints + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $r4 = RLWINM renamable $r4, 0, 24, 31, implicit killed $x4 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r3, killed renamable $r4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6 + ; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r3 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x7 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %conv = zext i8 %b to i32 + %add = add nsw i32 %a, %conv + %add1 = add nsw i32 %add, %c + %conv2 = sext i16 %d to i32 + %add3 = add nsw i32 %add1, %conv2 + %conv4 = sext i32 %add3 to i64 + %add5 = add nsw i64 %conv4, %e + ret i64 %add5 + } + +define void @call_test_vararg() { + ; 32BIT-LABEL: name: call_test_vararg + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFS 0, killed 
renamable $r3 :: (dereferenceable load (s32) from @f1) + ; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) + ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.1 :: (load (s32) from %stack.1, align 8) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d1) + ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.1 :: (load (s32) from %stack.1 + 4) + ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) + ; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 42 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit $f2, implicit $r6, implicit $r7, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_vararg + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) + ; 64BIT-NEXT: renamable $x3 = LDtoc @d1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x3 :: (dereferenceable load (s64) from @d1) + ; 64BIT-NEXT: renamable $x4 = LD 0, %stack.1 :: (load (s64) from %stack.1) + ; 64BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x5 = LD 0, %stack.0 :: (load (s64) from %stack.0) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 42 + ; 64BIT-NEXT: 
BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $f2, implicit $x5, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, ptr @d1, align 8 + call void (i32, ...) @test_vararg(i32 42, double %conv, double %1) + ret void +} + +declare void @test_vararg(i32, ...) + +define void @call_test_vararg2() { + ; 32BIT-LABEL: name: call_test_vararg2 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f1) + ; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) + ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.1 :: (load (s32) from %stack.1, align 8) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d1) + ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.1 :: (load (s32) from %stack.1 + 4) + ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) + ; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 42 + ; 32BIT-NEXT: $r6 = LI 42 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit $f2, implicit $r7, implicit $r8, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_vararg2 + ; 64BIT: 
bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) + ; 64BIT-NEXT: renamable $x3 = LDtoc @d1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x3 :: (dereferenceable load (s64) from @d1) + ; 64BIT-NEXT: renamable $x4 = LD 0, %stack.1 :: (load (s64) from %stack.1) + ; 64BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x6 = LD 0, %stack.0 :: (load (s64) from %stack.0) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 42 + ; 64BIT-NEXT: $x5 = LI8 42 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, ptr @d1, align 8 + call void (i32, ...) 
@test_vararg(i32 42, double %conv, i32 42, double %1) + ret void +} + +define void @call_test_vararg3() { + ; 32BIT-LABEL: name: call_test_vararg3 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f1) + ; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) + ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.1 :: (load (s32) from %stack.1, align 8) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d1) + ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.1 :: (load (s32) from %stack.1 + 4) + ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) + ; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 42 + ; 32BIT-NEXT: $r6 = LI 0 + ; 32BIT-NEXT: $r7 = LI 42 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit killed $r7, implicit $f2, implicit $r8, implicit $r9, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_vararg3 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) + ; 64BIT-NEXT: renamable $x3 = LDtoc @d1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x3 :: 
(dereferenceable load (s64) from @d1) + ; 64BIT-NEXT: renamable $x4 = LD 0, %stack.1 :: (load (s64) from %stack.1) + ; 64BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x6 = LD 0, %stack.0 :: (load (s64) from %stack.0) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 42 + ; 64BIT-NEXT: $x5 = LI8 42 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f1, align 4 + %conv = fpext float %0 to double + %1 = load double, ptr @d1, align 8 + call void (i32, ...) @test_vararg(i32 42, double %conv, i64 42, double %1) + ret void +} + +define void @call_test_vararg4() { + ; 32BIT-LABEL: name: call_test_vararg4 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f1) + ; 32BIT-NEXT: STFS renamable $f1, 0, %stack.0 :: (store (s32) into %stack.0) + ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.0 :: (load (s32) from %stack.0) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 42 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_vararg4 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) + ; 64BIT-NEXT: STFS 
renamable $f1, 0, %stack.0 :: (store (s32) into %stack.0) + ; 64BIT-NEXT: renamable $x4 = LWZ8 0, %stack.0 :: (load (s32) from %stack.0) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 42 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f1, align 4 + call void (i32, ...) @test_vararg(i32 42, float %0) + ret void +} + +@c = common global i8 0, align 1 +@si = common global i16 0, align 2 +@i = common global i32 0, align 4 +@lli = common global i64 0, align 8 +@f = common global float 0.000000e+00, align 4 +@d = common global double 0.000000e+00, align 8 + +; Basic saving of integral type arguments to the parameter save area. +define void @call_test_stackarg_int() { + ; 32BIT-LABEL: name: call_test_stackarg_int + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @c, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LWZtoc @si, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r5 = LWZtoc @i, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r11 = LBZ 0, killed renamable $r3 :: (dereferenceable load (s8) from @c) + ; 32BIT-NEXT: renamable $r3 = LWZtoc @lli, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LHA 0, killed renamable $r4 :: (dereferenceable load (s16) from @si) + ; 32BIT-NEXT: renamable $r5 = LWZ 0, killed renamable $r5 :: (dereferenceable load (s32) from @i) + ; 32BIT-NEXT: renamable $r6 = LWZ 0, renamable $r3 :: (dereferenceable load (s32) from @lli, align 8) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, killed renamable $r3 :: (dereferenceable load (s32) from @lli + 4, basealign 8) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 80, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: STW renamable $r5, 76, $r1 :: (store 
(s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 72, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r6, 68, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r5, 64, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r4, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 2 + ; 32BIT-NEXT: $r5 = LI 3 + ; 32BIT-NEXT: $r6 = LI 4 + ; 32BIT-NEXT: $r7 = LI 5 + ; 32BIT-NEXT: $r8 = LI 6 + ; 32BIT-NEXT: $r9 = LI 7 + ; 32BIT-NEXT: $r10 = LI 8 + ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 80, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_stackarg_int + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @c, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x4 = LDtoc @si, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x5 = LDtoc @i, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x6 = LDtoc @lli, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x11 = LBZ8 0, killed renamable $x3 :: (dereferenceable load (s8) from @c) + ; 64BIT-NEXT: renamable $x12 = LHA8 0, killed renamable $x4 :: (dereferenceable load (s16) from @si) + ; 64BIT-NEXT: renamable $x0 = LWZ8 0, killed renamable $x5 :: (dereferenceable load (s32) from @i) + ; 64BIT-NEXT: renamable $x31 = LD 0, killed renamable $x6 :: (dereferenceable load (s64) from @lli) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: $x7 = LI8 5 + ; 64BIT-NEXT: $x8 = LI8 6 + ; 64BIT-NEXT: $x9 = LI8 7 + ; 64BIT-NEXT: $x10 = LI8 8 + ; 64BIT-NEXT: STD killed 
renamable $x31, 136, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD renamable $x0, 144, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x0, 128, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x12, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x11, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load i8, ptr @c, align 1 + %1 = load i16, ptr @si, align 2 + %2 = load i32, ptr @i, align 4 + %3 = load i64, ptr @lli, align 8 + %4 = load i32, ptr @i, align 4 + call void @test_stackarg_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i8 zeroext %0, i16 signext %1, i32 %2, i64 %3, i32 %4) + ret void +} + +declare void @test_stackarg_int(i32, i32, i32, i32, i32, i32, i32, i32, i8 zeroext, i16 signext, i32, i64, i32) + +; Basic saving of floating point type arguments to the parameter save area. +; The float and double arguments will pass in both fpr as well as parameter save area. 
+define void @call_test_stackarg_float() { + ; 32BIT-LABEL: name: call_test_stackarg_float + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LWZtoc @d, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r4 :: (dereferenceable load (s64) from @d) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 68, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: STFD renamable $f2, 60, $r1 :: (store (s64)) + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 2 + ; 32BIT-NEXT: $r5 = LI 3 + ; 32BIT-NEXT: $r6 = LI 4 + ; 32BIT-NEXT: $r7 = LI 5 + ; 32BIT-NEXT: $r8 = LI 6 + ; 32BIT-NEXT: $r9 = LI 7 + ; 32BIT-NEXT: $r10 = LI 8 + ; 32BIT-NEXT: STFS renamable $f1, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 68, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_stackarg_float + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x4 = LDtoc @d, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load (s64) from @d) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: STFD renamable $f2, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: $x7 = LI8 5 + ; 64BIT-NEXT: $x8 = 
LI8 6 + ; 64BIT-NEXT: $x9 = LI8 7 + ; 64BIT-NEXT: $x10 = LI8 8 + ; 64BIT-NEXT: STFS renamable $f1, 112, $x1 :: (store (s32)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $f1, implicit $f2, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f, align 4 + %1 = load double, ptr @d, align 8 + call void @test_stackarg_float(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, float %0, double %1) + ret void +} + +declare void @test_stackarg_float(i32, i32, i32, i32, i32, i32, i32, i32, float, double) + +define void @call_test_stackarg_float2() { + ; 32BIT-LABEL: name: call_test_stackarg_float2 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @d, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d) + ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r9 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) + ; 32BIT-NEXT: renamable $r10 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 2 + ; 32BIT-NEXT: $r5 = LI 3 + ; 32BIT-NEXT: $r6 = LI 4 + ; 32BIT-NEXT: $r7 = LI 5 + ; 32BIT-NEXT: $r8 = LI 6 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit $f1, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + 
; + ; 64BIT-LABEL: name: call_test_stackarg_float2 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @d, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load (s64) from @d) + ; 64BIT-NEXT: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x9 = LD 0, %stack.0 :: (load (s64) from %stack.0) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: $x7 = LI8 5 + ; 64BIT-NEXT: $x8 = LI8 6 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit $f1, implicit $x9, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load double, ptr @d, align 8 + call void (i32, i32, i32, i32, i32, i32, ...) @test_stackarg_float2(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, double %0) + ret void +} + +declare void @test_stackarg_float2(i32, i32, i32, i32, i32, i32, ...) + +; A double arg will pass on the stack in PPC32 if there is only one available GPR. 
+define void @call_test_stackarg_float3() { + ; 32BIT-LABEL: name: call_test_stackarg_float3 + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @d, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d) + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r10 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) + ; 32BIT-NEXT: renamable $f2 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: STFS renamable $f2, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 2 + ; 32BIT-NEXT: $r5 = LI 3 + ; 32BIT-NEXT: $r6 = LI 4 + ; 32BIT-NEXT: $r7 = LI 5 + ; 32BIT-NEXT: $r8 = LI 6 + ; 32BIT-NEXT: $r9 = LI 7 + ; 32BIT-NEXT: STFD renamable $f1, 52, $r1 :: (store (s64)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_stackarg_float3 + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @d, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load (s64) from @d) + ; 64BIT-NEXT: renamable $x3 = LDtoc @f, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x10 = LD 0, %stack.0 :: (load (s64) from %stack.0) + ; 64BIT-NEXT: renamable $f2 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f) + 
; 64BIT-NEXT: ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: $x7 = LI8 5 + ; 64BIT-NEXT: $x8 = LI8 6 + ; 64BIT-NEXT: $x9 = LI8 7 + ; 64BIT-NEXT: STFS renamable $f2, 112, $x1 :: (store (s32)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit $f1, implicit $x10, implicit $f2, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load double, ptr @d, align 8 + %1 = load float, ptr @f, align 4 + call void (i32, i32, i32, i32, i32, i32, i32, ...) @test_stackarg_float3(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, double %0, float %1) + ret void +} + +declare void @test_stackarg_float3(i32, i32, i32, i32, i32, i32, i32, ...) 
+ +define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i64 %ll9, i16 signext %s10, i8 zeroext %c11, i32 %ui12, i32 %si13, i64 %ll14, i8 zeroext %uc15, i32 %i16) { + ; 32BIT-LABEL: name: test_ints_stack + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0) + ; 32BIT-NEXT: renamable $r12 = LWZ 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4) + ; 32BIT-NEXT: renamable $r0 = LWZ 0, %fixed-stack.1 :: (load (s32) from %fixed-stack.1, align 8) + ; 32BIT-NEXT: renamable $r31 = LWZ 4, %fixed-stack.3 :: (load (s32) from %fixed-stack.3 + 4, basealign 16) + ; 32BIT-NEXT: renamable $r30 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3, align 16) + ; 32BIT-NEXT: renamable $r29 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5, align 8) + ; 32BIT-NEXT: renamable $r28 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6) + ; 32BIT-NEXT: renamable $r27 = LWZ 0, %fixed-stack.7 :: (load (s32) from %fixed-stack.7, align 16) + ; 32BIT-NEXT: renamable $r26 = LWZ 4, %fixed-stack.9 :: (load (s32) from %fixed-stack.9 + 4, basealign 8) + ; 32BIT-NEXT: renamable $r25 = LWZ 0, %fixed-stack.9 :: (load (s32) from %fixed-stack.9, align 8) + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r5 = SRAWI renamable $r11, 31, implicit-def dead $carry + ; 32BIT-NEXT: renamable $r4 = SRAWI renamable $r12, 31, implicit-def dead $carry + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r7 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r8 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable 
$r3, killed renamable $r9 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r10 + ; 32BIT-NEXT: renamable $r6 = SRAWI renamable $r3, 31, implicit-def dead $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r26, implicit-def $carry + ; 32BIT-NEXT: renamable $r6 = ADDE killed renamable $r6, killed renamable $r25, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r7 = SRAWI renamable $r27, 31, implicit-def dead $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r27, implicit-def $carry + ; 32BIT-NEXT: renamable $r6 = ADDE killed renamable $r6, killed renamable $r7, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r28, implicit-def $carry + ; 32BIT-NEXT: renamable $r6 = ADDZE killed renamable $r6, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r29, implicit-def $carry + ; 32BIT-NEXT: renamable $r6 = ADDZE killed renamable $r6, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r12, implicit-def $carry + ; 32BIT-NEXT: renamable $r4 = ADDE killed renamable $r6, killed renamable $r4, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r31, implicit-def $carry + ; 32BIT-NEXT: renamable $r4 = ADDE killed renamable $r4, killed renamable $r30, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r3 = ADDC killed renamable $r3, killed renamable $r0, implicit-def $carry + ; 32BIT-NEXT: renamable $r6 = ADDZE killed renamable $r4, implicit-def dead $carry, implicit $carry + ; 32BIT-NEXT: renamable $r4 = ADDC killed renamable $r3, killed renamable $r11, implicit-def $carry + ; 32BIT-NEXT: renamable $r3 = ADDE killed renamable $r6, killed renamable $r5, implicit-def dead $carry, 
implicit $carry + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3, implicit $r4 + ; + ; 64BIT-LABEL: name: test_ints_stack + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $r11 = LWZ 0, %fixed-stack.1, implicit-def $x11 :: (load (s32) from %fixed-stack.1) + ; 64BIT-NEXT: renamable $x12 = LWZ8 0, %fixed-stack.4 :: (load (s32) from %fixed-stack.4) + ; 64BIT-NEXT: renamable $x0 = LWA 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0) + ; 64BIT-NEXT: renamable $x31 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2) + ; 64BIT-NEXT: renamable $x30 = LWA 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3) + ; 64BIT-NEXT: renamable $r29 = LWZ 0, %fixed-stack.5, implicit-def $x29 :: (load (s32) from %fixed-stack.5) + ; 64BIT-NEXT: renamable $x28 = LWA 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6) + ; 64BIT-NEXT: renamable $x27 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7, align 16) + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r7, implicit killed $x7 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r8, implicit killed $x8 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r9, implicit killed $x9 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r10, implicit killed $x10 + ; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r3 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x27 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x28 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 
killed renamable $x3, killed renamable $x29 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x12 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x30 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x31 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x11 + ; 64BIT-NEXT: renamable $x3 = nsw ADD8 killed renamable $x3, killed renamable $x0 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %add = add nsw i32 %i1, %i2 + %add1 = add nsw i32 %add, %i3 + %add2 = add nsw i32 %add1, %i4 + %add3 = add nsw i32 %add2, %i5 + %add4 = add nsw i32 %add3, %i6 + %add5 = add nsw i32 %add4, %i7 + %add6 = add nsw i32 %add5, %i8 + %conv = sext i32 %add6 to i64 + %add7 = add nsw i64 %conv, %ll9 + %conv8 = sext i16 %s10 to i64 + %add9 = add nsw i64 %add7, %conv8 + %conv10 = zext i8 %c11 to i64 + %add11 = add nsw i64 %add9, %conv10 + %conv12 = zext i32 %ui12 to i64 + %add13 = add nsw i64 %add11, %conv12 + %conv14 = sext i32 %si13 to i64 + %add15 = add nsw i64 %add13, %conv14 + %add16 = add nsw i64 %add15, %ll14 + %conv17 = zext i8 %uc15 to i64 + %add18 = add nsw i64 %add16, %conv17 + %conv19 = sext i32 %i16 to i64 + %add20 = add nsw i64 %add18, %conv19 + ret i64 %add20 +} + +@ll1 = common global i64 0, align 8 +@si1 = common global i16 0, align 2 +@ch = common global i8 0, align 1 +@ui = common global i32 0, align 4 +@sint = common global i32 0, align 4 +@ll2 = common global i64 0, align 8 +@uc1 = common global i8 0, align 1 +@i1 = common global i32 0, align 4 + +define void @caller_ints_stack() { + ; 32BIT-LABEL: name: caller_ints_stack + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @ll1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LWZtoc @si1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r11 = LWZ 0, renamable $r3 :: (dereferenceable load (s32) from @ll1, align 8) + ; 32BIT-NEXT: renamable $r5 = 
LWZtoc @ch, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, killed renamable $r3 :: (dereferenceable load (s32) from @ll1 + 4, basealign 8) + ; 32BIT-NEXT: renamable $r6 = LWZtoc @ui, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LHA 0, killed renamable $r4 :: (dereferenceable load (s16) from @si1) + ; 32BIT-NEXT: renamable $r7 = LWZtoc @sint, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r5 = LBZ 0, killed renamable $r5 :: (dereferenceable load (s8) from @ch) + ; 32BIT-NEXT: renamable $r8 = LWZtoc @ll2, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r6 = LWZ 0, killed renamable $r6 :: (dereferenceable load (s32) from @ui) + ; 32BIT-NEXT: renamable $r7 = LWZ 0, killed renamable $r7 :: (dereferenceable load (s32) from @sint) + ; 32BIT-NEXT: renamable $r9 = LWZtoc @uc1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r10 = LWZ 0, renamable $r8 :: (dereferenceable load (s32) from @ll2, align 8) + ; 32BIT-NEXT: renamable $r12 = LWZtoc @i1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r8 = LWZ 4, killed renamable $r8 :: (dereferenceable load (s32) from @ll2 + 4, basealign 8) + ; 32BIT-NEXT: renamable $r9 = LBZ 0, killed renamable $r9 :: (dereferenceable load (s8) from @uc1) + ; 32BIT-NEXT: renamable $r12 = LWZ 0, killed renamable $r12 :: (dereferenceable load (s32) from @i1) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 96, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: STW killed renamable $r12, 92, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r9, 88, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r8, 84, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r10, 80, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r7, 76, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r6, 72, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r5, 68, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r4, 64, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW 
killed renamable $r3, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 2 + ; 32BIT-NEXT: $r5 = LI 3 + ; 32BIT-NEXT: $r6 = LI 4 + ; 32BIT-NEXT: $r7 = LI 5 + ; 32BIT-NEXT: $r8 = LI 6 + ; 32BIT-NEXT: $r9 = LI 7 + ; 32BIT-NEXT: $r10 = LI 8 + ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def dead $r3, implicit-def dead $r4 + ; 32BIT-NEXT: ADJCALLSTACKUP 96, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: caller_ints_stack + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @si1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x4 = LDtoc @ch, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x5 = LDtoc @ui, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x11 = LHA8 0, killed renamable $x3 :: (dereferenceable load (s16) from @si1) + ; 64BIT-NEXT: renamable $x3 = LDtoc @sint, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x12 = LBZ8 0, killed renamable $x4 :: (dereferenceable load (s8) from @ch) + ; 64BIT-NEXT: renamable $x4 = LDtoc @uc1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x0 = LWZ8 0, killed renamable $x5 :: (dereferenceable load (s32) from @ui) + ; 64BIT-NEXT: renamable $x5 = LDtoc @ll1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x6 = LDtoc @ll2, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x7 = LDtoc @i1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x31 = LWZ8 0, killed renamable $x3 :: (dereferenceable load (s32) from @sint) + ; 64BIT-NEXT: renamable $x30 = LBZ8 0, killed renamable $x4 :: (dereferenceable load (s8) from @uc1) + ; 64BIT-NEXT: renamable $x29 = LD 0, killed renamable $x5 :: (dereferenceable load (s64) from @ll1) + ; 64BIT-NEXT: 
renamable $x28 = LD 0, killed renamable $x6 :: (dereferenceable load (s64) from @ll2) + ; 64BIT-NEXT: renamable $x27 = LWZ8 0, killed renamable $x7 :: (dereferenceable load (s32) from @i1) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: $x7 = LI8 5 + ; 64BIT-NEXT: $x8 = LI8 6 + ; 64BIT-NEXT: $x9 = LI8 7 + ; 64BIT-NEXT: $x10 = LI8 8 + ; 64BIT-NEXT: STD killed renamable $x27, 168, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x30, 160, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x28, 152, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x31, 144, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x0, 136, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x12, 128, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x11, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x29, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load i64, ptr @ll1, align 8 + %1 = load i16, ptr @si1, align 2 + %2 = load i8, ptr @ch, align 1 + %3 = load i32, ptr @ui, align 4 + %4 = load i32, ptr @sint, align 4 + %5 = load i64, ptr @ll2, align 8 + %6 = load i8, ptr @uc1, align 1 + %7 = load i32, ptr @i1, align 4 + %call = call i64 @test_ints_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i64 %0, i16 signext %1, i8 zeroext %2, i32 %3, i32 %4, i64 %5, i8 zeroext %6, i32 %7) + ret void +} + +@globali1 = global i8 0, align 1 + +define void @test_i1_stack(i32 %a, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, 
i32 %h, i32 %i, i1 zeroext %b) { + ; 32BIT-LABEL: name: test_i1_stack + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LBZ 0, %fixed-stack.0 :: (load (s8) from %fixed-stack.0) + ; 32BIT-NEXT: renamable $r4 = LWZtoc @globali1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store (s8) into @globali1) + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: test_i1_stack + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $r3 = LBZ 0, %fixed-stack.0 :: (load (s8) from %fixed-stack.0) + ; 64BIT-NEXT: renamable $x4 = LDtoc @globali1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: STB killed renamable $r3, 0, killed renamable $x4 :: (store (s8) into @globali1) + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm + entry: + %frombool = zext i1 %b to i8 + store i8 %frombool, ptr @globali1, align 1 + ret void +} + +define void @call_test_i1_stack() { + ; 32BIT-LABEL: name: call_test_i1_stack + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 60, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: renamable $r11 = LI 1 + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 2 + ; 32BIT-NEXT: $r5 = LI 3 + ; 32BIT-NEXT: $r6 = LI 4 + ; 32BIT-NEXT: $r7 = LI 5 + ; 32BIT-NEXT: $r8 = LI 6 + ; 32BIT-NEXT: $r9 = LI 7 + ; 32BIT-NEXT: $r10 = LI 8 + ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: ADJCALLSTACKUP 60, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: call_test_i1_stack + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: renamable $x11 = LI8 1 + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 
64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: $x7 = LI8 5 + ; 64BIT-NEXT: $x8 = LI8 6 + ; 64BIT-NEXT: $x9 = LI8 7 + ; 64BIT-NEXT: $x10 = LI8 8 + ; 64BIT-NEXT: STD killed renamable $x11, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 + ; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm + entry: + call void @test_i1_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i1 true) + ret void +} + +define double @test_fpr_stack(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %s10, double %l11, double %d12, double %d13, float %f14, double %d15, float %f16) { + ; 32BIT-LABEL: name: test_fpr_stack + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $f0 = LFD 0, %fixed-stack.1 :: (load (s64) from %fixed-stack.1) + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 32BIT-NEXT: renamable $f2 = LFS 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 16) + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f5, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f6, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f7, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f8, implicit $rm + 
; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f9, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f10, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f11, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f12, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, renamable $f13, implicit $rm + ; 32BIT-NEXT: renamable $f3 = LFS 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0) + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f13, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f0, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 + ; + ; 64BIT-LABEL: name: test_fpr_stack + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $f0 = LFD 0, %fixed-stack.1 :: (load (s64) from %fixed-stack.1, align 16) + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 64BIT-NEXT: renamable $f2 = LFS 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 8) + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f5, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f6, implicit $rm + 
; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f7, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f8, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f9, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f10, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f11, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f12, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, renamable $f13, implicit $rm + ; 64BIT-NEXT: renamable $f3 = LFS 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 8) + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f13, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f0, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 + entry: + %add = fadd double %d1, %d2 + %add1 = fadd double %add, %d3 + %add2 = fadd double %add1, %d4 + %add3 = fadd double %add2, %d5 + %add4 = fadd double %add3, %d6 + %add5 = fadd double %add4, %d7 + %add6 = fadd double %add5, %d8 + %add7 = fadd double %add6, %d9 + %add8 = fadd double %add7, %s10 + %add9 = fadd double %add8, %l11 + %add10 = fadd double %add9, %d12 + %add11 = fadd double %add10, %d13 + %add12 = fadd double %add11, %d13 + %conv = fpext float %f14 to double + %add13 = fadd double %add12, %conv + %add14 = fadd double %add13, %d15 + %conv15 = fpext float %f16 to double + %add16 = fadd double %add14, %conv15 + ret double %add16 + } + +@f14 = common global float 
0.000000e+00, align 4 +@d15 = common global double 0.000000e+00, align 8 +@f16 = common global float 0.000000e+00, align 4 + +define void @caller_fpr_stack() { + ; 32BIT-LABEL: name: caller_fpr_stack + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: renamable $r3 = LWZtoc @d15, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LWZtoc @f14, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f0 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d15) + ; 32BIT-NEXT: renamable $r5 = LWZtoc @f16, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r4 :: (load (s32) from @f14) + ; 32BIT-NEXT: renamable $r4 = LWZ 0, killed renamable $r5 :: (load (s32) from @f16) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 144, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: renamable $r5 = LI 0 + ; 32BIT-NEXT: renamable $r6 = LIS 16352 + ; 32BIT-NEXT: STW killed renamable $r5, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r5 = LIS 13107 + ; 32BIT-NEXT: STW killed renamable $r6, 56, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16355 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 13107 + ; 32BIT-NEXT: STW killed renamable $r5, 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r5 = LIS 26214 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 13107 + ; 32BIT-NEXT: STW killed renamable $r6, 64, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16358 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 26214 + ; 32BIT-NEXT: STW killed renamable $r5, 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r5 = LIS 39321 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 26214 + ; 32BIT-NEXT: STW killed renamable $r6, 72, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16361 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 39321 + ; 
32BIT-NEXT: STW killed renamable $r6, 80, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 52428 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 52429 + ; 32BIT-NEXT: STW killed renamable $r6, 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16364 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 39322 + ; 32BIT-NEXT: STW renamable $r5, 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 52428 + ; 32BIT-NEXT: STW killed renamable $r6, 88, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16313 + ; 32BIT-NEXT: STW killed renamable $r5, 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r5 = LIS 49807 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 39321 + ; 32BIT-NEXT: STW killed renamable $r6, 96, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16316 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 23593 + ; 32BIT-NEXT: STW killed renamable $r5, 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r5 = LIS 60293 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 10485 + ; 32BIT-NEXT: STW killed renamable $r6, 104, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16318 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 7864 + ; 32BIT-NEXT: STW killed renamable $r5, 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r5 = LIS 2621 + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 47185 + ; 32BIT-NEXT: STW killed renamable $r6, 112, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16320 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 28836 + ; 32BIT-NEXT: STW killed renamable $r5, 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r5 = LWZtoc 
%const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 41943 + ; 32BIT-NEXT: STW killed renamable $r6, 120, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.2, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.3, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.4, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f6 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.5, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f7 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.6, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f8 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.7, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f9 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.8, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.9, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f11 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.10, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f12 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.11, $r2 :: (load (s32) from got) + ; 
32BIT-NEXT: renamable $f13 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $f5 = LFS 0, killed renamable $r6 :: (load (s32) from constant-pool) + ; 32BIT-NEXT: STW killed renamable $r4, 140, $r1 :: (store (s32)) + ; 32BIT-NEXT: STFD killed renamable $f0, 132, $r1 :: (store (s64)) + ; 32BIT-NEXT: $f10 = COPY renamable $f1 + ; 32BIT-NEXT: STW killed renamable $r3, 128, $r1 :: (store (s32)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 + ; 32BIT-NEXT: ADJCALLSTACKUP 144, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: caller_fpr_stack + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: renamable $x3 = LDtoc @f14, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x4 = LDtoc @d15, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x5 = LDtoc @f16, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $x3 :: (load (s32) from @f14) + ; 64BIT-NEXT: renamable $x4 = LD 0, killed renamable $x4 :: (load (s64) from @d15) + ; 64BIT-NEXT: renamable $r5 = LWZ 0, killed renamable $x5 :: (load (s32) from @f16) + ; 64BIT-NEXT: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: renamable $x6 = LDtocCPT %const.0, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: STW killed renamable $r5, 168, $x1 :: (store (s32)) + ; 64BIT-NEXT: renamable $x5 = LDtocCPT %const.1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x7 = LDtocCPT %const.2, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x6 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x6 = LDtocCPT %const.3, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: 
renamable $f3 = LFD 0, killed renamable $x5 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x5 = LDtocCPT %const.4, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f4 = LFD 0, killed renamable $x7 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x7 = LDtocCPT %const.5, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f6 = LFD 0, killed renamable $x6 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x6 = LDtocCPT %const.6, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f7 = LFD 0, killed renamable $x5 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: STD killed renamable $x4, 160, $x1 :: (store (s64)) + ; 64BIT-NEXT: renamable $x4 = LDtocCPT %const.7, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f8 = LFD 0, killed renamable $x7 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x5 = LIS8 16320 + ; 64BIT-NEXT: renamable $x7 = LDtocCPT %const.8, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f9 = LFD 0, killed renamable $x6 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x6 = LIS8 16318 + ; 64BIT-NEXT: renamable $x8 = LDtocCPT %const.9, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x4 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x4 = LIS8 16316 + ; 64BIT-NEXT: renamable $f11 = LFD 0, killed renamable $x7 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x7 = LIS8 16313 + ; 64BIT-NEXT: renamable $f12 = LFD 0, killed renamable $x8 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x8 = LDtocCPT %const.10, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x5 = ORI8 killed renamable $x5, 41943 + ; 64BIT-NEXT: renamable $x6 = ORI8 killed renamable $x6, 47185 + ; 64BIT-NEXT: renamable $x4 = ORI8 killed renamable $x4, 10485 + ; 64BIT-NEXT: renamable $x7 = ORI8 killed renamable $x7, 39321 + ; 64BIT-NEXT: renamable $f13 = LFD 0, killed renamable $x8 :: (load (s64) from constant-pool) + 
; 64BIT-NEXT: renamable $x8 = LDtocCPT %const.11, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x5 = RLDIC killed renamable $x5, 32, 2 + ; 64BIT-NEXT: renamable $x6 = RLDIC killed renamable $x6, 32, 2 + ; 64BIT-NEXT: renamable $x4 = RLDIC killed renamable $x4, 32, 2 + ; 64BIT-NEXT: renamable $x7 = RLDIC killed renamable $x7, 32, 2 + ; 64BIT-NEXT: renamable $x5 = ORIS8 killed renamable $x5, 2621 + ; 64BIT-NEXT: renamable $x6 = ORIS8 killed renamable $x6, 60293 + ; 64BIT-NEXT: renamable $x4 = ORIS8 killed renamable $x4, 49807 + ; 64BIT-NEXT: renamable $x7 = ORIS8 killed renamable $x7, 39321 + ; 64BIT-NEXT: renamable $x5 = ORI8 killed renamable $x5, 28836 + ; 64BIT-NEXT: renamable $x6 = ORI8 killed renamable $x6, 7864 + ; 64BIT-NEXT: renamable $x4 = ORI8 killed renamable $x4, 23593 + ; 64BIT-NEXT: renamable $f5 = LFS 0, killed renamable $x8 :: (load (s32) from constant-pool) + ; 64BIT-NEXT: renamable $x8 = LIS8 4091 + ; 64BIT-NEXT: renamable $x8 = ORI8 killed renamable $x8, 13107 + ; 64BIT-NEXT: renamable $x7 = ORI8 killed renamable $x7, 39322 + ; 64BIT-NEXT: renamable $x8 = RLDIC killed renamable $x8, 34, 2 + ; 64BIT-NEXT: renamable $x8 = ORIS8 killed renamable $x8, 52428 + ; 64BIT-NEXT: renamable $x8 = ORI8 killed renamable $x8, 52429 + ; 64BIT-NEXT: $f10 = COPY renamable $f1 + ; 64BIT-NEXT: STW killed renamable $r3, 152, $x1 :: (store (s32)) + ; 64BIT-NEXT: STD killed renamable $x5, 144, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x6, 136, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x4, 128, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x7, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x8, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, 
implicit-def $r1, implicit-def dead $f1 + ; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm +entry: + %0 = load float, ptr @f14, align 4 + %1 = load double, ptr @d15, align 8 + %2 = load float, ptr @f16, align 4 + %call = call double @test_fpr_stack(double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01, double 6.000000e-01, double 0x3FE6666666666666, double 8.000000e-01, double 9.000000e-01, double 1.000000e-01, double 1.100000e-01, double 1.200000e-01, double 1.300000e-01, float %0, double %1, float %2) + ret void +} + +define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) { + ; 32BIT-LABEL: name: mix_callee + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $f1, $f2, $f3, $f4 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r3 = LWZ 0, %fixed-stack.3 :: (load (s32) from %fixed-stack.3) + ; 32BIT-NEXT: renamable $r4 = LWZ 0, %fixed-stack.5 :: (load (s32) from %fixed-stack.5) + ; 32BIT-NEXT: renamable $r5 = LWZ 0, %fixed-stack.6 :: (load (s32) from %fixed-stack.6, align 8) + ; 32BIT-NEXT: renamable $r6 = LWZ 0, %fixed-stack.2 :: (load (s32) from %fixed-stack.2, align 8) + ; 32BIT-NEXT: renamable $r7 = LIS 17200 + ; 32BIT-NEXT: STW killed renamable $r7, 0, %stack.1 :: (store (s32) into %stack.1, align 8) + ; 32BIT-NEXT: renamable $r7 = LWZ 0, %fixed-stack.1 :: (load (s32) from %fixed-stack.1) + ; 32BIT-NEXT: renamable $r4 = nsw ADD4 killed renamable $r5, killed renamable $r4 + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r4, killed renamable $r3 + ; 32BIT-NEXT: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 16) + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable 
$r3, killed renamable $r7 + ; 32BIT-NEXT: renamable $f0 = LFS 0, killed renamable $r5 :: (load (s32) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: renamable $r3 = XORIS killed renamable $r3, 32768 + ; 32BIT-NEXT: STW killed renamable $r3, 4, %stack.1 :: (store (s32) into %stack.1 + 4) + ; 32BIT-NEXT: renamable $f5 = LFD 0, %stack.1 :: (load (s64) from %stack.1) + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm + ; 32BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FSUB killed renamable $f5, killed renamable $f0, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm + ; 32BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; + ; 64BIT-LABEL: name: mix_callee + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $x7, $x8, $x9, $x10 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $x3 = LWZ8 0, %fixed-stack.1 :: (load (s32) from %fixed-stack.1) + ; 64BIT-NEXT: renamable $r4 = nsw ADD4 renamable $r7, renamable $r8, implicit killed $x8, implicit killed $x7, implicit-def $x4 + ; 64BIT-NEXT: renamable $x5 = LWZ8 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0) + ; 64BIT-NEXT: renamable $x4 = ADD8 killed renamable $x4, killed renamable $x9 + ; 64BIT-NEXT: renamable $x4 = ADD8 killed renamable $x4, killed renamable $x10 + ; 64BIT-NEXT: renamable $x3 = ADD8 killed renamable $x4, killed renamable $x3 + ; 
64BIT-NEXT: renamable $x3 = ADD8 killed renamable $x3, killed renamable $x5 + ; 64BIT-NEXT: renamable $x3 = EXTSW killed renamable $x3 + ; 64BIT-NEXT: STD killed renamable $x3, 0, %stack.1 :: (store (s64) into %stack.1) + ; 64BIT-NEXT: renamable $f0 = LFD 0, %stack.1 :: (load (s64) from %stack.1) + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FCFID killed renamable $f0, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm + ; 64BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 + entry: + %add = fadd double %d1, %d2 + %add1 = fadd double %add, %d3 + %add2 = fadd double %add1, %d4 + %conv = zext i8 %c1 to i32 + %conv3 = sext i16 %s1 to i32 + %add4 = add nsw i32 %conv, %conv3 + %conv5 = sext i32 %add4 to i64 + %add6 = add nsw i64 %conv5, %ll1 + %conv7 = sext i32 %i1 to i64 + %add8 = add nsw i64 %add6, %conv7 + %conv9 = sext i32 %i2 to i64 + %add10 = add nsw i64 %add8, %conv9 + %conv11 = sext i32 %i3 to i64 + %add12 = add nsw i64 %add10, %conv11 + %conv13 = trunc i64 %add12 to i32 + %conv14 = sitofp i32 %conv13 to double + %add15 = fadd double %conv14, %add2 + %conv16 = fptosi double %add15 to i32 + ret i32 %conv16 + } + +define void @caller_mix() { + ; 32BIT-LABEL: name: caller_mix + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 84, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: renamable $r3 = LI 60 + ; 
32BIT-NEXT: STW killed renamable $r3, 80, $r1 :: (store (s32)) + ; 32BIT-NEXT: renamable $r3 = LI 50 + ; 32BIT-NEXT: STW killed renamable $r3, 76, $r1 :: (store (s32)) + ; 32BIT-NEXT: renamable $r3 = LI 40 + ; 32BIT-NEXT: STW killed renamable $r3, 72, $r1 :: (store (s32)) + ; 32BIT-NEXT: renamable $r3 = LI 0 + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STW killed renamable $r3, 64, $r1 :: (store (s32)) + ; 32BIT-NEXT: renamable $r3 = LI 2 + ; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.2, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.3, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LI 1 + ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: renamable $r3 = LIS 457 + ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 50048 + ; 32BIT-NEXT: STW killed renamable $r3, 68, $r1 :: (store (s32)) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $r2, implicit-def $r1, implicit-def dead $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 84, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: caller_mix + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: renamable $x3 = LDtocCPT %const.0, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x4 
= LDtocCPT %const.1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x5 = LDtocCPT %const.2, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x3 = LDtocCPT %const.3, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x4 = LI8 60 + ; 64BIT-NEXT: renamable $f3 = LFD 0, killed renamable $x5 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x5 = LI8 50 + ; 64BIT-NEXT: renamable $f4 = LFD 0, killed renamable $x3 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x3 = LIS8 457 + ; 64BIT-NEXT: renamable $x9 = ORI8 killed renamable $x3, 50048 + ; 64BIT-NEXT: $x7 = LI8 1 + ; 64BIT-NEXT: $x8 = LI8 2 + ; 64BIT-NEXT: $x10 = LI8 40 + ; 64BIT-NEXT: STD killed renamable $x4, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x5, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit killed $x7, implicit killed $x8, implicit $x9, implicit killed $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm + entry: +%call = call i32 @mix_callee(double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, i8 zeroext 1, i16 signext 2, i64 30000000, i32 40, i32 50, i32 60) + ret void + } + + define i32 @mix_floats(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14) { + ; 32BIT-LABEL: name: mix_floats + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, 
$f11, $f12, $f13, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: renamable $r11 = LIS 17200 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: STW killed renamable $r11, 0, %stack.1 :: (store (s32) into %stack.1, align 8) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r7 + ; 32BIT-NEXT: renamable $f0 = LFS 0, killed renamable $r4 :: (load (s32) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r8 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r9 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r10 + ; 32BIT-NEXT: renamable $r3 = XORIS killed renamable $r3, 32768 + ; 32BIT-NEXT: STW killed renamable $r3, 4, %stack.1 :: (store (s32) into %stack.1 + 4) + ; 32BIT-NEXT: renamable $f31 = LFD 0, %stack.1 :: (load (s64) from %stack.1) + ; 32BIT-NEXT: renamable $f30 = LFD 0, %fixed-stack.0 :: (load (s64) from %fixed-stack.0, align 16) + ; 32BIT-NEXT: renamable $f0 = nofpexcept FSUB killed renamable $f31, killed renamable $f0, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f2, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f4, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f5, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD 
killed renamable $f0, killed renamable $f6, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f7, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f8, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f9, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f10, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f11, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f30, implicit $rm + ; 32BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm + ; 32BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; + ; 64BIT-LABEL: name: mix_floats + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64) from %fixed-stack.0) + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r7, implicit killed $x7 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed 
renamable $r3, renamable $r8, implicit killed $x8 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r9, implicit killed $x9 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r10, implicit killed $x10 + ; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r3 + ; 64BIT-NEXT: STD killed renamable $x3, 0, %stack.1 :: (store (s64) into %stack.1) + ; 64BIT-NEXT: renamable $f31 = LFD 0, %stack.1 :: (load (s64) from %stack.1) + ; 64BIT-NEXT: renamable $f31 = nofpexcept FCFID killed renamable $f31, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f31, killed renamable $f1, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f5, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f6, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f7, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f8, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f9, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f10, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f11, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f12, implicit $rm + ; 64BIT-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f13, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FADD killed renamable 
$f1, killed renamable $f0, implicit $rm + ; 64BIT-NEXT: renamable $f0 = nofpexcept FCTIWZ killed renamable $f0, implicit $rm + ; 64BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) + ; 64BIT-NEXT: renamable $x3 = LWZ8 4, %stack.0 :: (load (s32) from %stack.0 + 4, basealign 8) + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 + entry: + %add = add nsw i32 %i1, %i2 + %add1 = add nsw i32 %add, %i3 + %add2 = add nsw i32 %add1, %i4 + %add3 = add nsw i32 %add2, %i5 + %add4 = add nsw i32 %add3, %i6 + %add5 = add nsw i32 %add4, %i7 + %add6 = add nsw i32 %add5, %i8 + %conv = sitofp i32 %add6 to double + %add7 = fadd double %conv, %d1 + %add8 = fadd double %add7, %d2 + %add9 = fadd double %add8, %d3 + %add10 = fadd double %add9, %d4 + %add11 = fadd double %add10, %d5 + %add12 = fadd double %add11, %d6 + %add13 = fadd double %add12, %d7 + %add14 = fadd double %add13, %d8 + %add15 = fadd double %add14, %d9 + %add16 = fadd double %add15, %d10 + %add17 = fadd double %add16, %d11 + %add18 = fadd double %add17, %d12 + %add19 = fadd double %add18, %d13 + %add20 = fadd double %add19, %d14 + %conv21 = fptosi double %add20 to i32 + ret i32 %conv21 + } + + define void @mix_floats_caller() { + ; 32BIT-LABEL: name: mix_floats_caller + ; 32BIT: bb.0.entry: + ; 32BIT-NEXT: ADJCALLSTACKDOWN 168, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: renamable $r3 = LI 0 + ; 32BIT-NEXT: renamable $r4 = LIS 16352 + ; 32BIT-NEXT: renamable $r5 = LIS 16368 + ; 32BIT-NEXT: renamable $r6 = LIS 39321 + ; 32BIT-NEXT: renamable $r7 = LIS 16313 + ; 32BIT-NEXT: renamable $r8 = LIS 16329 + ; 32BIT-NEXT: renamable $r9 = LIS 13107 + ; 32BIT-NEXT: renamable $r10 = LIS 16339 + ; 32BIT-NEXT: STW renamable $r3, 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r11 = LIS 16345 + ; 32BIT-NEXT: STW killed renamable $r4, 88, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r4 = LIS 16355 + ; 32BIT-NEXT: STW killed 
renamable $r3, 132, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r3 = LIS 26214 + ; 32BIT-NEXT: STW killed renamable $r5, 128, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r6, 39322 + ; 32BIT-NEXT: STW renamable $r5, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r7, 39321 + ; 32BIT-NEXT: STW killed renamable $r6, 56, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = LIS 16358 + ; 32BIT-NEXT: STW renamable $r5, 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r7 = ORI killed renamable $r8, 39321 + ; 32BIT-NEXT: STW killed renamable $r7, 64, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r7 = ORI killed renamable $r9, 13107 + ; 32BIT-NEXT: STW renamable $r7, 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r8 = ORI killed renamable $r10, 13107 + ; 32BIT-NEXT: STW killed renamable $r8, 72, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r8 = LIS 16361 + ; 32BIT-NEXT: STW renamable $r5, 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r9 = ORI killed renamable $r11, 39321 + ; 32BIT-NEXT: STW killed renamable $r9, 80, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r9 = LIS 52428 + ; 32BIT-NEXT: STW renamable $r7, 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 13107 + ; 32BIT-NEXT: STW killed renamable $r4, 96, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 26214 + ; 32BIT-NEXT: STW renamable $r3, 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r6, 26214 + ; 32BIT-NEXT: STW killed renamable $r4, 104, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r4 = LIS 16364 + ; 
32BIT-NEXT: STW renamable $r5, 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r8, 39321 + ; 32BIT-NEXT: STW killed renamable $r6, 112, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r9, 52429 + ; 32BIT-NEXT: STW renamable $r6, 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 52428 + ; 32BIT-NEXT: STW killed renamable $r4, 120, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r4 = LIS 16369 + ; 32BIT-NEXT: STW killed renamable $r5, 140, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 39321 + ; 32BIT-NEXT: STW killed renamable $r4, 136, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r4 = LIS 16371 + ; 32BIT-NEXT: STW killed renamable $r7, 148, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 13107 + ; 32BIT-NEXT: STW killed renamable $r4, 144, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r4 = LIS 16372 + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STW killed renamable $r6, 156, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 52428 + ; 32BIT-NEXT: STW killed renamable $r4, 152, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: STW killed renamable $r3, 164, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.2, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.3, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r4 :: (load (s64) from 
constant-pool) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.4, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.5, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.6, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f6 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.7, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f7 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.8, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f8 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.9, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f9 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.10, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f11 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.11, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f12 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.12, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f13 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r4 = LIS 16374 + ; 32BIT-NEXT: renamable $f5 = LFS 0, killed renamable $r3 :: (load (s32) from constant-pool) + ; 32BIT-NEXT: renamable $r11 = ORI killed renamable $r4, 26214 + ; 32BIT-NEXT: renamable $f10 = LFS 0, killed renamable $r5 :: (load (s32) from constant-pool) + ; 32BIT-NEXT: $r3 = LI 1 + ; 32BIT-NEXT: $r4 = LI 2 + ; 32BIT-NEXT: $r5 = LI 3 + ; 32BIT-NEXT: $r6 = LI 4 + ; 32BIT-NEXT: 
$r7 = LI 5 + ; 32BIT-NEXT: $r8 = LI 6 + ; 32BIT-NEXT: $r9 = LI 7 + ; 32BIT-NEXT: $r10 = LI 8 + ; 32BIT-NEXT: STW killed renamable $r11, 160, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 168, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm + ; + ; 64BIT-LABEL: name: mix_floats_caller + ; 64BIT: bb.0.entry: + ; 64BIT-NEXT: ADJCALLSTACKDOWN 224, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: renamable $x3 = LI8 1023 + ; 64BIT-NEXT: renamable $x4 = LI8 511 + ; 64BIT-NEXT: renamable $x5 = LIS8 16374 + ; 64BIT-NEXT: renamable $x6 = LIS8 16371 + ; 64BIT-NEXT: renamable $x7 = LIS8 16358 + ; 64BIT-NEXT: renamable $x8 = LIS8 16355 + ; 64BIT-NEXT: renamable $x9 = LIS8 16339 + ; 64BIT-NEXT: renamable $x10 = LIS8 4093 + ; 64BIT-NEXT: renamable $x11 = LDtocCPT %const.0, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x29 = LIS8 16369 + ; 64BIT-NEXT: renamable $x28 = LIS8 4091 + ; 64BIT-NEXT: renamable $x12 = LDtocCPT %const.1, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x27 = LIS8 16361 + ; 64BIT-NEXT: renamable $x31 = LDtocCPT %const.2, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x11 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x26 = LIS8 16345 + ; 64BIT-NEXT: renamable $x11 = LDtocCPT %const.3, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x12 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x25 = LIS8 16329 + ; 64BIT-NEXT: renamable $f3 = LFD 0, killed renamable $x31 
:: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x24 = LIS8 16313 + ; 64BIT-NEXT: renamable $x23 = LDtocCPT %const.4, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x22 = LDtocCPT %const.5, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x21 = LDtocCPT %const.6, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x20 = LDtocCPT %const.7, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x19 = LDtocCPT %const.8, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x18 = LDtocCPT %const.9, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x17 = LDtocCPT %const.10, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $f4 = LFD 0, killed renamable $x11 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x16 = LDtocCPT %const.11, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x11 = ORI8 killed renamable $x5, 26214 + ; 64BIT-NEXT: renamable $x12 = ORI8 killed renamable $x6, 13107 + ; 64BIT-NEXT: renamable $x0 = ORI8 killed renamable $x7, 26214 + ; 64BIT-NEXT: renamable $x31 = ORI8 killed renamable $x8, 13107 + ; 64BIT-NEXT: renamable $x30 = ORI8 killed renamable $x9, 13107 + ; 64BIT-NEXT: renamable $x5 = ORI8 killed renamable $x10, 13107 + ; 64BIT-NEXT: renamable $x6 = ORI8 killed renamable $x29, 39321 + ; 64BIT-NEXT: renamable $x7 = ORI8 killed renamable $x28, 13107 + ; 64BIT-NEXT: renamable $x8 = ORI8 killed renamable $x27, 39321 + ; 64BIT-NEXT: renamable $x9 = ORI8 killed renamable $x26, 39321 + ; 64BIT-NEXT: renamable $x10 = ORI8 killed renamable $x25, 39321 + ; 64BIT-NEXT: renamable $x27 = ORI8 killed renamable $x24, 39321 + ; 64BIT-NEXT: renamable $f6 = LFD 0, killed renamable $x23 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x26 = LDtocCPT %const.12, $x2 :: (load (s64) from got) + ; 64BIT-NEXT: renamable $x29 = RLDIC killed renamable $x3, 52, 2 + ; 64BIT-NEXT: renamable $x28 = RLDIC killed renamable $x4, 53, 2 + ; 64BIT-NEXT: renamable $x11 = RLDIMI killed renamable $x11, 
renamable $x11, 32, 0 + ; 64BIT-NEXT: renamable $x12 = RLDIMI killed renamable $x12, renamable $x12, 32, 0 + ; 64BIT-NEXT: renamable $x0 = RLDIMI killed renamable $x0, renamable $x0, 32, 0 + ; 64BIT-NEXT: renamable $x31 = RLDIMI killed renamable $x31, renamable $x31, 32, 0 + ; 64BIT-NEXT: renamable $x30 = RLDIMI killed renamable $x30, renamable $x30, 32, 0 + ; 64BIT-NEXT: renamable $x3 = RLDIC killed renamable $x5, 34, 2 + ; 64BIT-NEXT: renamable $x4 = RLDIC killed renamable $x6, 32, 2 + ; 64BIT-NEXT: renamable $x5 = RLDIC killed renamable $x7, 34, 2 + ; 64BIT-NEXT: renamable $x6 = RLDIC killed renamable $x8, 32, 2 + ; 64BIT-NEXT: renamable $x7 = RLDIC killed renamable $x9, 32, 2 + ; 64BIT-NEXT: renamable $x8 = RLDIC killed renamable $x10, 32, 2 + ; 64BIT-NEXT: renamable $x9 = RLDIC killed renamable $x27, 32, 2 + ; 64BIT-NEXT: renamable $x11 = RLWIMI8 killed renamable $x11, renamable $x11, 16, 0, 15 + ; 64BIT-NEXT: renamable $x12 = RLWIMI8 killed renamable $x12, renamable $x12, 16, 0, 15 + ; 64BIT-NEXT: renamable $x0 = RLWIMI8 killed renamable $x0, renamable $x0, 16, 0, 15 + ; 64BIT-NEXT: renamable $x31 = RLWIMI8 killed renamable $x31, renamable $x31, 16, 0, 15 + ; 64BIT-NEXT: renamable $x30 = RLWIMI8 killed renamable $x30, renamable $x30, 16, 0, 15 + ; 64BIT-NEXT: renamable $x3 = ORIS8 killed renamable $x3, 52428 + ; 64BIT-NEXT: renamable $x4 = ORIS8 killed renamable $x4, 39321 + ; 64BIT-NEXT: renamable $x5 = ORIS8 killed renamable $x5, 52428 + ; 64BIT-NEXT: renamable $x6 = ORIS8 killed renamable $x6, 39321 + ; 64BIT-NEXT: renamable $x7 = ORIS8 killed renamable $x7, 39321 + ; 64BIT-NEXT: renamable $x8 = ORIS8 killed renamable $x8, 39321 + ; 64BIT-NEXT: renamable $x9 = ORIS8 killed renamable $x9, 39321 + ; 64BIT-NEXT: renamable $f7 = LFD 0, killed renamable $x22 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x27 = ORI8 killed renamable $x3, 52429 + ; 64BIT-NEXT: renamable $f8 = LFD 0, killed renamable $x21 :: (load (s64) from constant-pool) + ; 
64BIT-NEXT: renamable $x25 = ORI8 killed renamable $x4, 39322 + ; 64BIT-NEXT: renamable $f9 = LFD 0, killed renamable $x20 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x24 = ORI8 killed renamable $x5, 52429 + ; 64BIT-NEXT: renamable $f11 = LFD 0, killed renamable $x19 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x23 = ORI8 killed renamable $x6, 39322 + ; 64BIT-NEXT: renamable $f12 = LFD 0, killed renamable $x18 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x22 = ORI8 killed renamable $x7, 39322 + ; 64BIT-NEXT: renamable $f13 = LFD 0, killed renamable $x17 :: (load (s64) from constant-pool) + ; 64BIT-NEXT: renamable $x21 = ORI8 killed renamable $x8, 39322 + ; 64BIT-NEXT: renamable $f5 = LFS 0, killed renamable $x16 :: (load (s32) from constant-pool) + ; 64BIT-NEXT: renamable $x20 = ORI8 killed renamable $x9, 39322 + ; 64BIT-NEXT: renamable $f10 = LFS 0, killed renamable $x26 :: (load (s32) from constant-pool) + ; 64BIT-NEXT: $x3 = LI8 1 + ; 64BIT-NEXT: $x4 = LI8 2 + ; 64BIT-NEXT: $x5 = LI8 3 + ; 64BIT-NEXT: $x6 = LI8 4 + ; 64BIT-NEXT: $x7 = LI8 5 + ; 64BIT-NEXT: $x8 = LI8 6 + ; 64BIT-NEXT: $x9 = LI8 7 + ; 64BIT-NEXT: $x10 = LI8 8 + ; 64BIT-NEXT: STD killed renamable $x29, 184, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x28, 144, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x11, 216, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x12, 200, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x0, 160, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x31, 152, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x30, 128, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x27, 208, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x25, 192, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x24, 176, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x23, 168, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x22, 
136, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x21, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x20, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def dead $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 224, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm + entry: + %call = call i32 @mix_floats(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01, double 6.000000e-01, double 0x3FE6666666666666, double 8.000000e-01, double 9.000000e-01, double 1.000000e+00, double 1.100000e+00, double 1.200000e+00, double 1.300000e+00, double 1.400000e+00) + ret void + } + diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 02fe9943f39c41..78d60f06c06786 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -1,42 +1,57 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ -; RUN: FileCheck --check-prefixes=CHECK,32BIT %s - +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ ; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | \ ; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ -; RUN: FileCheck --check-prefixes=CHECK,64BIT %s - ; RUN: llc -verify-machineinstrs -mcpu=pwr4 
-mattr=-altivec \ ; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ ; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s define void @call_test_chars() { +; ASM32PWR4-LABEL: call_test_chars: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: li 3, 97 +; ASM32PWR4-NEXT: li 4, 97 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: li 5, 97 +; ASM32PWR4-NEXT: li 6, 97 +; ASM32PWR4-NEXT: bl .test_chars +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_chars: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: li 3, 97 +; ASM64PWR4-NEXT: li 4, 97 +; ASM64PWR4-NEXT: std 0, 128(1) +; ASM64PWR4-NEXT: li 5, 97 +; ASM64PWR4-NEXT: li 6, 97 +; ASM64PWR4-NEXT: bl .test_chars +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: call i8 @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) ret void } -; CHECK-LABEL: name: call_test_chars - -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: $r4 = LI 97 -; 32BIT: $r5 = LI 97 -; 32BIT: $r6 = LI 97 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 -; 64BIT: $x4 = LI8 97 -; 64BIT: $x5 = LI8 97 -; 64BIT: $x6 = LI8 97 -; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, 
implicit $r1 - define signext i8 @test_chars(i8 signext %c1, i8 signext %c2, i8 signext %c3, i8 signext %c4) { +; CHECKASM-LABEL: test_chars: +; CHECKASM: # %bb.0: # %entry +; CHECKASM-NEXT: add 3, 3, 4 +; CHECKASM-NEXT: add 3, 3, 5 +; CHECKASM-NEXT: add 3, 3, 6 +; CHECKASM-NEXT: extsb 3, 3 +; CHECKASM-NEXT: blr entry: %conv = sext i8 %c1 to i32 %conv1 = sext i8 %c2 to i32 @@ -49,51 +64,51 @@ entry: ret i8 %conv6 } -; CHECK-LABEL: name: test_chars - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } -; 32BIT: body: -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } -; 64BIT: body: -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 - define void @call_test_chars_mix() { +; ASM32PWR4-LABEL: call_test_chars_mix: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: li 3, 97 +; ASM32PWR4-NEXT: li 4, 225 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: li 5, 97 +; ASM32PWR4-NEXT: li 6, -31 +; ASM32PWR4-NEXT: bl .test_chars_mix +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_chars_mix: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: li 3, 97 +; ASM64PWR4-NEXT: li 4, 225 +; ASM64PWR4-NEXT: std 0, 128(1) +; ASM64PWR4-NEXT: li 5, 97 +; ASM64PWR4-NEXT: li 6, -31 +; ASM64PWR4-NEXT: bl .test_chars_mix +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; 
ASM64PWR4-NEXT: blr entry: call i8 @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) ret void } -; CHECK-LABEL: name: call_test_chars_mix - -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: $r4 = LI 225 -; 32BIT: $r5 = LI 97 -; 32BIT: $r6 = LI -31 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 -; 64BIT: $x4 = LI8 225 -; 64BIT: $x5 = LI8 97 -; 64BIT: $x6 = LI8 -31 -; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - define signext i8 @test_chars_mix(i8 signext %c1, i8 zeroext %c2, i8 zeroext %c3, i8 signext %c4) { +; CHECKASM-LABEL: test_chars_mix: +; CHECKASM: # %bb.0: # %entry +; CHECKASM-NEXT: add 3, 3, 4 +; CHECKASM-NEXT: add 3, 3, 5 +; CHECKASM-NEXT: add 3, 3, 6 +; CHECKASM-NEXT: extsb 3, 3 +; CHECKASM-NEXT: blr entry: %conv = sext i8 %c1 to i32 %conv1 = zext i8 %c2 to i32 @@ -106,92 +121,88 @@ entry: ret i8 %conv6 } -; CHECK-LABEL: name: test_chars_mix - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } -; 32BIT: body: -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } -; 64BIT: body: -; 
64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 - @global_i1 = global i8 0, align 1 define void @test_i1(i1 %b) { +; ASM32PWR4-LABEL: test_i1: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: lwz 4, L..C0(2) # @global_i1 +; ASM32PWR4-NEXT: clrlwi 3, 3, 31 +; ASM32PWR4-NEXT: stb 3, 0(4) +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_i1: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: ld 4, L..C0(2) # @global_i1 +; ASM64PWR4-NEXT: clrlwi 3, 3, 31 +; ASM64PWR4-NEXT: stb 3, 0(4) +; ASM64PWR4-NEXT: blr entry: %frombool = zext i1 %b to i8 store i8 %frombool, ptr @global_i1, align 1 ret void } -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3 -; 32BIT: renamable $r3 = RLWINM killed renamable $r3, 0, 31, 31 -; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store (s8) into @global_i1) - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3 -; 64BIT: renamable $r[[REG1:[0-9]+]] = RLWINM renamable $r[[REG1]], 0, 31, 31, implicit killed $x3 -; 64BIT-NEXT: STB killed renamable $r[[REG1]], 0, killed renamable $x4 :: (store (s8) into @global_i1) - define void @call_test_i1() { +; ASM32PWR4-LABEL: call_test_i1: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: bl .test_i1 +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_i1: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: std 0, 128(1) +; ASM64PWR4-NEXT: bl .test_i1 +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; 
ASM64PWR4-NEXT: blr entry: call void @test_i1(i1 1) ret void } -; CHECK-LABEL: name: call_test_i1 - -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 define void @test_i1zext(i1 zeroext %b) { +; ASM32PWR4-LABEL: test_i1zext: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: lwz 4, L..C0(2) # @global_i1 +; ASM32PWR4-NEXT: stb 3, 0(4) +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_i1zext: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: ld 4, L..C0(2) # @global_i1 +; ASM64PWR4-NEXT: stb 3, 0(4) +; ASM64PWR4-NEXT: blr entry: %frombool = zext i1 %b to i8 store i8 %frombool, ptr @global_i1, align 1 ret void } -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3 -; CHECK-NOT: RLWINM -; 32BIT: STB killed renamable $r3, 0, killed renamable $r4 :: (store (s8) into @global_i1) - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3 -; CHECK-NOT: RLWINM -; 64BIT: STB8 killed renamable $x3, 0, killed renamable $x4 :: (store (s8) into @global_i1) - define i32 @test_ints(i32 signext %a, i32 zeroext %b, i32 zeroext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) { +; CHECKASM-LABEL: test_ints: +; CHECKASM: # %bb.0: # %entry +; CHECKASM-NEXT: add 3, 3, 4 +; CHECKASM-NEXT: add 3, 3, 5 +; CHECKASM-NEXT: add 3, 3, 6 +; CHECKASM-NEXT: add 3, 3, 7 +; CHECKASM-NEXT: add 
3, 3, 8 +; CHECKASM-NEXT: add 3, 3, 9 +; CHECKASM-NEXT: add 3, 3, 10 +; CHECKASM-NEXT: blr entry: %add = add i32 %a, %b %add1 = add i32 %add, %c @@ -203,84 +214,109 @@ entry: ret i32 %add6 } -; CHECK-LABEL: name: test_ints - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x8', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x9', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x10', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 - define void @call_test_ints() { +; ASM32PWR4-LABEL: call_test_ints: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: li 4, 1 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: lis 5, -32768 +; ASM32PWR4-NEXT: lis 6, -32768 +; ASM32PWR4-NEXT: li 7, 1 +; ASM32PWR4-NEXT: li 8, 1 +; ASM32PWR4-NEXT: li 9, 1 +; ASM32PWR4-NEXT: li 10, 1 +; ASM32PWR4-NEXT: bl .test_ints +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_ints: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -112(1) +; 
ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: li 4, 1 +; ASM64PWR4-NEXT: std 0, 128(1) +; ASM64PWR4-NEXT: rldic 5, 3, 31, 32 +; ASM64PWR4-NEXT: lis 6, -32768 +; ASM64PWR4-NEXT: li 7, 1 +; ASM64PWR4-NEXT: li 8, 1 +; ASM64PWR4-NEXT: li 9, 1 +; ASM64PWR4-NEXT: li 10, 1 +; ASM64PWR4-NEXT: bl .test_ints +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: call i32 @test_ints(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) ret void } -; CHECK-LABEL: name: call_test_ints - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x3 = LI8 1 -; 64BIT: renamable $x5 = RLDIC killed renamable $x3, 31, 32 -; 64BIT: $x3 = LI8 1 -; 64BIT: $x4 = LI8 1 -; 64BIT: $x6 = LIS8 32768 -; 64BIT: $x7 = LI8 1 -; 64BIT: $x8 = LI8 1 -; 64BIT: $x9 = LI8 1 -; 64BIT: $x10 = LI8 1 -; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - define void @call_test_i64() { +; ASM32PWR4-LABEL: call_test_i64: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: li 3, 0 +; ASM32PWR4-NEXT: li 4, 1 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: li 5, 0 +; ASM32PWR4-NEXT: li 6, 2 +; ASM32PWR4-NEXT: li 7, 0 +; ASM32PWR4-NEXT: li 8, 3 +; ASM32PWR4-NEXT: li 9, 0 +; ASM32PWR4-NEXT: li 10, 4 +; ASM32PWR4-NEXT: bl .test_i64 +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_i64: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; 
ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: std 0, 128(1) +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: bl .test_i64 +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: call i64 @test_i64(i64 1, i64 2, i64 3, i64 4) ret void } -; CHECK-LABEL: name: call_test_i64 - -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 0 -; 32BIT: $r4 = LI 1 -; 32BIT: $r5 = LI 0 -; 32BIT: $r6 = LI 2 -; 32BIT: $r7 = LI 0 -; 32BIT: $r8 = LI 3 -; 32BIT: $r9 = LI 0 -; 32BIT: $r10 = LI 4 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: $x4 = LI8 2 -; 64BIT: $x5 = LI8 3 -; 64BIT: $x6 = LI8 4 -; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - define i64 @test_i64(i64 %a, i64 %b, i64 %c, i64 %d) { +; ASM32PWR4-LABEL: test_i64: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: addc 4, 4, 6 +; ASM32PWR4-NEXT: adde 3, 3, 5 +; ASM32PWR4-NEXT: addc 4, 4, 8 +; ASM32PWR4-NEXT: adde 3, 3, 7 +; ASM32PWR4-NEXT: addc 4, 4, 10 +; ASM32PWR4-NEXT: adde 3, 3, 9 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_i64: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: add 3, 3, 4 +; ASM64PWR4-NEXT: add 3, 3, 5 +; ASM64PWR4-NEXT: add 3, 3, 6 +; ASM64PWR4-NEXT: blr entry: %add = add nsw i64 %a, %b %add1 = add nsw 
i64 %add, %c @@ -288,31 +324,36 @@ entry: ret i64 %add2 } -; CHECK-LABEL: name: test_i64 - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 - define void @call_test_int_ptr() { +; ASM32PWR4-LABEL: call_test_int_ptr: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: li 3, 0 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: stw 3, 60(1) +; ASM32PWR4-NEXT: addi 3, 1, 60 +; ASM32PWR4-NEXT: bl .test_int_ptr +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_int_ptr: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: li 3, 0 +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: stw 3, 124(1) +; ASM64PWR4-NEXT: addi 3, 1, 124 +; ASM64PWR4-NEXT: bl .test_int_ptr +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %b = alloca i32, align 4 store i32 0, ptr %b, align 4 @@ -320,43 +361,56 @@ entry: ret void } -; CHECK-LABEL: name: call_test_int_ptr - -; 32BIT: ADJCALLSTACKDOWN 56, 0, 
implicit-def dead $r1, implicit $r1 -; 32BIT: renamable $r3 = ADDI %stack.0.b, 0 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x3 = ADDI8 %stack.0.b, 0 -; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - define void @test_int_ptr(ptr %a) { +; ASM32PWR4-LABEL: test_int_ptr: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: stw 3, -8(1) +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_int_ptr: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: std 3, -8(1) +; ASM64PWR4-NEXT: blr entry: %a.addr = alloca ptr, align 8 store ptr %a, ptr %a.addr, align 8 ret void } -; CHECK-LABEL: name: test_int_ptr - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3 -; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store (s32) into %ir.a.addr, align 8) - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3 -; 64BIT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store (s64) into %ir.a.addr) - - define i32 @caller(i32 %i) { +; ASM32PWR4-LABEL: caller: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: stw 3, 60(1) +; ASM32PWR4-NEXT: cntlzw 3, 3 +; ASM32PWR4-NEXT: not 3, 3 +; ASM32PWR4-NEXT: rlwinm 3, 3, 27, 31, 31 +; ASM32PWR4-NEXT: stb 3, 59(1) +; ASM32PWR4-NEXT: bl .call_test_bool[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: caller: +; 
ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: stw 3, 124(1) +; ASM64PWR4-NEXT: cntlzw 3, 3 +; ASM64PWR4-NEXT: srwi 3, 3, 5 +; ASM64PWR4-NEXT: xori 3, 3, 1 +; ASM64PWR4-NEXT: stb 3, 123(1) +; ASM64PWR4-NEXT: bl .call_test_bool[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %i.addr = alloca i32, align 4 %b = alloca i8, align 1 @@ -373,187 +427,147 @@ entry: declare i32 @call_test_bool(i1 zeroext) -; CHECK-LABEL: name: caller - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT: liveins: $r3 -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1, implicit-def $r3 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3 -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1, implicit-def $x3 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - @f1 = global float 0.000000e+00, align 4 @d1 = global double 0.000000e+00, align 8 define void @call_test_floats() { +; ASM32PWR4-LABEL: call_test_floats: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz 3, L..C1(2) # @f1 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: lfs 1, 0(3) +; ASM32PWR4-NEXT: fmr 2, 1 +; ASM32PWR4-NEXT: fmr 3, 1 +; ASM32PWR4-NEXT: bl .test_floats +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 
0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_floats: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld 3, L..C1(2) # @f1 +; ASM64PWR4-NEXT: std 0, 128(1) +; ASM64PWR4-NEXT: lfs 1, 0(3) +; ASM64PWR4-NEXT: fmr 2, 1 +; ASM64PWR4-NEXT: fmr 3, 1 +; ASM64PWR4-NEXT: bl .test_floats +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f1, align 4 call float @test_floats(float %0, float %0, float %0) ret void } -; CHECK-LABEL: name: call_test_floats{{.*}} - -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f1) -; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: $f2 = COPY renamable $f1 -; 32BIT-NEXT: $f3 = COPY renamable $f1 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) -; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: $f2 = COPY renamable $f1 -; 64BIT-NEXT: $f3 = COPY renamable $f1 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - define float @test_floats(float %f1, float %f2, float %f3) { +; CHECKASM-LABEL: test_floats: +; CHECKASM: # %bb.0: # %entry +; CHECKASM-NEXT: fadds 0, 1, 2 +; CHECKASM-NEXT: fadds 1, 0, 3 +; CHECKASM-NEXT: blr entry: %add = fadd float %f1, 
%f2 %add1 = fadd float %add, %f3 ret float %add1 } -; CHECK-LABEL: name: test_floats{{.*}} - -; CHECK: liveins: -; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } -; CHECK: body: | -; CHECK-NEXT: bb.0.entry: -; CHECK-NEXT: liveins: $f1, $f2, $f3 - define void @call_test_fpr_max() { +; ASM32PWR4-LABEL: call_test_fpr_max: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -128(1) +; ASM32PWR4-NEXT: lwz 3, L..C2(2) # @d1 +; ASM32PWR4-NEXT: stw 0, 136(1) +; ASM32PWR4-NEXT: lfd 1, 0(3) +; ASM32PWR4-NEXT: fmr 2, 1 +; ASM32PWR4-NEXT: fmr 3, 1 +; ASM32PWR4-NEXT: stfd 1, 120(1) +; ASM32PWR4-NEXT: stfd 1, 112(1) +; ASM32PWR4-NEXT: fmr 4, 1 +; ASM32PWR4-NEXT: fmr 5, 1 +; ASM32PWR4-NEXT: stfd 1, 104(1) +; ASM32PWR4-NEXT: fmr 6, 1 +; ASM32PWR4-NEXT: fmr 7, 1 +; ASM32PWR4-NEXT: stfd 1, 96(1) +; ASM32PWR4-NEXT: stfd 1, 88(1) +; ASM32PWR4-NEXT: fmr 8, 1 +; ASM32PWR4-NEXT: fmr 9, 1 +; ASM32PWR4-NEXT: stfd 1, 80(1) +; ASM32PWR4-NEXT: fmr 10, 1 +; ASM32PWR4-NEXT: fmr 11, 1 +; ASM32PWR4-NEXT: stfd 1, 72(1) +; ASM32PWR4-NEXT: stfd 1, 64(1) +; ASM32PWR4-NEXT: fmr 12, 1 +; ASM32PWR4-NEXT: fmr 13, 1 +; ASM32PWR4-NEXT: stfd 1, 56(1) +; ASM32PWR4-NEXT: bl .test_fpr_max +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 128 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_fpr_max: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -160(1) +; ASM64PWR4-NEXT: ld 3, L..C2(2) # @d1 +; ASM64PWR4-NEXT: std 0, 176(1) +; ASM64PWR4-NEXT: lfd 1, 0(3) +; ASM64PWR4-NEXT: fmr 2, 1 +; ASM64PWR4-NEXT: fmr 3, 1 +; ASM64PWR4-NEXT: stfd 1, 144(1) +; ASM64PWR4-NEXT: stfd 1, 136(1) +; ASM64PWR4-NEXT: fmr 4, 1 +; ASM64PWR4-NEXT: fmr 5, 1 +; ASM64PWR4-NEXT: stfd 1, 128(1) +; ASM64PWR4-NEXT: fmr 6, 1 +; ASM64PWR4-NEXT: fmr 7, 1 +; ASM64PWR4-NEXT: stfd 1, 120(1) +; ASM64PWR4-NEXT: stfd 1, 
112(1) +; ASM64PWR4-NEXT: fmr 8, 1 +; ASM64PWR4-NEXT: fmr 9, 1 +; ASM64PWR4-NEXT: fmr 10, 1 +; ASM64PWR4-NEXT: fmr 11, 1 +; ASM64PWR4-NEXT: fmr 12, 1 +; ASM64PWR4-NEXT: fmr 13, 1 +; ASM64PWR4-NEXT: bl .test_fpr_max +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 160 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load double, ptr @d1, align 8 call double @test_fpr_max(double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0) ret void } -; CHECK-LABEL: name: call_test_fpr_max{{.*}} - -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) -; 32BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: STFD renamable $f1, 56, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 64, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 72, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 80, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 88, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 96, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 104, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 112, $r1 :: (store (s64)) -; 32BIT-DAG: STFD renamable $f1, 120, $r1 :: (store (s64)) -; 32BIT-DAG: $f2 = COPY renamable $f1 -; 32BIT-DAG: $f3 = COPY renamable $f1 -; 32BIT-DAG: $f4 = COPY renamable $f1 -; 32BIT-DAG: $f5 = COPY renamable $f1 -; 32BIT-DAG: $f6 = COPY renamable $f1 -; 32BIT-DAG: $f7 = COPY renamable $f1 -; 32BIT-DAG: $f8 = COPY renamable $f1 -; 32BIT-DAG: $f9 = COPY renamable $f1 -; 32BIT-DAG: $f10 = COPY renamable $f1 -; 32BIT-DAG: $f11 = COPY renamable $f1 -; 32BIT-DAG: $f12 = COPY renamable $f1 -; 32BIT-DAG: $f13 = COPY renamable $f1 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, 
implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 -; 32BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 - -; CHECKASM-LABEL: .call_test_fpr_max: - -; ASM32PWR4: stwu 1, -128(1) -; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C2(2) -; ASM32PWR4-NEXT: stw 0, 136(1) -; ASM32PWR4-NEXT: lfd 1, 0([[REG]]) -; ASM32PWR4-DAG: stfd 1, 56(1) -; ASM32PWR4-DAG: stfd 1, 64(1) -; ASM32PWR4-DAG: stfd 1, 72(1) -; ASM32PWR4-DAG: stfd 1, 80(1) -; ASM32PWR4-DAG: stfd 1, 88(1) -; ASM32PWR4-DAG: stfd 1, 96(1) -; ASM32PWR4-DAG: stfd 1, 104(1) -; ASM32PWR4-DAG: stfd 1, 112(1) -; ASM32PWR4-DAG: stfd 1, 120(1) -; ASM32PWR4-DAG: fmr 2, 1 -; ASM32PWR4-DAG: fmr 3, 1 -; ASM32PWR4-DAG: fmr 4, 1 -; ASM32PWR4-DAG: fmr 5, 1 -; ASM32PWR4-DAG: fmr 6, 1 -; ASM32PWR4-DAG: fmr 7, 1 -; ASM32PWR4-DAG: fmr 8, 1 -; ASM32PWR4-DAG: fmr 9, 1 -; ASM32PWR4-DAG: fmr 10, 1 -; ASM32PWR4-DAG: fmr 11, 1 -; ASM32PWR4-DAG: fmr 12, 1 -; ASM32PWR4-DAG: fmr 13, 1 -; ASM32PWR4-NEXT: bl .test_fpr_max -; ASM32PWR4-NEXT: nop -; ASM32PWR4-NEXT: addi 1, 1, 128 - -; 64BIT: renamable $x[[REGD1ADDR:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x[[REGD1ADDR:[0-9]+]] :: (dereferenceable load (s64) from @d1) -; 64BIT-NEXT: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: STFD renamable $f1, 112, $x1 :: (store (s64)) -; 64BIT-DAG: STFD renamable $f1, 120, $x1 :: (store (s64)) -; 64BIT-DAG: STFD renamable $f1, 128, $x1 :: (store (s64)) -; 64BIT-DAG: STFD renamable $f1, 136, $x1 :: (store (s64)) -; 64BIT-DAG: STFD renamable $f1, 144, $x1 :: (store (s64)) -; 64BIT-DAG: $f2 = COPY renamable $f1 -; 64BIT-DAG: $f3 = COPY renamable $f1 -; 64BIT-DAG: $f4 = COPY renamable $f1 -; 64BIT-DAG: $f5 = COPY renamable 
$f1 -; 64BIT-DAG: $f6 = COPY renamable $f1 -; 64BIT-DAG: $f7 = COPY renamable $f1 -; 64BIT-DAG: $f8 = COPY renamable $f1 -; 64BIT-DAG: $f9 = COPY renamable $f1 -; 64BIT-DAG: $f10 = COPY renamable $f1 -; 64BIT-DAG: $f11 = COPY renamable $f1 -; 64BIT-DAG: $f12 = COPY renamable $f1 -; 64BIT-DAG: $f13 = COPY renamable $f1 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 - -; ASM64PWR4: stdu 1, -160(1) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C2(2) -; ASM64PWR4-NEXT: std 0, 176(1) -; ASM64PWR4-NEXT: lfd 1, 0([[REG]]) -; ASM64PWR4-DAG: stfd 1, 112(1) -; ASM64PWR4-DAG: stfd 1, 120(1) -; ASM64PWR4-DAG: stfd 1, 128(1) -; ASM64PWR4-DAG: stfd 1, 136(1) -; ASM64PWR4-DAG: stfd 1, 144(1) -; ASM64PWR4-DAG: fmr 2, 1 -; ASM64PWR4-DAG: fmr 3, 1 -; ASM64PWR4-DAG: fmr 4, 1 -; ASM64PWR4-DAG: fmr 5, 1 -; ASM64PWR4-DAG: fmr 6, 1 -; ASM64PWR4-DAG: fmr 7, 1 -; ASM64PWR4-DAG: fmr 8, 1 -; ASM64PWR4-DAG: fmr 9, 1 -; ASM64PWR4-DAG: fmr 10, 1 -; ASM64PWR4-DAG: fmr 11, 1 -; ASM64PWR4-DAG: fmr 12, 1 -; ASM64PWR4-DAG: fmr 13, 1 -; ASM64PWR4-NEXT: bl .test_fpr_max -; ASM64PWR4-NEXT: nop -; ASM64PWR4-NEXT: addi 1, 1, 160 - define double @test_fpr_max(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13) { +; CHECKASM-LABEL: test_fpr_max: +; CHECKASM: # %bb.0: # %entry +; CHECKASM-NEXT: fadd 0, 1, 2 +; CHECKASM-NEXT: fadd 0, 0, 3 +; CHECKASM-NEXT: fadd 0, 0, 4 +; CHECKASM-NEXT: fadd 0, 0, 5 +; CHECKASM-NEXT: fadd 0, 0, 6 +; CHECKASM-NEXT: fadd 0, 0, 7 +; CHECKASM-NEXT: fadd 0, 0, 8 +; CHECKASM-NEXT: fadd 
0, 0, 9 +; CHECKASM-NEXT: fadd 0, 0, 10 +; CHECKASM-NEXT: fadd 0, 0, 11 +; CHECKASM-NEXT: fadd 0, 0, 12 +; CHECKASM-NEXT: fadd 1, 0, 13 +; CHECKASM-NEXT: blr entry: %add = fadd double %d1, %d2 %add1 = fadd double %add, %d3 @@ -570,27 +584,42 @@ entry: ret double %add11 } -; CHECK-LABEL: name: test_fpr_max{{.*}} - -; CHECK: liveins: -; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f4', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f5', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f6', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f7', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f8', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f9', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f10', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f11', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f12', virtual-reg: '' } -; CHECK-NEXT: - { reg: '$f13', virtual-reg: '' } -; CHECK: body: | -; CHECK-NEXT: bb.0.entry: -; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 - define void @call_test_mix() { +; ASM32PWR4-LABEL: call_test_mix: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz 3, L..C1(2) # @f1 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: li 4, 1 +; ASM32PWR4-NEXT: li 7, 97 +; ASM32PWR4-NEXT: lfs 1, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C2(2) # @d1 +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: bl .test_mix +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_mix: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld 3, L..C1(2) # @f1 +; ASM64PWR4-NEXT: std 0, 128(1) +; ASM64PWR4-NEXT: li 4, 1 +; ASM64PWR4-NEXT: li 6, 97 +; ASM64PWR4-NEXT: lfs 1, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C2(2) # 
@d1 +; ASM64PWR4-NEXT: lfd 2, 0(3) +; ASM64PWR4-NEXT: bl .test_mix +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f1, align 4 %1 = load double, ptr @d1, align 8 @@ -598,29 +627,46 @@ entry: ret void } -; CHECK-LABEL: name: call_test_mix{{.*}} - -; 32BIT: renamable $r[[REG1:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $r[[REG2:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG1]] :: (dereferenceable load (s32) from @f1) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG2]] :: (dereferenceable load (s64) from @d1) -; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: $r4 = LI 1 -; 32BIT-NEXT: $r7 = LI 97 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r4, implicit $f2, implicit killed $r7, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load (s32) from @f1) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load (s64) from @d1) -; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: $x4 = LI8 1 -; 64BIT-NEXT: $x6 = LI8 97 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x4, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - define i32 @test_mix(float %f, i32 signext %i, double %d, i8 signext %c) { +; ASM32PWR4-LABEL: test_mix: +; ASM32PWR4: # %bb.0: # 
%entry +; ASM32PWR4-NEXT: lis 3, 17200 +; ASM32PWR4-NEXT: fadd 1, 1, 2 +; ASM32PWR4-NEXT: stw 3, -16(1) +; ASM32PWR4-NEXT: lwz 3, L..C3(2) # %const.0 +; ASM32PWR4-NEXT: frsp 1, 1 +; ASM32PWR4-NEXT: lfs 0, 0(3) +; ASM32PWR4-NEXT: clrlwi 3, 7, 24 +; ASM32PWR4-NEXT: add 3, 4, 3 +; ASM32PWR4-NEXT: xoris 3, 3, 32768 +; ASM32PWR4-NEXT: stw 3, -12(1) +; ASM32PWR4-NEXT: addi 3, 1, -4 +; ASM32PWR4-NEXT: lfd 2, -16(1) +; ASM32PWR4-NEXT: fsub 0, 2, 0 +; ASM32PWR4-NEXT: frsp 0, 0 +; ASM32PWR4-NEXT: fadds 0, 0, 1 +; ASM32PWR4-NEXT: fctiwz 0, 0 +; ASM32PWR4-NEXT: stfiwx 0, 0, 3 +; ASM32PWR4-NEXT: lwz 3, -4(1) +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_mix: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: clrlwi 5, 6, 24 +; ASM64PWR4-NEXT: fadd 0, 1, 2 +; ASM64PWR4-NEXT: addi 3, 1, -4 +; ASM64PWR4-NEXT: frsp 0, 0 +; ASM64PWR4-NEXT: add 4, 4, 5 +; ASM64PWR4-NEXT: extsw 4, 4 +; ASM64PWR4-NEXT: std 4, -16(1) +; ASM64PWR4-NEXT: lfd 1, -16(1) +; ASM64PWR4-NEXT: fcfid 1, 1 +; ASM64PWR4-NEXT: frsp 1, 1 +; ASM64PWR4-NEXT: fadds 0, 1, 0 +; ASM64PWR4-NEXT: fctiwz 0, 0 +; ASM64PWR4-NEXT: stfiwx 0, 0, 3 +; ASM64PWR4-NEXT: lwz 3, -4(1) +; ASM64PWR4-NEXT: blr entry: %conv = fpext float %f to double %add = fadd double %conv, %d @@ -633,28 +679,27 @@ entry: ret i32 %conv6 } -; CHECK-LABEL: name: test_mix{{.*}} - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$f1', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$f2', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $f1, $f2, $r4, $r7 - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$f1', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$f2', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $f1, $f2, $x4, $x6 - - define i64 @callee_mixed_ints(i32 %a, i8 signext %b, i32 %c, i16 signext 
%d, i64 %e) { +; ASM32PWR4-LABEL: callee_mixed_ints: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: clrlwi 4, 4, 24 +; ASM32PWR4-NEXT: add 3, 3, 4 +; ASM32PWR4-NEXT: add 3, 3, 5 +; ASM32PWR4-NEXT: add 3, 3, 6 +; ASM32PWR4-NEXT: srawi 5, 3, 31 +; ASM32PWR4-NEXT: addc 4, 3, 8 +; ASM32PWR4-NEXT: adde 3, 5, 7 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: callee_mixed_ints: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: clrlwi 4, 4, 24 +; ASM64PWR4-NEXT: add 3, 3, 4 +; ASM64PWR4-NEXT: add 3, 3, 5 +; ASM64PWR4-NEXT: add 3, 3, 6 +; ASM64PWR4-NEXT: extsw 3, 3 +; ASM64PWR4-NEXT: add 3, 3, 7 +; ASM64PWR4-NEXT: blr entry: %conv = zext i8 %b to i32 %add = add nsw i32 %a, %conv @@ -666,30 +711,50 @@ entry: ret i64 %add5 } -; CHECK-LABEL: name: callee_mixed_ints - -; 32BIT: liveins: -; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } -; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } -; 32BIT: body: | -; 32BIT-NEXT: bb.0.entry: -; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8 - -; 64BIT: liveins: -; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } -; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } -; 64BIT: body: | -; 64BIT-NEXT: bb.0.entry: -; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7 - define void @call_test_vararg() { +; ASM32PWR4-LABEL: call_test_vararg: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz 3, L..C1(2) # @f1 +; ASM32PWR4-NEXT: stw 0, 88(1) +; ASM32PWR4-NEXT: lfs 1, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C2(2) # @d1 +; ASM32PWR4-NEXT: stfd 1, 64(1) +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfd 2, 72(1) +; ASM32PWR4-NEXT: lwz 
4, 64(1) +; ASM32PWR4-NEXT: lwz 5, 68(1) +; ASM32PWR4-NEXT: lwz 6, 72(1) +; ASM32PWR4-NEXT: lwz 7, 76(1) +; ASM32PWR4-NEXT: bl .test_vararg[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_vararg: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C1(2) # @f1 +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: lfs 1, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C2(2) # @d1 +; ASM64PWR4-NEXT: stfd 1, 112(1) +; ASM64PWR4-NEXT: lfd 2, 0(3) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfd 2, 120(1) +; ASM64PWR4-NEXT: ld 4, 112(1) +; ASM64PWR4-NEXT: ld 5, 120(1) +; ASM64PWR4-NEXT: bl .test_vararg[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f1, align 4 %conv = fpext float %0 to double @@ -700,69 +765,52 @@ entry: declare void @test_vararg(i32, ...) 
-; CHECK-LABEL: name: call_test_vararg - -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]], align 8) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) -; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]] + 4) -; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) -; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]], align 8) -; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]] + 4) -; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: $r3 = LI 42 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit $f2, implicit $r6, implicit $r7, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; CHECKASM-LABEL: .call_test_vararg: - -; ASM32PWR4: stwu 1, -80(1) -; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C1(2) -; ASM32PWR4-NEXT: stw 0, 88(1) -; ASM32PWR4-NEXT: lfs 1, 0([[REG]]) -; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C2(2) -; ASM32PWR4-NEXT: stfd 1, 64(1) -; ASM32PWR4-NEXT: lfd 2, 0([[REG]]) -; ASM32PWR4-NEXT: li 3, 42 -; ASM32PWR4-NEXT: stfd 2, 72(1) -; ASM32PWR4-DAG: lwz 4, 64(1) -; ASM32PWR4-DAG: lwz 5, 68(1) -; ASM32PWR4-DAG: lwz 6, 72(1) -; ASM32PWR4-DAG: lwz 7, 76(1) -; ASM32PWR4-NEXT: bl .test_vararg[PR] -; ASM32PWR4-NEXT: nop - -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, 
$x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) -; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d1) -; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load (s64) from %stack.[[SLOT1]]) -; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) -; 64BIT-NEXT: renamable $x5 = LD 0, %stack.[[SLOT2]] :: (load (s64) from %stack.[[SLOT2]]) -; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: $x3 = LI8 42 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $f2, implicit $x5, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - -; ASM64PWR4: stdu 1, -128(1) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2) -; ASM64PWR4-NEXT: std 0, 144(1) -; ASM64PWR4-NEXT: lfs 1, 0([[REG]]) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C2(2) -; ASM64PWR4-NEXT: stfd 1, 112(1) -; ASM64PWR4-NEXT: lfd 2, 0([[REG]]) -; ASM64PWR4-NEXT: li 3, 42 -; ASM64PWR4-NEXT: stfd 2, 120(1) -; ASM64PWR4-NEXT: ld 4, 112(1) -; ASM64PWR4-NEXT: ld 5, 120(1) -; ASM64PWR4-NEXT: bl .test_vararg[PR] -; ASM64PWR4-NEXT: nop - define void @call_test_vararg2() { +; ASM32PWR4-LABEL: call_test_vararg2: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz 3, L..C1(2) # @f1 +; ASM32PWR4-NEXT: stw 0, 88(1) +; ASM32PWR4-NEXT: li 6, 42 +; ASM32PWR4-NEXT: lfs 1, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C2(2) # @d1 +; ASM32PWR4-NEXT: stfd 1, 64(1) +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfd 2, 72(1) +; 
ASM32PWR4-NEXT: lwz 4, 64(1) +; ASM32PWR4-NEXT: lwz 5, 68(1) +; ASM32PWR4-NEXT: lwz 7, 72(1) +; ASM32PWR4-NEXT: lwz 8, 76(1) +; ASM32PWR4-NEXT: bl .test_vararg[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_vararg2: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C1(2) # @f1 +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: li 5, 42 +; ASM64PWR4-NEXT: lfs 1, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C2(2) # @d1 +; ASM64PWR4-NEXT: stfd 1, 112(1) +; ASM64PWR4-NEXT: lfd 2, 0(3) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfd 2, 120(1) +; ASM64PWR4-NEXT: ld 4, 112(1) +; ASM64PWR4-NEXT: ld 6, 120(1) +; ASM64PWR4-NEXT: bl .test_vararg[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f1, align 4 %conv = fpext float %0 to double @@ -771,71 +819,53 @@ entry: ret void } -; CHECK-LABEL: name: call_test_vararg2 - -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]], align 8) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) -; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]] + 4) -; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) -; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load (s32) from 
%stack.[[SLOT2]], align 8) -; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]] + 4) -; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: $r3 = LI 42 -; 32BIT-NEXT: $r6 = LI 42 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit $f2, implicit $r7, implicit $r8, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; ASM32PWR4: stwu 1, -80(1) -; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C1(2) -; ASM32PWR4-NEXT: stw 0, 88(1) -; ASM32PWR4-NEXT: li 6, 42 -; ASM32PWR4-NEXT: lfs 1, 0([[REG]]) -; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C2(2) -; ASM32PWR4-NEXT: stfd 1, 64(1) -; ASM32PWR4-NEXT: lfd 2, 0([[REG]]) -; ASM32PWR4-NEXT: li 3, 42 -; ASM32PWR4-NEXT: stfd 2, 72(1) -; ASM32PWR4-DAG: lwz 4, 64(1) -; ASM32PWR4-DAG: lwz 5, 68(1) -; ASM32PWR4-DAG: lwz 7, 72(1) -; ASM32PWR4-DAG: lwz 8, 76(1) -; ASM32PWR4-NEXT: bl .test_vararg[PR] -; ASM32PWR4-NEXT: nop - -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) -; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d1) -; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load (s64) from %stack.[[SLOT1]]) -; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) -; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load (s64) from %stack.[[SLOT2]]) -; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: $x3 = LI8 42 -; 64BIT-NEXT: $x5 = LI8 42 -; 64BIT-NEXT: 
BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - -; ASM64PWR4: stdu 1, -128(1) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2) -; ASM64PWR4-NEXT: std 0, 144(1) -; ASM64PWR4-NEXT: li 5, 42 -; ASM64PWR4-NEXT: lfs 1, 0([[REG]]) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C2(2) -; ASM64PWR4-NEXT: stfd 1, 112(1) -; ASM64PWR4-NEXT: lfd 2, 0([[REG]]) -; ASM64PWR4-NEXT: li 3, 42 -; ASM64PWR4-NEXT: stfd 2, 120(1) -; ASM64PWR4-NEXT: ld 4, 112(1) -; ASM64PWR4-NEXT: ld 6, 120(1) -; ASM64PWR4-NEXT: bl .test_vararg[PR] -; ASM64PWR4-NEXT: nop - define void @call_test_vararg3() { +; ASM32PWR4-LABEL: call_test_vararg3: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz 3, L..C1(2) # @f1 +; ASM32PWR4-NEXT: stw 0, 88(1) +; ASM32PWR4-NEXT: li 6, 0 +; ASM32PWR4-NEXT: li 7, 42 +; ASM32PWR4-NEXT: lfs 1, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C2(2) # @d1 +; ASM32PWR4-NEXT: stfd 1, 64(1) +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfd 2, 72(1) +; ASM32PWR4-NEXT: lwz 4, 64(1) +; ASM32PWR4-NEXT: lwz 5, 68(1) +; ASM32PWR4-NEXT: lwz 8, 72(1) +; ASM32PWR4-NEXT: lwz 9, 76(1) +; ASM32PWR4-NEXT: bl .test_vararg[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_vararg3: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C1(2) # @f1 +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: li 5, 42 +; ASM64PWR4-NEXT: lfs 1, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C2(2) # @d1 +; ASM64PWR4-NEXT: stfd 1, 112(1) +; ASM64PWR4-NEXT: lfd 2, 0(3) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfd 2, 120(1) +; ASM64PWR4-NEXT: ld 4, 112(1) 
+; ASM64PWR4-NEXT: ld 6, 120(1) +; ASM64PWR4-NEXT: bl .test_vararg[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f1, align 4 %conv = fpext float %0 to double @@ -844,118 +874,46 @@ entry: ret void } -; CHECK-LABEL: name: call_test_vararg3 - -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]], align 8) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) -; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]] + 4) -; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) -; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]], align 8) -; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]] + 4) -; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: $r3 = LI 42 -; 32BIT-NEXT: $r6 = LI 0 -; 32BIT-NEXT: $r7 = LI 42 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit killed $r6, implicit killed $r7, implicit $f2, implicit $r8, implicit $r9, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; ASM32PWR4: stwu 1, -80(1) -; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C1(2) -; ASM32PWR4-DAG: li 6, 0 -; ASM32PWR4-DAG: li 7, 42 -; ASM32PWR4-NEXT: lfs 1, 0([[REG]]) -; 
ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C2(2) -; ASM32PWR4-NEXT: stfd 1, 64(1) -; ASM32PWR4-NEXT: lfd 2, 0([[REG]]) -; ASM32PWR4-NEXT: li 3, 42 -; ASM32PWR4-NEXT: stfd 2, 72(1) -; ASM32PWR4-DAG: lwz 4, 64(1) -; ASM32PWR4-DAG: lwz 5, 68(1) -; ASM32PWR4-DAG: lwz 8, 72(1) -; ASM32PWR4-DAG: lwz 9, 76(1) -; ASM32PWR4-NEXT: bl .test_vararg[PR] -; ASM32PWR4-NEXT: nop - -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) -; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d1) -; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load (s64) from %stack.[[SLOT1]]) -; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) -; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load (s64) from %stack.[[SLOT2]]) -; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: $x3 = LI8 42 -; 64BIT-NEXT: $x5 = LI8 42 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit killed $x5, implicit $f2, implicit $x6, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - -; ASM64PWR4: stdu 1, -128(1) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2) -; ASM64PWR4-NEXT: std 0, 144(1) -; ASM64PWR4-NEXT: li 5, 42 -; ASM64PWR4-NEXT: lfs 1, 0([[REG]]) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C2(2) -; ASM64PWR4-NEXT: stfd 1, 112(1) -; ASM64PWR4-NEXT: lfd 2, 0([[REG]]) -; ASM64PWR4-NEXT: li 3, 42 -; ASM64PWR4-NEXT: stfd 2, 120(1) -; ASM64PWR4-DAG: ld 4, 112(1) -; ASM64PWR4-DAG: ld 6, 120(1) -; ASM64PWR4-NEXT: bl .test_vararg[PR] -; 
ASM64PWR4-NEXT: nop - define void @call_test_vararg4() { +; ASM32PWR4-LABEL: call_test_vararg4: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz 3, L..C1(2) # @f1 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: lfs 1, 0(3) +; ASM32PWR4-NEXT: li 3, 42 +; ASM32PWR4-NEXT: stfs 1, 60(1) +; ASM32PWR4-NEXT: lwz 4, 60(1) +; ASM32PWR4-NEXT: bl .test_vararg[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_vararg4: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C1(2) # @f1 +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: lfs 1, 0(3) +; ASM64PWR4-NEXT: li 3, 42 +; ASM64PWR4-NEXT: stfs 1, 124(1) +; ASM64PWR4-NEXT: lwz 4, 124(1) +; ASM64PWR4-NEXT: bl .test_vararg[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f1, align 4 call void (i32, ...) 
@test_vararg(i32 42, float %0) ret void } -; CHECK-LABEL: name: call_test_vararg4 - -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) -; 32BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store (s32) into %stack.[[SLOT]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT]] :: (load (s32) from %stack.[[SLOT]]) -; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: $r3 = LI 42 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; ASM32PWR4: stwu 1, -64(1) -; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C1(2) -; ASM32PWR4-NEXT: stw 0, 72(1) -; ASM32PWR4-NEXT: lfs 1, 0([[REG]]) -; ASM32PWR4-NEXT: li 3, 42 -; ASM32PWR4-NEXT: stfs 1, 60(1) -; ASM32PWR4-NEXT: lwz 4, 60(1) -; ASM32PWR4-NEXT: bl .test_vararg[PR] -; ASM32PWR4-NEXT: nop - -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) -; 64BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store (s32) into %stack.[[SLOT]]) -; 64BIT-NEXT: renamable $x4 = LWZ8 0, %stack.[[SLOT]] :: (load (s32) from %stack.[[SLOT]]) -; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: $x3 = LI8 42 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - -; ASM64PWR4: stdu 1, -128(1) -; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2) -; ASM64PWR4-NEXT: std 0, 144(1) -; ASM64PWR4-NEXT: lfs 1, 0([[REG]]) -; ASM64PWR4-NEXT: li 3, 42 -; ASM64PWR4-NEXT: stfs 1, 124(1) -; 
ASM64PWR4-NEXT: lwz 4, 124(1) -; ASM64PWR4-NEXT: bl .test_vararg[PR] -; ASM64PWR4-NEXT: nop - @c = common global i8 0, align 1 @si = common global i16 0, align 2 @i = common global i32 0, align 4 @@ -965,6 +923,73 @@ entry: ; Basic saving of integral type arguments to the parameter save area. define void @call_test_stackarg_int() { +; ASM32PWR4-LABEL: call_test_stackarg_int: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz 3, L..C4(2) # @si +; ASM32PWR4-NEXT: stw 0, 88(1) +; ASM32PWR4-NEXT: lwz 4, L..C5(2) # @i +; ASM32PWR4-NEXT: li 6, 4 +; ASM32PWR4-NEXT: li 8, 6 +; ASM32PWR4-NEXT: li 9, 7 +; ASM32PWR4-NEXT: li 10, 8 +; ASM32PWR4-NEXT: lha 7, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C6(2) # @c +; ASM32PWR4-NEXT: lbz 11, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C7(2) # @lli +; ASM32PWR4-NEXT: lwz 5, 0(4) +; ASM32PWR4-NEXT: lwz 4, 0(3) +; ASM32PWR4-NEXT: lwz 3, 4(3) +; ASM32PWR4-NEXT: stw 5, 76(1) +; ASM32PWR4-NEXT: stw 3, 72(1) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: stw 4, 68(1) +; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: stw 5, 64(1) +; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: stw 7, 60(1) +; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: stw 11, 56(1) +; ASM32PWR4-NEXT: bl .test_stackarg_int[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_stackarg_int: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -160(1) +; ASM64PWR4-NEXT: ld 3, L..C3(2) # @si +; ASM64PWR4-NEXT: std 0, 176(1) +; ASM64PWR4-NEXT: ld 4, L..C4(2) # @i +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: li 8, 6 +; ASM64PWR4-NEXT: li 9, 7 +; ASM64PWR4-NEXT: li 10, 8 +; ASM64PWR4-NEXT: lha 7, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C5(2) # @c +; ASM64PWR4-NEXT: lbz 11, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C6(2) # @lli +; ASM64PWR4-NEXT: lwz 5, 0(4) +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: ld 3, 
0(3) +; ASM64PWR4-NEXT: std 5, 144(1) +; ASM64PWR4-NEXT: std 3, 136(1) +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: std 5, 128(1) +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: std 7, 120(1) +; ASM64PWR4-NEXT: li 7, 5 +; ASM64PWR4-NEXT: std 11, 112(1) +; ASM64PWR4-NEXT: bl .test_stackarg_int[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 160 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load i8, ptr @c, align 1 %1 = load i16, ptr @si, align 2 @@ -977,121 +1002,60 @@ entry: declare void @test_stackarg_int(i32, i32, i32, i32, i32, i32, i32, i32, i8 zeroext, i16 signext, i32, i64, i32) -; CHECK-LABEL: name: call_test_stackarg_int{{.*}} - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; 32BIT-DAG: ADJCALLSTACKDOWN 80, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: $r3 = LI 1 -; 32BIT-DAG: $r4 = LI 2 -; 32BIT-DAG: $r5 = LI 3 -; 32BIT-DAG: $r6 = LI 4 -; 32BIT-DAG: $r7 = LI 5 -; 32BIT-DAG: $r8 = LI 6 -; 32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: renamable $r[[REGCADDR:[0-9]+]] = LWZtoc @c, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGC:[0-9]+]] = LBZ 0, killed renamable $r[[REGCADDR]] :: (dereferenceable load (s8) from @c) -; 32BIT-DAG: STW killed renamable $r[[REGC]], 56, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load (s16) from @si) -; 32BIT-DAG: STW killed renamable $r[[REGSI]], 60, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load (s32) from @i) -; 32BIT-DAG: STW killed renamable $r[[REGI]], 64, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGLLIADDR:[0-9]+]] = LWZtoc @lli, $r2 :: (load 
(s32) from got) -; 32BIT-DAG: renamable $r[[REGLLI1:[0-9]+]] = LWZ 0, renamable $r[[REGLLIADDR]] :: (dereferenceable load (s32) from @lli, align 8) -; 32BIT-DAG: STW killed renamable $r[[REGLLI1]], 68, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGLLI2:[0-9]+]] = LWZ 4, killed renamable $r[[REGLLIADDR]] :: (dereferenceable load (s32) from @lli + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REGLLI2]], 72, $r1 :: (store (s32)) -; 32BIT-DAG: STW renamable $r[[REGI]], 76, $r1 :: (store (s32)) -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 80, 0, implicit-def dead $r1, implicit $r1 - -; CHECKASM-LABEL: .call_test_stackarg_int: - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; ASM32PWR4: stwu 1, -80(1) -; ASM32PWR4-DAG: li 3, 1 -; ASM32PWR4-DAG: li 4, 2 -; ASM32PWR4-DAG: li 5, 3 -; ASM32PWR4-DAG: li 6, 4 -; ASM32PWR4-DAG: li 7, 5 -; ASM32PWR4-DAG: li 8, 6 -; ASM32PWR4-DAG: li 9, 7 -; ASM32PWR4-DAG: li 10, 8 -; ASM32PWR4-DAG: lwz [[REGCADDR:[0-9]+]], L..C6(2) -; ASM32PWR4-DAG: lbz [[REGC:[0-9]+]], 0([[REGCADDR]]) -; ASM32PWR4-DAG: stw [[REGC]], 56(1) -; ASM32PWR4-DAG: lwz [[REGSIADDR:[0-9]+]], L..C4(2) -; ASM32PWR4-DAG: lha [[REGSI:[0-9]+]], 0([[REGSIADDR]]) -; ASM32PWR4-DAG: stw [[REGSI]], 60(1) -; ASM32PWR4-DAG: lwz [[REGIADDR:[0-9]+]], L..C5(2) -; ASM32PWR4-DAG: lwz [[REGI:[0-9]+]], 0([[REGIADDR]]) -; ASM32PWR4-DAG: stw [[REGI]], 64(1) -; ASM32PWR4-DAG: lwz [[REGLLIADDR:[0-9]+]], L..C7(2) -; ASM32PWR4-DAG: lwz [[REGLLI1:[0-9]+]], 0([[REGLLIADDR]]) -; ASM32PWR4-DAG: stw [[REGLLI1]], 68(1) -; ASM32PWR4-DAG: lwz [[REGLLI2:[0-9]+]], 4([[REGLLIADDR]]) -; ASM32PWR4-DAG: stw [[REGLLI2]], 72(1) -; ASM32PWR4-DAG: stw [[REGI]], 76(1) -; ASM32PWR4-NEXT: bl .test_stackarg_int[PR] -; ASM32PWR4-NEXT: nop - -; The DAG block 
permits some invalid inputs for the benefit of allowing more valid orderings. -; 64BIT-DAG: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: $x3 = LI8 1 -; 64BIT-DAG: $x4 = LI8 2 -; 64BIT-DAG: $x5 = LI8 3 -; 64BIT-DAG: $x6 = LI8 4 -; 64BIT-DAG: $x7 = LI8 5 -; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: renamable $x[[REGCADDR:[0-9]+]] = LDtoc @c, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGC:[0-9]+]] = LBZ8 0, killed renamable $x[[REGCADDR]] :: (dereferenceable load (s8) from @c) -; 64BIT-DAG: STD killed renamable $x[[REGC]], 112, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @si, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LHA8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load (s16) from @si) -; 64BIT-DAG: STD killed renamable $x[[REGSI]], 120, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGIADDR:[0-9]+]] = LDtoc @i, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGIADDR]] :: (dereferenceable load (s32) from @i) -; 64BIT-DAG: STD killed renamable $x[[REGI]], 128, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGLLIADDR:[0-9]+]] = LDtoc @lli, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGLLI:[0-9]+]] = LD 0, killed renamable $x[[REGLLIADDR]] :: (dereferenceable load (s64) from @lli) -; 64BIT-DAG: STD killed renamable $x[[REGLLI]], 136, $x1 :: (store (s64)) -; 64BIT-DAG: STD renamable $x[[REGI]], 144, $x1 :: (store (s64)) -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
-; ASM64PWR4-DAG: stdu 1, -160(1) -; ASM64PWR4-DAG: li 3, 1 -; ASM64PWR4-DAG: li 4, 2 -; ASM64PWR4-DAG: li 5, 3 -; ASM64PWR4-DAG: li 6, 4 -; ASM64PWR4-DAG: li 7, 5 -; ASM64PWR4-DAG: li 8, 6 -; ASM64PWR4-DAG: li 9, 7 -; ASM64PWR4-DAG: li 10, 8 -; ASM64PWR4-DAG: ld [[REGCADDR:[0-9]+]], L..C5(2) -; ASM64PWR4-DAG: lbz [[REGC:[0-9]+]], 0([[REGCADDR]]) -; ASM64PWR4-DAG: std [[REGC]], 112(1) -; ASM64PWR4-DAG: ld [[REGSIADDR:[0-9]+]], L..C3(2) -; ASM64PWR4-DAG: lha [[REGSI:[0-9]+]], 0([[REGSIADDR]]) -; ASM64PWR4-DAG: std [[REGSI]], 120(1) -; ASM64PWR4-DAG: ld [[REGIADDR:[0-9]+]], L..C4(2) -; ASM64PWR4-DAG: lwz [[REGI:[0-9]+]], 0([[REGIADDR]]) -; ASM64PWR4-DAG: std [[REGI]], 128(1) -; ASM64PWR4-DAG: ld [[REGLLIADDR:[0-9]+]], L..C6(2) -; ASM64PWR4-DAG: ld [[REGLLI:[0-9]+]], 0([[REGLLIADDR]]) -; ASM64PWR4-DAG: std [[REGLLI]], 136(1) -; ASM64PWR4-DAG: std [[REGI]], 144(1) -; ASM64PWR4-NEXT: bl .test_stackarg_int[PR] -; ASM64PWR4-NEXT: nop - ; Basic saving of floating point type arguments to the parameter save area. ; The float and double arguments will pass in both fpr as well as parameter save area. 
define void @call_test_stackarg_float() { +; ASM32PWR4-LABEL: call_test_stackarg_float: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz 3, L..C8(2) # @f +; ASM32PWR4-NEXT: stw 0, 88(1) +; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: li 6, 4 +; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: lfs 1, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C9(2) # @d +; ASM32PWR4-NEXT: li 8, 6 +; ASM32PWR4-NEXT: li 9, 7 +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: li 10, 8 +; ASM32PWR4-NEXT: stfd 2, 60(1) +; ASM32PWR4-NEXT: stfs 1, 56(1) +; ASM32PWR4-NEXT: bl .test_stackarg_float[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_stackarg_float: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C7(2) # @f +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: li 7, 5 +; ASM64PWR4-NEXT: lfs 1, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C8(2) # @d +; ASM64PWR4-NEXT: li 8, 6 +; ASM64PWR4-NEXT: li 9, 7 +; ASM64PWR4-NEXT: lfd 2, 0(3) +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: li 10, 8 +; ASM64PWR4-NEXT: stfd 2, 120(1) +; ASM64PWR4-NEXT: stfs 1, 112(1) +; ASM64PWR4-NEXT: bl .test_stackarg_float[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f, align 4 %1 = load double, ptr @d, align 8 @@ -1101,89 +1065,51 @@ entry: declare void @test_stackarg_float(i32, i32, i32, i32, i32, i32, i32, i32, float, double) -; CHECK-LABEL: name: call_test_stackarg_float - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
-; 32BIT-DAG: ADJCALLSTACKDOWN 68, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: $r3 = LI 1 -; 32BIT-DAG: $r4 = LI 2 -; 32BIT-DAG: $r5 = LI 3 -; 32BIT-DAG: $r6 = LI 4 -; 32BIT-DAG: $r7 = LI 5 -; 32BIT-DAG: $r8 = LI 6 -; 32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f1 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load (s32) from @f) -; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load (s64) from @d) -; 32BIT-DAG: STFS renamable $f1, 56, $r1 :: (store (s32)) -; 32BIT-DAG: STFD renamable $f2, 60, $r1 :: (store (s64)) -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 68, 0, implicit-def dead $r1, implicit $r1 - -; CHECKASM-LABEL: .call_test_stackarg_float: - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; ASM32PWR4: stwu 1, -80(1) -; ASM32PWR4-DAG: li 3, 1 -; ASM32PWR4-DAG: li 4, 2 -; ASM32PWR4-DAG: li 5, 3 -; ASM32PWR4-DAG: li 6, 4 -; ASM32PWR4-DAG: li 7, 5 -; ASM32PWR4-DAG: li 8, 6 -; ASM32PWR4-DAG: li 9, 7 -; ASM32PWR4-DAG: li 10, 8 -; ASM32PWR4-DAG: lwz [[REGF:[0-9]+]], L..C8(2) -; ASM32PWR4-DAG: lfs 1, 0([[REGF]]) -; ASM32PWR4-DAG: lwz [[REGD:[0-9]+]], L..C9(2) -; ASM32PWR4-DAG: lfd 2, 0([[REGD:[0-9]+]]) -; ASM32PWR4-DAG: stfs 1, 56(1) -; ASM32PWR4-DAG: stfd 2, 60(1) -; ASM32PWR4-NEXT: bl .test_stackarg_float[PR] -; ASM32PWR4-NEXT: nop -; ASM32PWR4-NEXT: addi 1, 1, 80 - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
-; 64BIT-DAG: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: $x3 = LI8 1 -; 64BIT-DAG: $x4 = LI8 2 -; 64BIT-DAG: $x5 = LI8 3 -; 64BIT-DAG: $x6 = LI8 4 -; 64BIT-DAG: $x7 = LI8 5 -; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $f1 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load (s32) from @f) -; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load (s64) from @d) -; 64BIT-DAG: STFS renamable $f1, 112, $x1 :: (store (s32)) -; 64BIT-DAG: STFD renamable $f2, 120, $x1 :: (store (s64)) -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $f1, implicit $f2, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
-; ASM64PWR4: stdu 1, -128(1) -; ASM64PWR4-DAG: li 3, 1 -; ASM64PWR4-DAG: li 4, 2 -; ASM64PWR4-DAG: li 5, 3 -; ASM64PWR4-DAG: li 6, 4 -; ASM64PWR4-DAG: li 7, 5 -; ASM64PWR4-DAG: li 8, 6 -; ASM64PWR4-DAG: li 9, 7 -; ASM64PWR4-DAG: li 10, 8 -; ASM64PWR4-DAG: ld [[REGF:[0-9]+]], L..C7(2) -; ASM64PWR4-DAG: lfs 1, 0([[REGF]]) -; ASM64PWR4-DAG: ld [[REGD:[0-9]+]], L..C8(2) -; ASM64PWR4-DAG: lfd 2, 0([[REGD]]) -; ASM64PWR4-DAG: stfs 1, 112(1) -; ASM64PWR4-DAG: stfd 2, 120(1) -; ASM64PWR4-NEXT: bl .test_stackarg_float[PR] -; ASM64PWR4-NEXT: nop -; ASM64PWR4-NEXT: addi 1, 1, 128 - define void @call_test_stackarg_float2() { +; ASM32PWR4-LABEL: call_test_stackarg_float2: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz 3, L..C9(2) # @d +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: li 6, 4 +; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: lfd 1, 0(3) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: li 8, 6 +; ASM32PWR4-NEXT: stfd 1, 56(1) +; ASM32PWR4-NEXT: lwz 9, 56(1) +; ASM32PWR4-NEXT: lwz 10, 60(1) +; ASM32PWR4-NEXT: bl .test_stackarg_float2[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_stackarg_float2: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C8(2) # @d +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: li 7, 5 +; ASM64PWR4-NEXT: lfd 1, 0(3) +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: li 8, 6 +; ASM64PWR4-NEXT: stfd 1, 120(1) +; ASM64PWR4-NEXT: ld 9, 120(1) +; ASM64PWR4-NEXT: bl .test_stackarg_float2[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load double, ptr @d, 
align 8 call void (i32, i32, i32, i32, i32, i32, ...) @test_stackarg_float2(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, double %0) @@ -1192,76 +1118,60 @@ entry: declare void @test_stackarg_float2(i32, i32, i32, i32, i32, i32, ...) -; CHECK-LABEL: name: call_test_stackarg_float2{{.*}} - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; 32BIT-DAG: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: $r3 = LI 1 -; 32BIT-DAG: $r4 = LI 2 -; 32BIT-DAG: $r5 = LI 3 -; 32BIT-DAG: $r6 = LI 4 -; 32BIT-DAG: $r7 = LI 5 -; 32BIT-DAG: $r8 = LI 6 -; 32BIT-DAG: renamable $r[[REG:[0-9]+]] = LWZtoc @d, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d) -; 32BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) -; 32BIT-DAG: renamable $r9 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) -; 32BIT-DAG: renamable $r10 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit $f1, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; CHECKASM-LABEL: .call_test_stackarg_float2: - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
-; ASM32PWR4: stwu 1, -64(1) -; ASM32PWR4-DAG: li 3, 1 -; ASM32PWR4-DAG: li 4, 2 -; ASM32PWR4-DAG: li 5, 3 -; ASM32PWR4-DAG: li 6, 4 -; ASM32PWR4-DAG: li 7, 5 -; ASM32PWR4-DAG: li 8, 6 -; ASM32PWR4-DAG: lwz [[REG:[0-9]+]], L..C9(2) -; ASM32PWR4-DAG: lfd 1, 0([[REG]]) -; ASM32PWR4-DAG: stfd 1, 56(1) -; ASM32PWR4-DAG: lwz 9, 56(1) -; ASM32PWR4-DAG: lwz 10, 60(1) -; ASM32PWR4-NEXT: bl .test_stackarg_float2[PR] -; ASM32PWR4-NEXT: nop -; ASM32PWR4-NEXT: addi 1, 1, 64 - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; 64BIT-DAG: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: $x3 = LI8 1 -; 64BIT-DAG: $x4 = LI8 2 -; 64BIT-DAG: $x5 = LI8 3 -; 64BIT-DAG: $x6 = LI8 4 -; 64BIT-DAG: $x7 = LI8 5 -; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: renamable $x[[REG:[0-9]+]] = LDtoc @d, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d) -; 64BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) -; 64BIT-DAG: renamable $x9 = LD 0, %stack.0 :: (load (s64) from %stack.0) -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit $f1, implicit $x9, implicit $x2, implicit-def $r1 -; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
-; ASM64PWR4: stdu 1, -128(1) -; ASM64PWR4-DAG: li 3, 1 -; ASM64PWR4-DAG: li 4, 2 -; ASM64PWR4-DAG: li 5, 3 -; ASM64PWR4-DAG: li 6, 4 -; ASM64PWR4-DAG: li 7, 5 -; ASM64PWR4-DAG: li 8, 6 -; ASM64PWR4-DAG: ld [[REG:[0-9]+]], L..C8(2) -; ASM64PWR4-DAG: lfd 1, 0([[REG]]) -; ASM64PWR4-DAG: stfd 1, 120(1) -; ASM64PWR4-DAG: ld 9, 120(1) -; ASM64PWR4-NEXT: bl .test_stackarg_float2[PR] -; ASM64PWR4-NEXT: nop -; ASM64PWR4-NEXT: addi 1, 1, 128 - ; A double arg will pass on the stack in PPC32 if there is only one available GPR. define void @call_test_stackarg_float3() { +; ASM32PWR4-LABEL: call_test_stackarg_float3: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -80(1) +; ASM32PWR4-NEXT: lwz 3, L..C9(2) # @d +; ASM32PWR4-NEXT: stw 0, 88(1) +; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: li 6, 4 +; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: lfd 1, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C8(2) # @f +; ASM32PWR4-NEXT: li 8, 6 +; ASM32PWR4-NEXT: li 9, 7 +; ASM32PWR4-NEXT: stfd 1, 72(1) +; ASM32PWR4-NEXT: lwz 10, 72(1) +; ASM32PWR4-NEXT: lfs 2, 0(3) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: stfs 2, 60(1) +; ASM32PWR4-NEXT: stfd 1, 52(1) +; ASM32PWR4-NEXT: bl .test_stackarg_float3[PR] +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 80 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_stackarg_float3: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C8(2) # @d +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: li 7, 5 +; ASM64PWR4-NEXT: lfd 1, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C7(2) # @f +; ASM64PWR4-NEXT: li 8, 6 +; ASM64PWR4-NEXT: li 9, 7 +; ASM64PWR4-NEXT: stfd 1, 120(1) +; ASM64PWR4-NEXT: ld 10, 120(1) +; ASM64PWR4-NEXT: lfs 2, 0(3) +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: stfs 2, 112(1) +; 
ASM64PWR4-NEXT: bl .test_stackarg_float3[PR] +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load double, ptr @d, align 8 %1 = load float, ptr @f, align 4 @@ -1271,94 +1181,79 @@ entry: declare void @test_stackarg_float3(i32, i32, i32, i32, i32, i32, i32, ...) -; CHECK-LABEL: name: call_test_stackarg_float3{{.*}} - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; In 32-bit the double arg is written to memory because it cannot be fully stored in the last 32-bit GPR. -; 32BIT-DAG: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: $r3 = LI 1 -; 32BIT-DAG: $r4 = LI 2 -; 32BIT-DAG: $r5 = LI 3 -; 32BIT-DAG: $r6 = LI 4 -; 32BIT-DAG: $r7 = LI 5 -; 32BIT-DAG: $r8 = LI 6 -; 32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load (s64) from @d) -; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f2 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load (s32) from @f) -; 32BIT-DAG: STFD renamable $f1, 52, $r1 :: (store (s64)) -; 32BIT-DAG: STFS renamable $f2, 60, $r1 :: (store (s32)) -; 32BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) -; 32BIT-DAG: renamable $r10 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 -; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 - -; CHECKASM-LABEL: .call_test_stackarg_float3: - -; The DAG block permits some invalid inputs for the benefit 
of allowing more valid orderings. -; ASM32PWR4: stwu 1, -80(1) -; ASM32PWR4-DAG: li 3, 1 -; ASM32PWR4-DAG: li 4, 2 -; ASM32PWR4-DAG: li 5, 3 -; ASM32PWR4-DAG: li 6, 4 -; ASM32PWR4-DAG: li 7, 5 -; ASM32PWR4-DAG: li 8, 6 -; ASM32PWR4-DAG: li 9, 7 -; ASM32PWR4-DAG: lwz [[REGD:[0-9]+]], L..C9(2) -; ASM32PWR4-DAG: lfd 1, 0([[REGD]]) -; ASM32PWR4-DAG: lwz [[REGF:[0-9]+]], L..C8(2) -; ASM32PWR4-DAG: lfs 2, 0([[REGF]]) -; ASM32PWR4-DAG: stfd 1, 52(1) -; ASM32PWR4-DAG: stfs 2, 60(1) -; ASM32PWR4-DAG: stfd 1, 72(1) -; ASM32PWR4-DAG: lwz 10, 72(1) -; ASM32PWR4-NEXT: bl .test_stackarg_float3[PR] -; ASM32PWR4-NEXT: nop -; ASM32PWR4-NEXT: addi 1, 1, 80 - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; In 64-bit the double arg is not written to memory because it is fully stored in the last 64-bit GPR. -; 64BIT-DAG: ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: $x3 = LI8 1 -; 64BIT-DAG: $x4 = LI8 2 -; 64BIT-DAG: $x5 = LI8 3 -; 64BIT-DAG: $x6 = LI8 4 -; 64BIT-DAG: $x7 = LI8 5 -; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load (s64) from @d) -; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $f2 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load (s32) from @f) -; 64BIT-DAG: STFS renamable $f2, 112, $x1 :: (store (s32)) -; 64BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) -; 64BIT-DAG: renamable $x10 = LD 0, %stack.0 :: (load (s64) from %stack.0) -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit $f1, implicit $x10, implicit $f2, implicit $x2, implicit-def $r1 - -; 64BIT-NEXT: 
ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 - -; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; ASM64PWR4: stdu 1, -128(1) -; ASM64PWR4-DAG: li 3, 1 -; ASM64PWR4-DAG: li 4, 2 -; ASM64PWR4-DAG: li 5, 3 -; ASM64PWR4-DAG: li 6, 4 -; ASM64PWR4-DAG: li 7, 5 -; ASM64PWR4-DAG: li 8, 6 -; ASM64PWR4-DAG: li 9, 7 -; ASM64PWR4-DAG: ld [[REGD:[0-9]+]], L..C8(2) -; ASM64PWR4-DAG: lfd 1, 0([[REGD]]) -; ASM64PWR4-DAG: ld [[REGF:[0-9]+]], L..C7(2) -; ASM64PWR4-DAG: lfs 2, 0([[REGF]]) -; ASM64PWR4-DAG: stfs 2, 112(1) -; ASM64PWR4-DAG: stfd 1, 120(1) -; ASM64PWR4-DAG: ld 10, 120(1) -; ASM64PWR4-NEXT: bl .test_stackarg_float3[PR] -; ASM64PWR4-NEXT: nop -; ASM64PWR4-NEXT: addi 1, 1, 128 - define i64 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i64 %ll9, i16 signext %s10, i8 zeroext %c11, i32 %ui12, i32 %si13, i64 %ll14, i8 zeroext %uc15, i32 %i16) { +; ASM32PWR4-LABEL: test_ints_stack: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: add 3, 3, 4 +; ASM32PWR4-NEXT: lwz 11, 92(1) +; ASM32PWR4-NEXT: add 3, 3, 5 +; ASM32PWR4-NEXT: add 3, 3, 6 +; ASM32PWR4-NEXT: add 3, 3, 7 +; ASM32PWR4-NEXT: lwz 12, 76(1) +; ASM32PWR4-NEXT: add 3, 3, 8 +; ASM32PWR4-NEXT: add 3, 3, 9 +; ASM32PWR4-NEXT: lwz 6, 60(1) +; ASM32PWR4-NEXT: add 3, 3, 10 +; ASM32PWR4-NEXT: srawi 5, 11, 31 +; ASM32PWR4-NEXT: srawi 8, 3, 31 +; ASM32PWR4-NEXT: lwz 4, 64(1) +; ASM32PWR4-NEXT: lwz 7, 56(1) +; ASM32PWR4-NEXT: stw 31, -4(1) # 4-byte Folded Spill +; ASM32PWR4-NEXT: srawi 31, 12, 31 +; ASM32PWR4-NEXT: addc 3, 3, 6 +; ASM32PWR4-NEXT: adde 7, 8, 7 +; ASM32PWR4-NEXT: lwz 6, 68(1) +; ASM32PWR4-NEXT: srawi 8, 4, 31 +; ASM32PWR4-NEXT: addc 3, 3, 4 +; ASM32PWR4-NEXT: adde 7, 7, 8 +; ASM32PWR4-NEXT: lwz 4, 72(1) +; ASM32PWR4-NEXT: addc 3, 3, 6 +; ASM32PWR4-NEXT: addze 6, 7 +; ASM32PWR4-NEXT: addc 3, 3, 4 +; ASM32PWR4-NEXT: lwz 0, 84(1) +; ASM32PWR4-NEXT: addze 4, 6 +; ASM32PWR4-NEXT: addc 3, 3, 12 +; ASM32PWR4-NEXT: lwz 7, 
80(1) +; ASM32PWR4-NEXT: adde 4, 4, 31 +; ASM32PWR4-NEXT: addc 3, 3, 0 +; ASM32PWR4-NEXT: lwz 6, 88(1) +; ASM32PWR4-NEXT: adde 4, 4, 7 +; ASM32PWR4-NEXT: addc 3, 3, 6 +; ASM32PWR4-NEXT: lwz 31, -4(1) # 4-byte Folded Reload +; ASM32PWR4-NEXT: addze 6, 4 +; ASM32PWR4-NEXT: addc 4, 3, 11 +; ASM32PWR4-NEXT: adde 3, 6, 5 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_ints_stack: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: add 3, 3, 4 +; ASM64PWR4-NEXT: ld 4, 112(1) +; ASM64PWR4-NEXT: add 3, 3, 5 +; ASM64PWR4-NEXT: add 3, 3, 6 +; ASM64PWR4-NEXT: add 3, 3, 7 +; ASM64PWR4-NEXT: lwa 12, 124(1) +; ASM64PWR4-NEXT: add 3, 3, 8 +; ASM64PWR4-NEXT: add 3, 3, 9 +; ASM64PWR4-NEXT: add 3, 3, 10 +; ASM64PWR4-NEXT: extsw 3, 3 +; ASM64PWR4-NEXT: lwz 5, 132(1) +; ASM64PWR4-NEXT: add 3, 3, 4 +; ASM64PWR4-NEXT: add 3, 3, 12 +; ASM64PWR4-NEXT: std 31, -8(1) # 8-byte Folded Spill +; ASM64PWR4-NEXT: add 3, 3, 5 +; ASM64PWR4-NEXT: lwz 31, 140(1) +; ASM64PWR4-NEXT: lwa 11, 148(1) +; ASM64PWR4-NEXT: add 3, 3, 31 +; ASM64PWR4-NEXT: add 3, 3, 11 +; ASM64PWR4-NEXT: ld 4, 152(1) +; ASM64PWR4-NEXT: lwz 0, 164(1) +; ASM64PWR4-NEXT: add 3, 3, 4 +; ASM64PWR4-NEXT: lwa 5, 172(1) +; ASM64PWR4-NEXT: add 3, 3, 0 +; ASM64PWR4-NEXT: add 3, 3, 5 +; ASM64PWR4-NEXT: ld 31, -8(1) # 8-byte Folded Reload +; ASM64PWR4-NEXT: blr entry: %add = add nsw i32 %i1, %i2 %add1 = add nsw i32 %add, %i3 @@ -1385,79 +1280,6 @@ entry: ret i64 %add20 } -; CHECK-LABEL: name: test_ints_stack - -; 32BIT-LABEL: liveins: -; 32BIT-DAG: - { reg: '$r3', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r4', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r5', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r6', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r7', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r8', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r9', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r10', virtual-reg: '' } - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 9, type: default, offset: 56, size: 4 -; 32BIT-DAG: - { id: 8, type: 
default, offset: 60, size: 4 -; 32BIT-DAG: - { id: 7, type: default, offset: 64, size: 4 -; 32BIT-DAG: - { id: 6, type: default, offset: 68, size: 4 -; 32BIT-DAG: - { id: 5, type: default, offset: 72, size: 4 -; 32BIT-DAG: - { id: 4, type: default, offset: 76, size: 4 -; 32BIT-DAG: - { id: 3, type: default, offset: 80, size: 4 -; 32BIT-DAG: - { id: 2, type: default, offset: 84, size: 4 -; 32BIT-DAG: - { id: 1, type: default, offset: 88, size: 4 -; 32BIT-DAG: - { id: 0, type: default, offset: 92, size: 4 - -; 32BIT-LABEL: body: | -; 32BIT-DAG: bb.0.entry: -; 32BIT-DAG: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 - -; 64BIT-LABEL: liveins: -; 64BIT-DAG: - { reg: '$x3', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x5', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x6', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x9', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' } - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 7, type: default, offset: 112, size: 8 -; 64BIT-DAG: - { id: 6, type: default, offset: 124, size: 4 -; 64BIT-DAG: - { id: 5, type: default, offset: 132, size: 4 -; 64BIT-DAG: - { id: 4, type: default, offset: 140, size: 4 -; 64BIT-DAG: - { id: 3, type: default, offset: 148, size: 4 -; 64BIT-DAG: - { id: 2, type: default, offset: 152, size: 8 -; 64BIT-DAG: - { id: 1, type: default, offset: 164, size: 4 -; 64BIT-DAG: - { id: 0, type: default, offset: 172, size: 4 -; 64BIT-DAG: body: | -; 64BIT-DAG: bb.0.entry: -; 64BIT-DAG: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 - -; CHECKASM-LABEL: .test_ints_stack: - -; ASM32PWR4-DAG: lwz [[REG1:[0-9]+]], 56(1) -; ASM32PWR4-DAG: lwz [[REG2:[0-9]+]], 60(1) -; ASM32PWR4-DAG: lwz [[REG3:[0-9]+]], 64(1) -; ASM32PWR4-DAG: lwz [[REG4:[0-9]+]], 68(1) -; ASM32PWR4-DAG: lwz [[REG5:[0-9]+]], 72(1) -; ASM32PWR4-DAG: lwz [[REG6:[0-9]+]], 76(1) -; ASM32PWR4-DAG: lwz 
[[REG7:[0-9]+]], 80(1) -; ASM32PWR4-DAG: lwz [[REG8:[0-9]+]], 84(1) -; ASM32PWR4-DAG: lwz [[REG9:[0-9]+]], 88(1) -; ASM32PWR4-DAG: lwz [[REG10:[0-9]+]], 92(1) - -; ASM64PWR4-DAG: ld [[REG1:[0-9]+]], 112(1) -; ASM64PWR4-DAG: lwa [[REG2:[0-9]+]], 124(1) -; ASM64PWR4-DAG: lwz [[REG3:[0-9]+]], 132(1) -; ASM64PWR4-DAG: lwz [[REG4:[0-9]+]], 140(1) -; ASM64PWR4-DAG: lwa [[REG5:[0-9]+]], 148(1) -; ASM64PWR4-DAG: ld [[REG6:[0-9]+]], 152(1) -; ASM64PWR4-DAG: lwz [[REG7:[0-9]+]], 164(1) -; ASM64PWR4-DAG: lwa [[REG8:[0-9]+]], 172(1) - @ll1 = common global i64 0, align 8 @si1 = common global i16 0, align 2 @ch = common global i8 0, align 1 @@ -1468,6 +1290,97 @@ entry: @i1 = common global i32 0, align 4 define void @caller_ints_stack() { +; ASM32PWR4-LABEL: caller_ints_stack: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -96(1) +; ASM32PWR4-NEXT: lwz 3, L..C10(2) # @si1 +; ASM32PWR4-NEXT: stw 0, 104(1) +; ASM32PWR4-NEXT: lwz 4, L..C11(2) # @ch +; ASM32PWR4-NEXT: lwz 6, L..C12(2) # @sint +; ASM32PWR4-NEXT: lwz 8, L..C13(2) # @ll2 +; ASM32PWR4-NEXT: lwz 10, L..C14(2) # @uc1 +; ASM32PWR4-NEXT: lwz 12, L..C15(2) # @i1 +; ASM32PWR4-NEXT: lha 5, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C16(2) # @ll1 +; ASM32PWR4-NEXT: lwz 11, 0(3) +; ASM32PWR4-NEXT: lwz 7, 4(3) +; ASM32PWR4-NEXT: lwz 3, L..C17(2) # @ui +; ASM32PWR4-NEXT: lbz 4, 0(4) +; ASM32PWR4-NEXT: lwz 3, 0(3) +; ASM32PWR4-NEXT: lwz 6, 0(6) +; ASM32PWR4-NEXT: lwz 9, 0(8) +; ASM32PWR4-NEXT: lwz 8, 4(8) +; ASM32PWR4-NEXT: lbz 10, 0(10) +; ASM32PWR4-NEXT: lwz 12, 0(12) +; ASM32PWR4-NEXT: stw 10, 88(1) +; ASM32PWR4-NEXT: li 10, 8 +; ASM32PWR4-NEXT: stw 8, 84(1) +; ASM32PWR4-NEXT: li 8, 6 +; ASM32PWR4-NEXT: stw 9, 80(1) +; ASM32PWR4-NEXT: li 9, 7 +; ASM32PWR4-NEXT: stw 6, 76(1) +; ASM32PWR4-NEXT: li 6, 4 +; ASM32PWR4-NEXT: stw 3, 72(1) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: stw 4, 68(1) +; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: stw 5, 64(1) +; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: stw 7, 
60(1) +; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: stw 12, 92(1) +; ASM32PWR4-NEXT: stw 11, 56(1) +; ASM32PWR4-NEXT: bl .test_ints_stack +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 96 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: caller_ints_stack: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -176(1) +; ASM64PWR4-NEXT: ld 3, L..C9(2) # @si1 +; ASM64PWR4-NEXT: std 0, 192(1) +; ASM64PWR4-NEXT: ld 4, L..C10(2) # @ch +; ASM64PWR4-NEXT: ld 6, L..C11(2) # @ll2 +; ASM64PWR4-NEXT: ld 8, L..C12(2) # @uc1 +; ASM64PWR4-NEXT: ld 9, L..C13(2) # @i1 +; ASM64PWR4-NEXT: li 10, 8 +; ASM64PWR4-NEXT: lha 7, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C14(2) # @ll1 +; ASM64PWR4-NEXT: ld 11, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C15(2) # @ui +; ASM64PWR4-NEXT: lbz 5, 0(4) +; ASM64PWR4-NEXT: ld 4, L..C16(2) # @sint +; ASM64PWR4-NEXT: lwz 3, 0(3) +; ASM64PWR4-NEXT: lwz 4, 0(4) +; ASM64PWR4-NEXT: ld 6, 0(6) +; ASM64PWR4-NEXT: lbz 8, 0(8) +; ASM64PWR4-NEXT: lwz 9, 0(9) +; ASM64PWR4-NEXT: std 9, 168(1) +; ASM64PWR4-NEXT: li 9, 7 +; ASM64PWR4-NEXT: std 8, 160(1) +; ASM64PWR4-NEXT: li 8, 6 +; ASM64PWR4-NEXT: std 6, 152(1) +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: std 4, 144(1) +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: std 3, 136(1) +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: std 5, 128(1) +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: std 7, 120(1) +; ASM64PWR4-NEXT: li 7, 5 +; ASM64PWR4-NEXT: std 11, 112(1) +; ASM64PWR4-NEXT: bl .test_ints_stack +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 176 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load i64, ptr @ll1, align 8 %1 = load i16, ptr @si1, align 2 @@ -1481,267 +1394,123 @@ entry: ret void } -; CHECK-LABEL: name: caller_ints_stack - -; 32BIT-DAG: $r3 = LI 1 -; 32BIT-DAG: $r4 = LI 2 -; 32BIT-DAG: $r5 = LI 3 -; 32BIT-DAG: $r6 = LI 4 -; 32BIT-DAG: $r7 = LI 5 -; 32BIT-DAG: $r8 = LI 6 -; 
32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: renamable $r[[REGLL1ADDR:[0-9]+]] = LWZtoc @ll1, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGLL1A:[0-9]+]] = LWZ 0, renamable $r[[REGLL1ADDR]] :: (dereferenceable load (s32) from @ll1, align 8) -; 32BIT-DAG: renamable $r[[REGLL1B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL1ADDR]] :: (dereferenceable load (s32) from @ll1 + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REGLL1A]], 56, $r1 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r[[REGLL1B]], 60, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si1, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load (s16) from @si1) -; 32BIT-DAG: STW killed renamable $r[[REGSI]], 64, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGCHADDR:[0-9]+]] = LWZtoc @ch, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGCH:[0-9]+]] = LBZ 0, killed renamable $r[[REGCHADDR]] :: (dereferenceable load (s8) from @ch) -; 32BIT-DAG: STW killed renamable $r[[REGCH]], 68, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGUIADDR:[0-9]+]] = LWZtoc @ui, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGUI:[0-9]+]] = LWZ 0, killed renamable $r[[REGUIADDR]] :: (dereferenceable load (s32) from @ui) -; 32BIT-DAG: STW killed renamable $r[[REGUI]], 72, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @sint, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LWZ 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load (s32) from @sint) -; 32BIT-DAG: STW killed renamable $r[[REGSI]], 76, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGLL2ADDR:[0-9]+]] = LWZtoc @ll2, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGLL2A:[0-9]+]] = LWZ 0, renamable $r[[REGLL2ADDR]] :: (dereferenceable load (s32) from @ll2, align 8) -; 32BIT-DAG: renamable $r[[REGLL2B:[0-9]+]] 
= LWZ 4, killed renamable $r[[REGLL2ADDR]] :: (dereferenceable load (s32) from @ll2 + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REGLL2A]], 80, $r1 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r[[REGLL2B]], 84, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGUCADDR:[0-9]+]] = LWZtoc @uc1, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGUC:[0-9]+]] = LBZ 0, killed renamable $r[[REGUCADDR]] :: (dereferenceable load (s8) from @uc1) -; 32BIT-DAG: STW killed renamable $r[[REGUC]], 88, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i1, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load (s32) from @i1) -; 32BIT-DAG: STW killed renamable $r[[REGI]], 92, $r1 :: (store (s32)) -; 32BIT-DAG: ADJCALLSTACKDOWN 96, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def dead $r3 -; 32BIT-NEXT: ADJCALLSTACKUP 96, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT-DAG: $x3 = LI8 1 -; 64BIT-DAG: $x4 = LI8 2 -; 64BIT-DAG: $x5 = LI8 3 -; 64BIT-DAG: $x6 = LI8 4 -; 64BIT-DAG: $x7 = LI8 5 -; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: renamable $x[[REGLL1ADDR:[0-9]+]] = LDtoc @ll1, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGLL1:[0-9]+]] = LD 0, killed renamable $x[[REGLL1ADDR]] :: (dereferenceable load (s64) from @ll1) -; 64BIT-DAG: STD killed renamable $x[[REGLL1]], 112, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @si1, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LHA8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load (s16) from @si1) -; 64BIT-DAG: STD killed renamable $x[[REGSI]], 120, $x1 :: 
(store (s64)) -; 64BIT-DAG: renamable $x[[REGCHADDR:[0-9]+]] = LDtoc @ch, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGCH:[0-9]+]] = LBZ8 0, killed renamable $x[[REGCHADDR]] :: (dereferenceable load (s8) from @ch) -; 64BIT-DAG: STD killed renamable $x[[REGCH]], 128, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGUIADDR:[0-9]+]] = LDtoc @ui, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGUI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGUIADDR]] :: (dereferenceable load (s32) from @ui) -; 64BIT-DAG: STD killed renamable $x[[REGUI]], 136, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @sint, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load (s32) from @sint) -; 64BIT-DAG: STD killed renamable $x[[REGSI]], 144, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGLL2ADDR:[0-9]+]] = LDtoc @ll2, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGLL2:[0-9]+]] = LD 0, killed renamable $x[[REGLL2ADDR]] :: (dereferenceable load (s64) from @ll2) -; 64BIT-DAG: STD killed renamable $x[[REGLL2]], 152, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGUCADDR:[0-9]+]] = LDtoc @uc1, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGUC:[0-9]+]] = LBZ8 0, killed renamable $x[[REGUCADDR]] :: (dereferenceable load (s8) from @uc1) -; 64BIT-DAG: STD killed renamable $x[[REGUC]], 160, $x1 :: (store (s64)) -; 64BIT-DAG: renamable $x[[REGIADDR:[0-9]+]] = LDtoc @i1, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGIADDR]] :: (dereferenceable load (s32) from @i1) -; 64BIT-DAG: STD killed renamable $x[[REGI]], 168, $x1 :: (store (s64)) -; 64BIT-DAG: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit 
$x9, implicit $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 -; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm - -; CHECKASM-LABEL: .caller_ints_stack: - -; ASM32PWR4: mflr 0 -; ASM32PWR4-NEXT: stwu 1, -96(1) -; ASM32PWR4-DAG: stw 0, 104(1) -; ASM32PWR4-DAG: li 3, 1 -; ASM32PWR4-DAG: li 4, 2 -; ASM32PWR4-DAG: li 5, 3 -; ASM32PWR4-DAG: li 6, 4 -; ASM32PWR4-DAG: li 7, 5 -; ASM32PWR4-DAG: li 9, 7 -; ASM32PWR4-DAG: li 8, 6 -; ASM32PWR4-DAG: li 10, 8 -; ASM32PWR4-DAG: lwz [[REG1:[0-9]+]], L..C10(2) -; ASM32PWR4-DAG: lwz [[REG2:[0-9]+]], L..C11(2) -; ASM32PWR4-DAG: lwz [[REG3:[0-9]+]], L..C12(2) -; ASM32PWR4-DAG: lwz [[REG4:[0-9]+]], L..C13(2) -; ASM32PWR4-DAG: lwz [[REG5:[0-9]+]], L..C14(2) -; ASM32PWR4-DAG: lwz [[REG6:[0-9]+]], L..C15(2) -; ASM32PWR4-DAG: lwz [[REG7:[0-9]+]], L..C16(2) -; ASM32PWR4-DAG: lwz [[REG8:[0-9]+]], L..C17(2) -; ASM32PWR4-DAG: lha 5, 0([[REG1]]) -; ASM32PWR4-DAG: lwz 11, 0([[REG7]]) -; ASM32PWR4-DAG: lwz 7, 4([[REG7]]) -; ASM32PWR4-DAG: lbz 4, 0([[REG2]]) -; ASM32PWR4-DAG: lwz 3, 0([[REG8]]) -; ASM32PWR4-DAG: lwz 6, 0([[REG3]]) -; ASM32PWR4-DAG: lwz 9, 0([[REG4]]) -; ASM32PWR4-DAG: lwz 8, 4([[REG4]]) -; ASM32PWR4-DAG: lbz 10, 0([[REG5]]) -; ASM32PWR4-DAG: lwz 12, 0([[REG6]]) -; ASM32PWR4-DAG: stw 11, 56(1) -; ASM32PWR4-DAG: stw 7, 60(1) -; ASM32PWR4-DAG: stw 5, 64(1) -; ASM32PWR4-DAG: stw 4, 68(1) -; ASM32PWR4-DAG: stw 3, 72(1) -; ASM32PWR4-DAG: stw 6, 76(1) -; ASM32PWR4-DAG: stw 9, 80(1) -; ASM32PWR4-DAG: stw 8, 84(1) -; ASM32PWR4-DAG: stw 10, 88(1) -; ASM32PWR4-DAG: stw 12, 92(1) -; ASM32PWR4-DAG: bl .test_ints_stack -; ASM32PWR4-DAG: nop -; ASM32PWR4-DAG: addi 1, 1, 96 -; ASM32PWR4-DAG: lwz 0, 8(1) -; ASM32PWR4-NEXT: mtlr 0 -; ASM32PWR4-NEXT: blr - -; ASM64PWR4: mflr 0 -; ASM64PWR4-NEXT: stdu 1, -176(1) -; ASM64PWR4-DAG: std 0, 192(1) -; ASM64PWR4-DAG: li 3, 1 -; ASM64PWR4-DAG: li 4, 2 -; ASM64PWR4-DAG: li 5, 3 -; ASM64PWR4-DAG: li 6, 4 -; ASM64PWR4-DAG: li 
7, 5 -; ASM64PWR4-DAG: li 8, 6 -; ASM64PWR4-DAG: li 9, 7 -; ASM64PWR4-DAG: li 10, 8 -; ASM64PWR4-DAG: ld [[REG1:[0-9]+]], L..C9(2) -; ASM64PWR4-DAG: ld [[REG2:[0-9]+]], L..C10(2) -; ASM64PWR4-DAG: ld [[REG3:[0-9]+]], L..C11(2) -; ASM64PWR4-DAG: ld [[REG4:[0-9]+]], L..C12(2) -; ASM64PWR4-DAG: ld [[REG5:[0-9]+]], L..C13(2) -; ASM64PWR4-DAG: ld [[REG6:[0-9]+]], L..C14(2) -; ASM64PWR4-DAG: ld [[REG7:[0-9]+]], L..C15(2) -; ASM64PWR4-DAG: ld [[REG8:[0-9]+]], L..C16(2) -; ASM64PWR4-DAG: lha 7, 0([[REG1]]) -; ASM64PWR4-DAG: lbz 5, 0([[REG2]]) -; ASM64PWR4-DAG: ld 6, 0([[REG3]]) -; ASM64PWR4-DAG: lbz 8, 0([[REG4]]) -; ASM64PWR4-DAG: lwz 9, 0([[REG5]]) -; ASM64PWR4-DAG: ld 11, 0([[REG6]]) -; ASM64PWR4-DAG: lwz 3, 0([[REG7]]) -; ASM64PWR4-DAG: lwz 4, 0([[REG8]]) -; ASM64PWR4-DAG: std 11, 112(1) -; ASM64PWR4-DAG: std 7, 120(1) -; ASM64PWR4-DAG: std 5, 128(1) -; ASM64PWR4-DAG: std 3, 136(1) -; ASM64PWR4-DAG: std 4, 144(1) -; ASM64PWR4-DAG: std 6, 152(1) -; ASM64PWR4-DAG: std 8, 160(1) -; ASM64PWR4-DAG: std 9, 168(1) -; ASM64PWR4-NEXT: bl .test_ints_stack -; ASM64PWR4-NEXT: nop -; ASM64PWR4-NEXT: addi 1, 1, 176 -; ASM64PWR4-NEXT: ld 0, 16(1) -; ASM64PWR4-NEXT: mtlr 0 -; ASM64PWR4-NEXT: blr - @globali1 = global i8 0, align 1 define void @test_i1_stack(i32 %a, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i1 zeroext %b) { +; ASM32PWR4-LABEL: test_i1_stack: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: lbz 3, 59(1) +; ASM32PWR4-NEXT: lwz 4, L..C18(2) # @globali1 +; ASM32PWR4-NEXT: stb 3, 0(4) +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_i1_stack: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: lbz 3, 119(1) +; ASM64PWR4-NEXT: ld 4, L..C17(2) # @globali1 +; ASM64PWR4-NEXT: stb 3, 0(4) +; ASM64PWR4-NEXT: blr entry: %frombool = zext i1 %b to i8 store i8 %frombool, ptr @globali1, align 1 ret void } -; CHECK-LABEL: name: test_i1_stack - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 0, type: default, offset: 59, size: 1 -; 32BIT-DAG: body: | -; 32BIT-DAG: 
bb.0.entry: -; 32BIT-DAG: renamable $r[[REGB:[0-9]+]] = LBZ 0, %fixed-stack.0 :: (load (s8) from %fixed-stack.0) -; 32BIT-DAG: renamable $r[[REGBTOC:[0-9]+]] = LWZtoc @globali1, $r2 :: (load (s32) from got) -; 32BIT-DAG: STB killed renamable $r[[REGB]], 0, killed renamable $r[[REGBTOC]] :: (store (s8) into @globali1) - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 0, type: default, offset: 119, size: 1 -; 64BIT-DAG: body: | -; 64BIT-DAG: bb.0.entry: -; 64BIT-DAG: renamable $r[[REGB:[0-9]+]] = LBZ 0, %fixed-stack.0 :: (load (s8) from %fixed-stack.0) -; 64BIT-DAG: renamable $x[[REGBTOC:[0-9]+]] = LDtoc @globali1, $x2 :: (load (s64) from got) -; 64BIT-DAG: STB killed renamable $r[[SCRATCHREG:[0-9]+]], 0, killed renamable $x[[REGBTOC]] :: (store (s8) into @globali1) -; 64BIT-DAG: BLR8 implicit $lr8, implicit $rm - -; CHECKASM-LABEL: test_i1_stack: - -; ASM32PWR4-DAG: lbz [[REGB:[0-9]+]], 59(1) -; ASM32PWR4-DAG: lwz [[REGBTOC:[0-9]+]], L..C18(2) -; ASM32PWR4-DAG: stb [[SCRATCHREG:[0-9]+]], 0([[REGBTOC]]) -; ASM32PWR4-DAG: blr - -; ASM64PWR4-DAG: lbz [[REGB:[0-9]+]], 119(1) -; ASM64PWR4-DAG: ld [[REGBTOC:[0-9]+]], L..C17(2) -; ASM64PWR4-DAG: stb [[SCRATCHREG:[0-9]+]], 0([[REGBTOC]]) -; ASM64PWR4-DAG: blr - define void @call_test_i1_stack() { +; ASM32PWR4-LABEL: call_test_i1_stack: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -64(1) +; ASM32PWR4-NEXT: li 11, 1 +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: stw 0, 72(1) +; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: stw 11, 56(1) +; ASM32PWR4-NEXT: li 6, 4 +; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: li 8, 6 +; ASM32PWR4-NEXT: li 9, 7 +; ASM32PWR4-NEXT: li 10, 8 +; ASM32PWR4-NEXT: bl .test_i1_stack +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: call_test_i1_stack: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 
1, -128(1) +; ASM64PWR4-NEXT: li 11, 1 +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: std 11, 112(1) +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: li 7, 5 +; ASM64PWR4-NEXT: li 8, 6 +; ASM64PWR4-NEXT: li 9, 7 +; ASM64PWR4-NEXT: li 10, 8 +; ASM64PWR4-NEXT: bl .test_i1_stack +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: call void @test_i1_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i1 true) ret void } -; CHECK-LABEL: name: call_test_i1_stack - -; 32BIT-DAG: ADJCALLSTACKDOWN 60, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: $r3 = LI 1 -; 32BIT-DAG: $r4 = LI 2 -; 32BIT-DAG: $r5 = LI 3 -; 32BIT-DAG: $r6 = LI 4 -; 32BIT-DAG: $r7 = LI 5 -; 32BIT-DAG: $r8 = LI 6 -; 32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: renamable $r[[REGBOOLADDR:[0-9]+]] = LI 1 -; 32BIT-DAG: STW killed renamable $r[[REGBOOLADDR]], 56, $r1 :: (store (s32)) -; 32BIT-DAG: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 -; 32BIT-DAG: ADJCALLSTACKUP 60, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT-DAG: ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: $x3 = LI8 1 -; 64BIT-DAG: $x4 = LI8 2 -; 64BIT-DAG: $x5 = LI8 3 -; 64BIT-DAG: $x6 = LI8 4 -; 64BIT-DAG: $x7 = LI8 5 -; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: renamable $x[[REGBOOLADDR:[0-9]+]] = LI8 1 -; 64BIT-DAG: STD killed renamable $x[[REGBOOLADDR]], 112, $x1 :: (store (s64)) -; 64BIT-DAG: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 -; 64BIT-DAG: 
ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 - -; CHECKASM-LABEL: .call_test_i1_stack: - -; ASM32PWR4-DAG: mflr 0 -; ASM32PWR4-DAG: li 3, 1 -; ASM32PWR4-DAG: li 4, 2 -; ASM32PWR4-DAG: li 5, 3 -; ASM32PWR4-DAG: li 6, 4 -; ASM32PWR4-DAG: li 7, 5 -; ASM32PWR4-DAG: li 8, 6 -; ASM32PWR4-DAG: li 9, 7 -; ASM32PWR4-DAG: li 10, 8 -; ASM32PWR4-DAG: stw [[REGB:[0-9]+]], 56(1) -; ASM32PWR4-DAG: li [[REGB]], 1 -; ASM32PWR4-DAG: bl .test_i1 - define double @test_fpr_stack(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %s10, double %l11, double %d12, double %d13, float %f14, double %d15, float %f16) { +; ASM32PWR4-LABEL: test_fpr_stack: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: fadd 0, 1, 2 +; ASM32PWR4-NEXT: lfs 1, 128(1) +; ASM32PWR4-NEXT: fadd 0, 0, 3 +; ASM32PWR4-NEXT: lfd 2, 132(1) +; ASM32PWR4-NEXT: fadd 0, 0, 4 +; ASM32PWR4-NEXT: fadd 0, 0, 5 +; ASM32PWR4-NEXT: fadd 0, 0, 6 +; ASM32PWR4-NEXT: fadd 0, 0, 7 +; ASM32PWR4-NEXT: fadd 0, 0, 8 +; ASM32PWR4-NEXT: fadd 0, 0, 9 +; ASM32PWR4-NEXT: fadd 0, 0, 10 +; ASM32PWR4-NEXT: fadd 0, 0, 11 +; ASM32PWR4-NEXT: fadd 0, 0, 12 +; ASM32PWR4-NEXT: fadd 0, 0, 13 +; ASM32PWR4-NEXT: fadd 0, 0, 13 +; ASM32PWR4-NEXT: fadd 0, 0, 1 +; ASM32PWR4-NEXT: lfs 1, 140(1) +; ASM32PWR4-NEXT: fadd 0, 0, 2 +; ASM32PWR4-NEXT: fadd 1, 0, 1 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: test_fpr_stack: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: fadd 0, 1, 2 +; ASM64PWR4-NEXT: lfs 1, 152(1) +; ASM64PWR4-NEXT: fadd 0, 0, 3 +; ASM64PWR4-NEXT: lfd 2, 160(1) +; ASM64PWR4-NEXT: fadd 0, 0, 4 +; ASM64PWR4-NEXT: fadd 0, 0, 5 +; ASM64PWR4-NEXT: fadd 0, 0, 6 +; ASM64PWR4-NEXT: fadd 0, 0, 7 +; ASM64PWR4-NEXT: fadd 0, 0, 8 +; ASM64PWR4-NEXT: fadd 0, 0, 9 +; ASM64PWR4-NEXT: fadd 0, 0, 10 +; ASM64PWR4-NEXT: fadd 0, 0, 11 +; ASM64PWR4-NEXT: fadd 0, 0, 12 +; ASM64PWR4-NEXT: fadd 0, 0, 13 +; ASM64PWR4-NEXT: fadd 0, 0, 13 +; ASM64PWR4-NEXT: fadd 0, 0, 1 +; ASM64PWR4-NEXT: lfs 1, 
168(1) +; ASM64PWR4-NEXT: fadd 0, 0, 2 +; ASM64PWR4-NEXT: fadd 1, 0, 1 +; ASM64PWR4-NEXT: blr entry: %add = fadd double %d1, %d2 %add1 = fadd double %add, %d3 @@ -1764,57 +1533,182 @@ define double @test_fpr_stack(double %d1, double %d2, double %d3, double %d4, do ret double %add16 } -; CHECK-LABEL: name: test_fpr_stack{{.*}} - -; CHECK-LABEL: liveins: -; CHECK-DAG: - { reg: '$f1', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f2', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f3', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f4', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f5', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f6', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f7', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f8', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f9', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f10', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f11', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f12', virtual-reg: '' } -; CHECK-DAG: - { reg: '$f13', virtual-reg: '' } - -; CHECK-LABEL: fixedStack: -; 32BIT-DAG: - { id: 2, type: default, offset: 128, size: 4 -; 32BIT-DAG: - { id: 1, type: default, offset: 132, size: 8 -; 32BIT-DAG: - { id: 0, type: default, offset: 140, size: 4 - -; 64BIT-DAG: - { id: 2, type: default, offset: 152, size: 4 -; 64BIT-DAG: - { id: 1, type: default, offset: 160, size: 8 -; 64BIT-DAG: - { id: 0, type: default, offset: 168, size: 4 - -; CHECK-LABEL: body: | -; CHECK-DAG: bb.0.entry: -; CHECK-DAG: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 - -; CHECKASM-LABEL: .test_fpr_stack: - -; ASM32PWR4-DAG: lfs [[REG1:[0-9]+]], 128(1) -; ASM32PWR4-DAG: lfd [[REG2:[0-9]+]], 132(1) -; ASM32PWR4-DAG: lfs [[REG3:[0-9]+]], 140(1) -; ASM32PWR4-DAG: fadd 0, 0, [[REG1]] -; ASM32PWR4-DAG: fadd 0, 0, [[REG2]] -; ASM32PWR4-DAG: fadd 1, 0, [[REG3]] - -; ASM64PWR4-DAG: lfs [[REG1:[0-9]+]], 152(1) -; ASM64PWR4-DAG: lfd [[REG2:[0-9]+]], 160(1) -; ASM64PWR4-DAG: lfs [[REG3:[0-9]+]], 168(1) -; ASM64PWR4-DAG: fadd 0, 0, [[REG1]] -; 
ASM64PWR4-DAG: fadd 0, 0, [[REG2]] -; ASM64PWR4-DAG: fadd 1, 0, [[REG3]] - @f14 = common global float 0.000000e+00, align 4 @d15 = common global double 0.000000e+00, align 8 @f16 = common global float 0.000000e+00, align 4 define void @caller_fpr_stack() { +; ASM32PWR4-LABEL: caller_fpr_stack: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -144(1) +; ASM32PWR4-NEXT: lwz 3, L..C19(2) # @d15 +; ASM32PWR4-NEXT: lwz 4, L..C20(2) # @f14 +; ASM32PWR4-NEXT: lwz 5, L..C21(2) # @f16 +; ASM32PWR4-NEXT: stw 0, 152(1) +; ASM32PWR4-NEXT: lis 6, 16361 +; ASM32PWR4-NEXT: ori 6, 6, 39321 +; ASM32PWR4-NEXT: lfd 0, 0(3) +; ASM32PWR4-NEXT: lwz 3, 0(4) +; ASM32PWR4-NEXT: lwz 4, 0(5) +; ASM32PWR4-NEXT: li 5, 0 +; ASM32PWR4-NEXT: stw 5, 60(1) +; ASM32PWR4-NEXT: lis 5, 16352 +; ASM32PWR4-NEXT: stw 5, 56(1) +; ASM32PWR4-NEXT: lis 5, 13107 +; ASM32PWR4-NEXT: ori 5, 5, 13107 +; ASM32PWR4-NEXT: stw 5, 68(1) +; ASM32PWR4-NEXT: lis 5, 16355 +; ASM32PWR4-NEXT: ori 5, 5, 13107 +; ASM32PWR4-NEXT: stw 5, 64(1) +; ASM32PWR4-NEXT: lis 5, 26214 +; ASM32PWR4-NEXT: ori 5, 5, 26214 +; ASM32PWR4-NEXT: stw 5, 76(1) +; ASM32PWR4-NEXT: lis 5, 16358 +; ASM32PWR4-NEXT: ori 5, 5, 26214 +; ASM32PWR4-NEXT: stw 5, 72(1) +; ASM32PWR4-NEXT: lis 5, -26215 +; ASM32PWR4-NEXT: ori 5, 5, 39322 +; ASM32PWR4-NEXT: stw 5, 84(1) +; ASM32PWR4-NEXT: stw 5, 100(1) +; ASM32PWR4-NEXT: lis 5, 16313 +; ASM32PWR4-NEXT: ori 5, 5, 39321 +; ASM32PWR4-NEXT: stw 5, 96(1) +; ASM32PWR4-NEXT: lis 5, -15729 +; ASM32PWR4-NEXT: ori 5, 5, 23593 +; ASM32PWR4-NEXT: stw 5, 108(1) +; ASM32PWR4-NEXT: lis 5, 16316 +; ASM32PWR4-NEXT: ori 5, 5, 10485 +; ASM32PWR4-NEXT: stw 5, 104(1) +; ASM32PWR4-NEXT: lis 5, -5243 +; ASM32PWR4-NEXT: ori 5, 5, 7864 +; ASM32PWR4-NEXT: stw 5, 116(1) +; ASM32PWR4-NEXT: lis 5, 16318 +; ASM32PWR4-NEXT: ori 5, 5, 47185 +; ASM32PWR4-NEXT: stw 6, 80(1) +; ASM32PWR4-NEXT: lis 6, -13108 +; ASM32PWR4-NEXT: ori 6, 6, 52429 +; ASM32PWR4-NEXT: stw 5, 112(1) +; ASM32PWR4-NEXT: lis 5, 2621 +; 
ASM32PWR4-NEXT: ori 5, 5, 28836 +; ASM32PWR4-NEXT: stw 6, 92(1) +; ASM32PWR4-NEXT: lis 6, 16364 +; ASM32PWR4-NEXT: ori 6, 6, 52428 +; ASM32PWR4-NEXT: stw 5, 124(1) +; ASM32PWR4-NEXT: lis 5, 16320 +; ASM32PWR4-NEXT: ori 5, 5, 41943 +; ASM32PWR4-NEXT: stw 6, 88(1) +; ASM32PWR4-NEXT: lwz 6, L..C22(2) # %const.0 +; ASM32PWR4-NEXT: stw 5, 120(1) +; ASM32PWR4-NEXT: lwz 5, L..C23(2) # %const.1 +; ASM32PWR4-NEXT: lfd 2, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C24(2) # %const.2 +; ASM32PWR4-NEXT: lfd 3, 0(5) +; ASM32PWR4-NEXT: lwz 5, L..C25(2) # %const.3 +; ASM32PWR4-NEXT: lfd 4, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C26(2) # %const.4 +; ASM32PWR4-NEXT: lfd 6, 0(5) +; ASM32PWR4-NEXT: lwz 5, L..C27(2) # %const.5 +; ASM32PWR4-NEXT: lfd 7, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C28(2) # %const.6 +; ASM32PWR4-NEXT: lfd 8, 0(5) +; ASM32PWR4-NEXT: lwz 5, L..C29(2) # %const.7 +; ASM32PWR4-NEXT: lfd 9, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C30(2) # %const.8 +; ASM32PWR4-NEXT: lfd 1, 0(5) +; ASM32PWR4-NEXT: lwz 5, L..C31(2) # %const.9 +; ASM32PWR4-NEXT: lfd 11, 0(6) +; ASM32PWR4-NEXT: lwz 6, L..C32(2) # %const.10 +; ASM32PWR4-NEXT: fmr 10, 1 +; ASM32PWR4-NEXT: lfd 12, 0(5) +; ASM32PWR4-NEXT: lwz 5, L..C33(2) # %const.11 +; ASM32PWR4-NEXT: lfd 13, 0(6) +; ASM32PWR4-NEXT: lfs 5, 0(5) +; ASM32PWR4-NEXT: stfd 0, 132(1) +; ASM32PWR4-NEXT: stw 4, 140(1) +; ASM32PWR4-NEXT: stw 3, 128(1) +; ASM32PWR4-NEXT: bl .test_fpr_stack +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 144 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: caller_fpr_stack: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -176(1) +; ASM64PWR4-NEXT: ld 3, L..C18(2) # @f14 +; ASM64PWR4-NEXT: std 0, 192(1) +; ASM64PWR4-NEXT: ld 4, L..C19(2) # @d15 +; ASM64PWR4-NEXT: ld 5, L..C20(2) # @f16 +; ASM64PWR4-NEXT: ld 6, L..C21(2) # %const.9 +; ASM64PWR4-NEXT: lis 7, 16313 +; ASM64PWR4-NEXT: lwz 3, 0(3) +; ASM64PWR4-NEXT: ld 4, 0(4) +; ASM64PWR4-NEXT: lwz 5, 
0(5) +; ASM64PWR4-NEXT: stw 3, 152(1) +; ASM64PWR4-NEXT: ld 3, L..C22(2) # %const.0 +; ASM64PWR4-NEXT: std 4, 160(1) +; ASM64PWR4-NEXT: ld 4, L..C23(2) # %const.1 +; ASM64PWR4-NEXT: lfd 2, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C24(2) # %const.2 +; ASM64PWR4-NEXT: lfd 3, 0(4) +; ASM64PWR4-NEXT: ld 4, L..C25(2) # %const.3 +; ASM64PWR4-NEXT: lfd 4, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C26(2) # %const.4 +; ASM64PWR4-NEXT: lfd 6, 0(4) +; ASM64PWR4-NEXT: ld 4, L..C27(2) # %const.5 +; ASM64PWR4-NEXT: lfd 7, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C28(2) # %const.6 +; ASM64PWR4-NEXT: lfd 8, 0(4) +; ASM64PWR4-NEXT: ld 4, L..C29(2) # %const.7 +; ASM64PWR4-NEXT: lfd 9, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C30(2) # %const.8 +; ASM64PWR4-NEXT: lfd 1, 0(4) +; ASM64PWR4-NEXT: lis 4, 16320 +; ASM64PWR4-NEXT: ori 4, 4, 41943 +; ASM64PWR4-NEXT: rldic 4, 4, 32, 2 +; ASM64PWR4-NEXT: lfd 11, 0(3) +; ASM64PWR4-NEXT: lis 3, 16316 +; ASM64PWR4-NEXT: fmr 10, 1 +; ASM64PWR4-NEXT: ori 3, 3, 10485 +; ASM64PWR4-NEXT: oris 4, 4, 2621 +; ASM64PWR4-NEXT: stw 5, 168(1) +; ASM64PWR4-NEXT: lis 5, 16318 +; ASM64PWR4-NEXT: rldic 3, 3, 32, 2 +; ASM64PWR4-NEXT: ori 5, 5, 47185 +; ASM64PWR4-NEXT: ori 4, 4, 28836 +; ASM64PWR4-NEXT: lfd 12, 0(6) +; ASM64PWR4-NEXT: ld 6, L..C31(2) # %const.10 +; ASM64PWR4-NEXT: oris 3, 3, 49807 +; ASM64PWR4-NEXT: ori 3, 3, 23593 +; ASM64PWR4-NEXT: std 4, 144(1) +; ASM64PWR4-NEXT: rldic 4, 5, 32, 2 +; ASM64PWR4-NEXT: oris 4, 4, 60293 +; ASM64PWR4-NEXT: ori 4, 4, 7864 +; ASM64PWR4-NEXT: std 3, 128(1) +; ASM64PWR4-NEXT: ld 3, L..C32(2) # %const.11 +; ASM64PWR4-NEXT: ori 5, 7, 39321 +; ASM64PWR4-NEXT: rldic 5, 5, 32, 2 +; ASM64PWR4-NEXT: std 4, 136(1) +; ASM64PWR4-NEXT: lis 4, 4091 +; ASM64PWR4-NEXT: ori 4, 4, 13107 +; ASM64PWR4-NEXT: rldic 4, 4, 34, 2 +; ASM64PWR4-NEXT: lfs 5, 0(3) +; ASM64PWR4-NEXT: oris 3, 5, 39321 +; ASM64PWR4-NEXT: ori 3, 3, 39322 +; ASM64PWR4-NEXT: lfd 13, 0(6) +; ASM64PWR4-NEXT: std 3, 120(1) +; ASM64PWR4-NEXT: oris 3, 4, 52428 +; ASM64PWR4-NEXT: ori 3, 3, 52429 +; 
ASM64PWR4-NEXT: std 3, 112(1) +; ASM64PWR4-NEXT: bl .test_fpr_stack +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 176 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %0 = load float, ptr @f14, align 4 %1 = load double, ptr @d15, align 8 @@ -1823,152 +1717,60 @@ entry: ret void } -; CHECK-LABEL: caller_fpr_stack - -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.0, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.1, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.2, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.3, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.4, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.5, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.6, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.7, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.8, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.9, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.10, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.11, $r2 :: (load (s32) from got) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 56, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 64, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable 
$r[[SCRATCHREG:[0-9]+]], 72, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 80, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[SCRATCHREG:[0-9]+]], 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 88, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 96, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 104, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 112, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 120, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 128, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGF1:[0-9]+]] = LWZtoc @f14, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGF1]] :: (load (s32) from @f14) -; 32BIT-DAG: STFD killed renamable $f0, 132, $r1 :: (store (s64)) -; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d15, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f0 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load (s64) 
from @d15) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 140, $r1 :: (store (s32)) -; 32BIT-DAG: renamable $r[[REGF2:[0-9]+]] = LWZtoc @f16, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZ 0, killed renamable $r[[REGF2]] :: (load (s32) from @f16) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f3 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f4 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f5 = LFS 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s32) from constant-pool) -; 32BIT-DAG: renamable $f6 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f7 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f8 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f9 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: $f10 = COPY renamable $f1 -; 32BIT-DAG: renamable $f11 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f12 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $f13 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, 
implicit-def dead $f1 -; 32BIT-NEXT: ADJCALLSTACKUP 144, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.1, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.2, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.[[SCRATCHREG:[0-9]+]], $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.4, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.5, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.6, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.7, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.8, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.9, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.10, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[REGF1:[0-9]+]] = LDtoc @f14, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $r3 = LWZ 0, killed renamable $x[[REGF1]] :: (load (s32) from @f14) -; 64BIT-DAG: renamable $x[[REGF2:[0-9]+]] = LDtoc @f16, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $r5 = LWZ 0, killed renamable $x[[REGF2]] :: (load (s32) from @f16) -; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d15, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x4 = LD 0, killed renamable $x[[REGD]] :: (load (s64) from @d15) -; 64BIT-DAG: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: 
(load (s64) from constant-pool) -; 64BIT-DAG: renamable $f[[SCRATCHREG:[0-9]+]] = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f4 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f5 = LFS 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s32) from constant-pool) -; 64BIT-DAG: renamable $f6 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f7 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f8 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f9 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: $f10 = COPY renamable $f1 -; 64BIT-DAG: renamable $f11 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f12 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f13 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def dead $f1 -; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm - -; CHECKASM-LABEL: .caller_fpr_stack: - -; ASM32PWR4: mflr 0 -; ASM32PWR4-NEXT: stwu 1, -144(1) -; ASM32PWR4-DAG: stw 0, 152(1) -; ASM32PWR4-DAG: lwz [[REGF1ADDR:[0-9]+]], L..C20(2) -; ASM32PWR4-DAG: lwz [[REGF1:[0-9]+]], 0([[REGF1ADDR]]) -; ASM32PWR4-DAG: lwz [[REGDADDR:[0-9]+]], L..C19(2) -; ASM32PWR4-DAG: lfd [[REGD:[0-9]+]], 
0([[REGDADDR]]) -; ASM32PWR4-DAG: lwz [[REGF2ADDR:[0-9]+]], L..C21(2) -; ASM32PWR4-DAG: lwz [[REGF2:[0-9]+]], 0([[REGF2ADDR]]) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 56(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 60(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 64(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 68(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 72(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 76(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 80(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 84(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 88(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 92(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 96(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 100(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 108(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 104(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 112(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 116(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 120(1) -; ASM32PWR4-DAG: stw [[SCRATCHREG:[0-9]+]], 124(1) -; ASM32PWR4-DAG: stw [[REGF1]], 128(1) -; ASM32PWR4-DAG: stfd [[REGD]], 132(1) -; ASM32PWR4-DAG: stw [[REGF2]], 140(1) -; ASM32PWR4-NEXT: bl .test_fpr_stack - -; ASM64PWR4: mflr 0 -; ASM64PWR4-NEXT: stdu 1, -176(1) -; ASM64PWR4-DAG: std 0, 192(1) -; ASM64PWR4-DAG: ld [[REGF1ADDR:[0-9]+]], L..C18(2) -; ASM64PWR4-DAG: lwz [[REGF1:[0-9]+]], 0([[REGF1ADDR]]) -; ASM64PWR4-DAG: ld [[REGDADDR:[0-9]+]], L..C19(2) -; ASM64PWR4-DAG: ld [[REGD:[0-9]+]], 0([[REGDADDR]]) -; ASM64PWR4-DAG: ld [[REGF2ADDR:[0-9]+]], L..C20(2) -; ASM64PWR4-DAG: lwz [[REGF2:[0-9]+]], 0([[REGF2ADDR]]) -; ASM64PWR4-DAG: std [[SCRATCHREG:[0-9]+]], 112(1) -; ASM64PWR4-DAG: std [[SCRATCHREG:[0-9]+]], 120(1) -; ASM64PWR4-DAG: std [[SCRATCHREG:[0-9]+]], 128(1) -; ASM64PWR4-DAG: std [[SCRATCHREG:[0-9]+]], 136(1) -; ASM64PWR4-DAG: std [[SCRATCHREG:[0-9]+]], 144(1) -; ASM64PWR4-DAG: stw [[REGF1]], 152(1) -; ASM64PWR4-DAG: std [[REGD]], 160(1) -; ASM64PWR4-DAG: stw [[REGF2]], 
168(1) -; ASM64PWR4-NEXT: bl .test_fpr_stack - define i32 @mix_callee(double %d1, double %d2, double %d3, double %d4, i8 zeroext %c1, i16 signext %s1, i64 %ll1, i32 %i1, i32 %i2, i32 %i3) { +; ASM32PWR4-LABEL: mix_callee: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: lwz 4, 60(1) +; ASM32PWR4-NEXT: lis 8, 17200 +; ASM32PWR4-NEXT: fadd 1, 1, 2 +; ASM32PWR4-NEXT: fadd 1, 1, 3 +; ASM32PWR4-NEXT: lwz 5, 56(1) +; ASM32PWR4-NEXT: lwz 3, 68(1) +; ASM32PWR4-NEXT: add 4, 5, 4 +; ASM32PWR4-NEXT: lwz 5, L..C34(2) # %const.0 +; ASM32PWR4-NEXT: fadd 1, 1, 4 +; ASM32PWR4-NEXT: lwz 6, 72(1) +; ASM32PWR4-NEXT: add 3, 4, 3 +; ASM32PWR4-NEXT: lwz 7, 76(1) +; ASM32PWR4-NEXT: add 3, 3, 6 +; ASM32PWR4-NEXT: stw 8, -16(1) +; ASM32PWR4-NEXT: add 3, 3, 7 +; ASM32PWR4-NEXT: lwz 8, 80(1) +; ASM32PWR4-NEXT: add 3, 3, 8 +; ASM32PWR4-NEXT: lfs 0, 0(5) +; ASM32PWR4-NEXT: xoris 3, 3, 32768 +; ASM32PWR4-NEXT: stw 3, -12(1) +; ASM32PWR4-NEXT: addi 3, 1, -4 +; ASM32PWR4-NEXT: lfd 2, -16(1) +; ASM32PWR4-NEXT: fsub 0, 2, 0 +; ASM32PWR4-NEXT: fadd 0, 0, 1 +; ASM32PWR4-NEXT: fctiwz 0, 0 +; ASM32PWR4-NEXT: stfiwx 0, 0, 3 +; ASM32PWR4-NEXT: lwz 3, -4(1) +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: mix_callee: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: lwz 3, 116(1) +; ASM64PWR4-NEXT: add 4, 7, 8 +; ASM64PWR4-NEXT: fadd 0, 1, 2 +; ASM64PWR4-NEXT: add 4, 4, 9 +; ASM64PWR4-NEXT: fadd 0, 0, 3 +; ASM64PWR4-NEXT: add 4, 4, 10 +; ASM64PWR4-NEXT: lwz 5, 124(1) +; ASM64PWR4-NEXT: add 3, 4, 3 +; ASM64PWR4-NEXT: add 3, 3, 5 +; ASM64PWR4-NEXT: fadd 0, 0, 4 +; ASM64PWR4-NEXT: extsw 3, 3 +; ASM64PWR4-NEXT: std 3, -16(1) +; ASM64PWR4-NEXT: addi 3, 1, -4 +; ASM64PWR4-NEXT: lfd 1, -16(1) +; ASM64PWR4-NEXT: fcfid 1, 1 +; ASM64PWR4-NEXT: fadd 0, 1, 0 +; ASM64PWR4-NEXT: fctiwz 0, 0 +; ASM64PWR4-NEXT: stfiwx 0, 0, 3 +; ASM64PWR4-NEXT: lwz 3, -4(1) +; ASM64PWR4-NEXT: blr entry: %add = fadd double %d1, %d2 %add1 = fadd double %add, %d3 @@ -1991,137 +1793,149 @@ define i32 @mix_callee(double %d1, double %d2, 
double %d3, double %d4, i8 zeroex ret i32 %conv16 } -; CHECK-LABEL: mix_callee - -; 32BIT-LABEL: liveins: -; 32BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f2', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f3', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f4', virtual-reg: '' } - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 6, type: default, offset: 56, size: 4 -; 32BIT-DAG: - { id: 5, type: default, offset: 60, size: 4 -; 32BIT-DAG: - { id: 4, type: default, offset: 64, size: 4 -; 32BIT-DAG: - { id: 3, type: default, offset: 68, size: 4 -; 32BIT-DAG: - { id: 2, type: default, offset: 72, size: 4 -; 32BIT-DAG: - { id: 1, type: default, offset: 76, size: 4 -; 32BIT-DAG: - { id: 0, type: default, offset: 80, size: 4 - -; 32BIT-LABEL: body: | -; 32BIT-DAG: bb.0.entry: -; 32BIT-DAG: liveins: $f1, $f2, $f3, $f4 - -; 64BIT-LABEL: liveins: -; 64BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f2', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f3', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x9', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' } - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 1, type: default, offset: 116, size: 4 -; 64BIT-DAG: - { id: 0, type: default, offset: 124, size: 4 - -; 64BIT-LABEL: body: | -; 64BIT-DAG: bb.0.entry: -; 64BIT-DAG: liveins: $f1, $f2, $f3, $f4, $x7, $x8, $x9, $x10 - -; CHECKASM-LABEL: .mix_callee - -; ASM32PWR4-DAG: lwz [[REG1:[0-9]+]], 56(1) -; ASM32PWR4-DAG: lwz [[REG2:[0-9]+]], 60(1) -; ASM32PWR4-DAG: lwz [[REG4:[0-9]+]], 68(1) -; ASM32PWR4-DAG: lwz [[REG5:[0-9]+]], 72(1) -; ASM32PWR4-DAG: lwz [[REG6:[0-9]+]], 76(1) -; ASM32PWR4-DAG: lwz [[REG7:[0-9]+]], 80(1) -; ASM32PWR4-DAG: blr - -; ASM64PWR-DAG: ld [[REG1:[0-9]+]], 112(1) -; ASM64PWR-DAG: ld [[REG2:[0-9]+]], 120(1) -; ASM64PWR-DAG: fadd 0, 0, [[REG1]] -; ASM64PWR-DAG: add 3, 
3, [[REG2]] -; ASM64PWR-DAG: blr - define void @caller_mix() { +; ASM32PWR4-LABEL: caller_mix: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -96(1) +; ASM32PWR4-NEXT: li 3, 60 +; ASM32PWR4-NEXT: stw 0, 104(1) +; ASM32PWR4-NEXT: stw 3, 80(1) +; ASM32PWR4-NEXT: li 3, 50 +; ASM32PWR4-NEXT: stw 3, 76(1) +; ASM32PWR4-NEXT: li 3, 40 +; ASM32PWR4-NEXT: stw 3, 72(1) +; ASM32PWR4-NEXT: li 3, 0 +; ASM32PWR4-NEXT: stw 3, 64(1) +; ASM32PWR4-NEXT: li 3, 2 +; ASM32PWR4-NEXT: stw 3, 60(1) +; ASM32PWR4-NEXT: lwz 3, L..C35(2) # %const.0 +; ASM32PWR4-NEXT: lfd 1, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C36(2) # %const.1 +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C37(2) # %const.2 +; ASM32PWR4-NEXT: lfd 3, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C38(2) # %const.3 +; ASM32PWR4-NEXT: lfd 4, 0(3) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: stw 3, 56(1) +; ASM32PWR4-NEXT: lis 3, 457 +; ASM32PWR4-NEXT: ori 3, 3, 50048 +; ASM32PWR4-NEXT: stw 3, 68(1) +; ASM32PWR4-NEXT: bl .mix_callee +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 96 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: caller_mix: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: mflr 0 +; ASM64PWR4-NEXT: stdu 1, -128(1) +; ASM64PWR4-NEXT: ld 3, L..C33(2) # %const.0 +; ASM64PWR4-NEXT: ld 4, L..C34(2) # %const.1 +; ASM64PWR4-NEXT: lis 5, 457 +; ASM64PWR4-NEXT: li 7, 1 +; ASM64PWR4-NEXT: std 0, 144(1) +; ASM64PWR4-NEXT: ori 9, 5, 50048 +; ASM64PWR4-NEXT: li 8, 2 +; ASM64PWR4-NEXT: lfd 1, 0(3) +; ASM64PWR4-NEXT: ld 3, L..C35(2) # %const.2 +; ASM64PWR4-NEXT: li 10, 40 +; ASM64PWR4-NEXT: lfd 2, 0(4) +; ASM64PWR4-NEXT: ld 4, L..C36(2) # %const.3 +; ASM64PWR4-NEXT: lfd 3, 0(3) +; ASM64PWR4-NEXT: li 3, 60 +; ASM64PWR4-NEXT: lfd 4, 0(4) +; ASM64PWR4-NEXT: li 4, 50 +; ASM64PWR4-NEXT: std 3, 120(1) +; ASM64PWR4-NEXT: std 4, 112(1) +; ASM64PWR4-NEXT: bl .mix_callee +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 128 +; 
ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %call = call i32 @mix_callee(double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, i8 zeroext 1, i16 signext 2, i64 30000000, i32 40, i32 50, i32 60) ret void } -; CHECK-LABEL: name: caller_mix - -; 32BIT-DAG: ADJCALLSTACKDOWN 84, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.0, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.1, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.2, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f3 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.3, $r2 :: (load (s32) from got) -; 32BIT-DAG: renamable $f4 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 1 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 2 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LIS 457 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 0 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 40 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 50 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 60 -; 32BIT-DAG: STW killed renamable $r[[REG1:[0-9]+]], 56, $r1 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r[[REG3:[0-9]+]], 64, $r1 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r[[REG5:[0-9]+]], 72, $r1 :: (store 
(s32)) -; 32BIT-DAG: STW killed renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r[[REG7:[0-9]+]], 80, $r1 :: (store (s32)) -; 32BIT-DAG: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $r2, implicit-def $r1, implicit-def dead $r3 -; 32BIT-DAG: ADJCALLSTACKUP 84, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: BLR implicit $lr, implicit $rm - -; 64BIT-DAG: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.1, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.2, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.3, $x2 :: (load (s64) from got) -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f3 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $f4 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LI8 50 -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LI8 60 -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LIS8 457 -; 64BIT-DAG: $x7 = LI8 1 -; 64BIT-DAG: $x8 = LI8 2 -; 64BIT-DAG: $x10 = LI8 40 -; 64BIT-DAG: STD killed renamable $x[[REG1:[0-9]+]], 112, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG2:[0-9]+]], 120, $x1 :: (store (s64)) -; 64BIT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm - -; CHEKASM-LABEL: .caller_mix - -; ASM32PWR4: mflr 0 -; ASM32PWR4-DAG: stw 
[[REG1:[0-9]+]], 56(1) -; ASM32PWR4-DAG: stw [[REG2:[0-9]+]], 60(1) -; ASM32PWR4-DAG: stw [[REG3:[0-9]+]], 64(1) -; ASM32PWR4-DAG: stw [[REG4:[0-9]+]], 68(1) -; ASM32PWR4-DAG: stw [[REG5:[0-9]+]], 72(1) -; ASM32PWR4-DAG: stw [[REG6:[0-9]+]], 76(1) -; ASM32PWR4-DAG: stw [[REG7:[0-9]+]], 80(1) -; ASM32PWR4-DAG: bl .mix_callee -; ASM32PWR4-DAG: blr - -; ASM64PWR4: mflr 0 -; ASM64PWR4-DAG: std [[REG1:[0-9]+]], 112(1) -; ASM64PWR4-DAG: std [[REG2:[0-9]+]], 120(1) -; ASM64PWR4-DAG: bl .mix_callee -; ASM64PWR4-DAG: blr - - define i32 @mix_floats(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14) { +; ASM32PWR4-LABEL: mix_floats: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: add 3, 3, 4 +; ASM32PWR4-NEXT: lwz 4, L..C39(2) # %const.0 +; ASM32PWR4-NEXT: lis 11, 17200 +; ASM32PWR4-NEXT: stfd 31, -8(1) # 8-byte Folded Spill +; ASM32PWR4-NEXT: add 3, 3, 5 +; ASM32PWR4-NEXT: add 3, 3, 6 +; ASM32PWR4-NEXT: add 3, 3, 7 +; ASM32PWR4-NEXT: stw 11, -24(1) +; ASM32PWR4-NEXT: add 3, 3, 8 +; ASM32PWR4-NEXT: add 3, 3, 9 +; ASM32PWR4-NEXT: add 3, 3, 10 +; ASM32PWR4-NEXT: lfs 0, 0(4) +; ASM32PWR4-NEXT: xoris 3, 3, 32768 +; ASM32PWR4-NEXT: stw 3, -20(1) +; ASM32PWR4-NEXT: addi 3, 1, -12 +; ASM32PWR4-NEXT: lfd 31, -24(1) +; ASM32PWR4-NEXT: fsub 0, 31, 0 +; ASM32PWR4-NEXT: fadd 0, 0, 1 +; ASM32PWR4-NEXT: lfd 1, 160(1) +; ASM32PWR4-NEXT: fadd 0, 0, 2 +; ASM32PWR4-NEXT: fadd 0, 0, 3 +; ASM32PWR4-NEXT: fadd 0, 0, 4 +; ASM32PWR4-NEXT: fadd 0, 0, 5 +; ASM32PWR4-NEXT: fadd 0, 0, 6 +; ASM32PWR4-NEXT: fadd 0, 0, 7 +; ASM32PWR4-NEXT: fadd 0, 0, 8 +; ASM32PWR4-NEXT: fadd 0, 0, 9 +; ASM32PWR4-NEXT: fadd 0, 0, 10 +; ASM32PWR4-NEXT: fadd 0, 0, 11 +; ASM32PWR4-NEXT: fadd 0, 0, 12 +; ASM32PWR4-NEXT: fadd 0, 0, 13 +; ASM32PWR4-NEXT: fadd 0, 0, 1 +; ASM32PWR4-NEXT: fctiwz 0, 0 +; ASM32PWR4-NEXT: stfiwx 0, 0, 3 +; 
ASM32PWR4-NEXT: lwz 3, -12(1) +; ASM32PWR4-NEXT: lfd 31, -8(1) # 8-byte Folded Reload +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: mix_floats: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: add 3, 3, 4 +; ASM64PWR4-NEXT: add 3, 3, 5 +; ASM64PWR4-NEXT: add 3, 3, 6 +; ASM64PWR4-NEXT: add 3, 3, 7 +; ASM64PWR4-NEXT: add 3, 3, 8 +; ASM64PWR4-NEXT: add 3, 3, 9 +; ASM64PWR4-NEXT: add 3, 3, 10 +; ASM64PWR4-NEXT: extsw 3, 3 +; ASM64PWR4-NEXT: std 3, -16(1) +; ASM64PWR4-NEXT: addi 3, 1, -4 +; ASM64PWR4-NEXT: lfd 0, -16(1) +; ASM64PWR4-NEXT: fcfid 0, 0 +; ASM64PWR4-NEXT: fadd 0, 0, 1 +; ASM64PWR4-NEXT: lfd 1, 216(1) +; ASM64PWR4-NEXT: fadd 0, 0, 2 +; ASM64PWR4-NEXT: fadd 0, 0, 3 +; ASM64PWR4-NEXT: fadd 0, 0, 4 +; ASM64PWR4-NEXT: fadd 0, 0, 5 +; ASM64PWR4-NEXT: fadd 0, 0, 6 +; ASM64PWR4-NEXT: fadd 0, 0, 7 +; ASM64PWR4-NEXT: fadd 0, 0, 8 +; ASM64PWR4-NEXT: fadd 0, 0, 9 +; ASM64PWR4-NEXT: fadd 0, 0, 10 +; ASM64PWR4-NEXT: fadd 0, 0, 11 +; ASM64PWR4-NEXT: fadd 0, 0, 12 +; ASM64PWR4-NEXT: fadd 0, 0, 13 +; ASM64PWR4-NEXT: fadd 0, 0, 1 +; ASM64PWR4-NEXT: fctiwz 0, 0 +; ASM64PWR4-NEXT: stfiwx 0, 0, 3 +; ASM64PWR4-NEXT: lwz 3, -4(1) +; ASM64PWR4-NEXT: blr entry: %add = add nsw i32 %i1, %i2 %add1 = add nsw i32 %add, %i3 @@ -2149,203 +1963,240 @@ define void @caller_mix() { ret i32 %conv21 } -; CHECK-LABEL: mix_floats - -; 32BIT-LABEL: liveins: -; 32BIT-DAG: - { reg: '$r3', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r4', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r5', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r6', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r7', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r8', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r9', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r10', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f2', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f3', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f4', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f5', virtual-reg: '' } -; 32BIT-DAG: - { reg: 
'$f6', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f7', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f8', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f9', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f10', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f11', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f12', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f13', virtual-reg: '' } - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 0, type: default, offset: 160, size: 8 - -; 32BIT-LABEL: body: | -; 32BIT-DAG: bb.0.entry: -; 32BIT-DAG: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 - -; 64BIT-DAG: liveins: -; 64BIT-DAG: - { reg: '$x3', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x5', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x6', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x9', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f2', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f3', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f5', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f6', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f9', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f10', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f11', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f12', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f13', virtual-reg: '' } - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 0, type: default, offset: 216, size: 8 - -; 64BIT-LABEL: body: | -; 64BIT-DAG: bb.0.entry: -; 64BIT-DAG: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 - -; CHECKASM-LABEL: .mix_floats: - -; 
ASM32PWR4-DAG: lfd [[REGF:[0-9]+]], 160(1) -; ASM32PWR4-DAG: fadd 0, 0, [[REGF]] -; ASM32PWR4-DAG: blr - -; ASM64PWR4-DAG: lfd [[REG1:[0-9]+]], 216(1) -; ASM64PWR4-DAG: fadd 0, 0, [[REG1]] -; ASM64PWR4-DAG: blr - define void @mix_floats_caller() { +; ASM32PWR4-LABEL: mix_floats_caller: +; ASM32PWR4: # %bb.0: # %entry +; ASM32PWR4-NEXT: mflr 0 +; ASM32PWR4-NEXT: stwu 1, -176(1) +; ASM32PWR4-NEXT: li 3, 0 +; ASM32PWR4-NEXT: stw 0, 184(1) +; ASM32PWR4-NEXT: lis 4, 16352 +; ASM32PWR4-NEXT: lis 5, 16339 +; ASM32PWR4-NEXT: lis 6, 16364 +; ASM32PWR4-NEXT: stw 3, 92(1) +; ASM32PWR4-NEXT: ori 5, 5, 13107 +; ASM32PWR4-NEXT: ori 6, 6, 52428 +; ASM32PWR4-NEXT: stw 3, 132(1) +; ASM32PWR4-NEXT: lis 3, 16368 +; ASM32PWR4-NEXT: li 8, 6 +; ASM32PWR4-NEXT: li 9, 7 +; ASM32PWR4-NEXT: li 10, 8 +; ASM32PWR4-NEXT: stw 3, 128(1) +; ASM32PWR4-NEXT: lis 3, -26215 +; ASM32PWR4-NEXT: ori 3, 3, 39322 +; ASM32PWR4-NEXT: stw 4, 88(1) +; ASM32PWR4-NEXT: lis 4, 16313 +; ASM32PWR4-NEXT: ori 4, 4, 39321 +; ASM32PWR4-NEXT: stw 3, 60(1) +; ASM32PWR4-NEXT: stw 3, 68(1) +; ASM32PWR4-NEXT: stw 3, 84(1) +; ASM32PWR4-NEXT: stw 3, 116(1) +; ASM32PWR4-NEXT: stw 3, 140(1) +; ASM32PWR4-NEXT: lis 3, 16369 +; ASM32PWR4-NEXT: ori 3, 3, 39321 +; ASM32PWR4-NEXT: stw 4, 56(1) +; ASM32PWR4-NEXT: lis 4, 16329 +; ASM32PWR4-NEXT: ori 4, 4, 39321 +; ASM32PWR4-NEXT: stw 3, 136(1) +; ASM32PWR4-NEXT: lis 3, 16371 +; ASM32PWR4-NEXT: ori 3, 3, 13107 +; ASM32PWR4-NEXT: stw 4, 64(1) +; ASM32PWR4-NEXT: lis 4, 13107 +; ASM32PWR4-NEXT: ori 4, 4, 13107 +; ASM32PWR4-NEXT: stw 3, 144(1) +; ASM32PWR4-NEXT: lis 3, 16372 +; ASM32PWR4-NEXT: ori 3, 3, 52428 +; ASM32PWR4-NEXT: stw 4, 76(1) +; ASM32PWR4-NEXT: stw 4, 100(1) +; ASM32PWR4-NEXT: stw 4, 148(1) +; ASM32PWR4-NEXT: lwz 4, L..C40(2) # %const.0 +; ASM32PWR4-NEXT: stw 3, 152(1) +; ASM32PWR4-NEXT: lwz 3, L..C41(2) # %const.1 +; ASM32PWR4-NEXT: lfd 1, 0(4) +; ASM32PWR4-NEXT: lwz 4, L..C42(2) # %const.2 +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C43(2) # %const.3 +; 
ASM32PWR4-NEXT: stw 5, 72(1) +; ASM32PWR4-NEXT: lis 5, 16345 +; ASM32PWR4-NEXT: ori 5, 5, 39321 +; ASM32PWR4-NEXT: stw 5, 80(1) +; ASM32PWR4-NEXT: lis 5, 16355 +; ASM32PWR4-NEXT: ori 5, 5, 13107 +; ASM32PWR4-NEXT: lfd 3, 0(4) +; ASM32PWR4-NEXT: lwz 4, L..C44(2) # %const.4 +; ASM32PWR4-NEXT: lfd 4, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C45(2) # %const.5 +; ASM32PWR4-NEXT: stw 5, 96(1) +; ASM32PWR4-NEXT: lis 5, 26214 +; ASM32PWR4-NEXT: ori 7, 5, 26214 +; ASM32PWR4-NEXT: lis 5, 16358 +; ASM32PWR4-NEXT: lfd 6, 0(4) +; ASM32PWR4-NEXT: lwz 4, L..C46(2) # %const.6 +; ASM32PWR4-NEXT: ori 5, 5, 26214 +; ASM32PWR4-NEXT: lfd 7, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C47(2) # %const.7 +; ASM32PWR4-NEXT: stw 5, 104(1) +; ASM32PWR4-NEXT: lis 5, 16361 +; ASM32PWR4-NEXT: ori 5, 5, 39321 +; ASM32PWR4-NEXT: lfd 8, 0(4) +; ASM32PWR4-NEXT: lwz 4, L..C48(2) # %const.8 +; ASM32PWR4-NEXT: lfd 9, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C49(2) # %const.9 +; ASM32PWR4-NEXT: stw 5, 112(1) +; ASM32PWR4-NEXT: lis 5, -13108 +; ASM32PWR4-NEXT: ori 5, 5, 52429 +; ASM32PWR4-NEXT: stw 5, 124(1) +; ASM32PWR4-NEXT: stw 5, 156(1) +; ASM32PWR4-NEXT: lwz 5, L..C50(2) # %const.12 +; ASM32PWR4-NEXT: lfd 11, 0(4) +; ASM32PWR4-NEXT: lwz 4, L..C51(2) # %const.10 +; ASM32PWR4-NEXT: lfd 12, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C52(2) # %const.11 +; ASM32PWR4-NEXT: lfd 13, 0(4) +; ASM32PWR4-NEXT: lis 4, 16374 +; ASM32PWR4-NEXT: ori 11, 4, 26214 +; ASM32PWR4-NEXT: li 4, 2 +; ASM32PWR4-NEXT: lfs 5, 0(3) +; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: lfs 10, 0(5) +; ASM32PWR4-NEXT: li 5, 3 +; ASM32PWR4-NEXT: stw 7, 108(1) +; ASM32PWR4-NEXT: stw 6, 120(1) +; ASM32PWR4-NEXT: li 6, 4 +; ASM32PWR4-NEXT: stw 7, 164(1) +; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: stw 11, 160(1) +; ASM32PWR4-NEXT: bl .mix_floats +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 176 +; ASM32PWR4-NEXT: lwz 0, 8(1) +; ASM32PWR4-NEXT: mtlr 0 +; ASM32PWR4-NEXT: blr +; +; ASM64PWR4-LABEL: mix_floats_caller: +; ASM64PWR4: # %bb.0: # %entry +; ASM64PWR4-NEXT: 
mflr 0 +; ASM64PWR4-NEXT: stdu 1, -240(1) +; ASM64PWR4-NEXT: li 3, 1023 +; ASM64PWR4-NEXT: std 0, 256(1) +; ASM64PWR4-NEXT: ld 4, L..C37(2) # %const.0 +; ASM64PWR4-NEXT: ld 8, L..C38(2) # %const.6 +; ASM64PWR4-NEXT: lis 5, 16371 +; ASM64PWR4-NEXT: ld 6, L..C39(2) # %const.3 +; ASM64PWR4-NEXT: ld 9, L..C40(2) # %const.9 +; ASM64PWR4-NEXT: ld 10, L..C41(2) # %const.11 +; ASM64PWR4-NEXT: rldic 3, 3, 52, 2 +; ASM64PWR4-NEXT: lis 11, 4091 +; ASM64PWR4-NEXT: std 3, 184(1) +; ASM64PWR4-NEXT: li 3, 511 +; ASM64PWR4-NEXT: lis 12, 16361 +; ASM64PWR4-NEXT: rldic 3, 3, 53, 2 +; ASM64PWR4-NEXT: lfd 1, 0(4) +; ASM64PWR4-NEXT: ld 4, L..C42(2) # %const.2 +; ASM64PWR4-NEXT: lis 0, 16345 +; ASM64PWR4-NEXT: std 3, 144(1) +; ASM64PWR4-NEXT: ld 3, L..C43(2) # %const.1 +; ASM64PWR4-NEXT: lfd 2, 0(3) +; ASM64PWR4-NEXT: lis 3, 16374 +; ASM64PWR4-NEXT: ori 7, 3, 26214 +; ASM64PWR4-NEXT: ori 3, 5, 13107 +; ASM64PWR4-NEXT: ld 5, L..C44(2) # %const.5 +; ASM64PWR4-NEXT: lfd 8, 0(8) +; ASM64PWR4-NEXT: ld 8, L..C45(2) # %const.8 +; ASM64PWR4-NEXT: rldimi 7, 7, 32, 0 +; ASM64PWR4-NEXT: rlwimi 7, 7, 16, 0, 15 +; ASM64PWR4-NEXT: rldimi 3, 3, 32, 0 +; ASM64PWR4-NEXT: lfd 3, 0(4) +; ASM64PWR4-NEXT: ld 4, L..C46(2) # %const.4 +; ASM64PWR4-NEXT: rlwimi 3, 3, 16, 0, 15 +; ASM64PWR4-NEXT: lfd 4, 0(6) +; ASM64PWR4-NEXT: lis 6, 16355 +; ASM64PWR4-NEXT: lfd 7, 0(5) +; ASM64PWR4-NEXT: ori 5, 6, 13107 +; ASM64PWR4-NEXT: ld 6, L..C47(2) # %const.7 +; ASM64PWR4-NEXT: rldimi 5, 5, 32, 0 +; ASM64PWR4-NEXT: rlwimi 5, 5, 16, 0, 15 +; ASM64PWR4-NEXT: lfd 11, 0(8) +; ASM64PWR4-NEXT: ld 8, L..C48(2) # %const.10 +; ASM64PWR4-NEXT: lfd 6, 0(4) +; ASM64PWR4-NEXT: lis 4, 16358 +; ASM64PWR4-NEXT: ori 4, 4, 26214 +; ASM64PWR4-NEXT: rldimi 4, 4, 32, 0 +; ASM64PWR4-NEXT: lfd 9, 0(6) +; ASM64PWR4-NEXT: lis 6, 16339 +; ASM64PWR4-NEXT: rlwimi 4, 4, 16, 0, 15 +; ASM64PWR4-NEXT: ori 6, 6, 13107 +; ASM64PWR4-NEXT: lfd 12, 0(9) +; ASM64PWR4-NEXT: lis 9, 4093 +; ASM64PWR4-NEXT: ori 9, 9, 13107 +; ASM64PWR4-NEXT: lfd 13, 0(8) +; 
ASM64PWR4-NEXT: lis 8, 16369 +; ASM64PWR4-NEXT: ori 8, 8, 39321 +; ASM64PWR4-NEXT: rldimi 6, 6, 32, 0 +; ASM64PWR4-NEXT: std 31, 232(1) # 8-byte Folded Spill +; ASM64PWR4-NEXT: ld 31, L..C49(2) # %const.12 +; ASM64PWR4-NEXT: rldic 9, 9, 34, 2 +; ASM64PWR4-NEXT: rlwimi 6, 6, 16, 0, 15 +; ASM64PWR4-NEXT: oris 9, 9, 52428 +; ASM64PWR4-NEXT: lfs 5, 0(10) +; ASM64PWR4-NEXT: lis 10, 16329 +; ASM64PWR4-NEXT: ori 10, 10, 39321 +; ASM64PWR4-NEXT: std 7, 216(1) +; ASM64PWR4-NEXT: ori 7, 11, 13107 +; ASM64PWR4-NEXT: ori 11, 12, 39321 +; ASM64PWR4-NEXT: ori 12, 0, 39321 +; ASM64PWR4-NEXT: std 4, 160(1) +; ASM64PWR4-NEXT: rldic 4, 8, 32, 2 +; ASM64PWR4-NEXT: rldic 7, 7, 34, 2 +; ASM64PWR4-NEXT: oris 4, 4, 39321 +; ASM64PWR4-NEXT: std 30, 224(1) # 8-byte Folded Spill +; ASM64PWR4-NEXT: lis 30, 16313 +; ASM64PWR4-NEXT: rldic 8, 11, 32, 2 +; ASM64PWR4-NEXT: rldic 11, 12, 32, 2 +; ASM64PWR4-NEXT: std 3, 200(1) +; ASM64PWR4-NEXT: ori 3, 30, 39321 +; ASM64PWR4-NEXT: ori 4, 4, 39322 +; ASM64PWR4-NEXT: rldic 3, 3, 32, 2 +; ASM64PWR4-NEXT: std 5, 152(1) +; ASM64PWR4-NEXT: rldic 5, 10, 32, 2 +; ASM64PWR4-NEXT: oris 5, 5, 39321 +; ASM64PWR4-NEXT: oris 3, 3, 39321 +; ASM64PWR4-NEXT: std 6, 128(1) +; ASM64PWR4-NEXT: oris 6, 7, 52428 +; ASM64PWR4-NEXT: ori 7, 9, 52429 +; ASM64PWR4-NEXT: li 9, 7 +; ASM64PWR4-NEXT: lfs 10, 0(31) +; ASM64PWR4-NEXT: li 10, 8 +; ASM64PWR4-NEXT: std 7, 208(1) +; ASM64PWR4-NEXT: oris 7, 8, 39321 +; ASM64PWR4-NEXT: oris 8, 11, 39321 +; ASM64PWR4-NEXT: ori 11, 3, 39322 +; ASM64PWR4-NEXT: li 3, 1 +; ASM64PWR4-NEXT: std 4, 192(1) +; ASM64PWR4-NEXT: ori 4, 6, 52429 +; ASM64PWR4-NEXT: ori 6, 8, 39322 +; ASM64PWR4-NEXT: std 4, 176(1) +; ASM64PWR4-NEXT: ori 4, 7, 39322 +; ASM64PWR4-NEXT: ori 7, 5, 39322 +; ASM64PWR4-NEXT: li 5, 3 +; ASM64PWR4-NEXT: li 8, 6 +; ASM64PWR4-NEXT: std 4, 168(1) +; ASM64PWR4-NEXT: li 4, 2 +; ASM64PWR4-NEXT: std 6, 136(1) +; ASM64PWR4-NEXT: li 6, 4 +; ASM64PWR4-NEXT: std 7, 120(1) +; ASM64PWR4-NEXT: li 7, 5 +; ASM64PWR4-NEXT: std 11, 112(1) +; 
ASM64PWR4-NEXT: bl .mix_floats +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: ld 31, 232(1) # 8-byte Folded Reload +; ASM64PWR4-NEXT: ld 30, 224(1) # 8-byte Folded Reload +; ASM64PWR4-NEXT: addi 1, 1, 240 +; ASM64PWR4-NEXT: ld 0, 16(1) +; ASM64PWR4-NEXT: mtlr 0 +; ASM64PWR4-NEXT: blr entry: %call = call i32 @mix_floats(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01, double 6.000000e-01, double 0x3FE6666666666666, double 8.000000e-01, double 9.000000e-01, double 1.000000e+00, double 1.100000e+00, double 1.200000e+00, double 1.300000e+00, double 1.400000e+00) ret void } -; CHECK-LABEL: mix_floats_caller - -; 32BIT-DAG: ADJCALLSTACKDOWN 168, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: $r3 = LI 1 -; 32BIT-DAG: $r4 = LI 2 -; 32BIT-DAG: $r5 = LI 3 -; 32BIT-DAG: $r6 = LI 4 -; 32BIT-DAG: $r7 = LI 5 -; 32BIT-DAG: $r8 = LI 6 -; 32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: STW killed renamable $r[[REG1:[0-9]+]], 56, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG3:[0-9]+]], 64, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG5:[0-9]+]], 72, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG7:[0-9]+]], 80, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG8:[0-9]+]], 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG9:[0-9]+]], 88, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG10:[0-9]+]], 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: 
STW killed renamable $r[[REG11:[0-9]+]], 96, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG12:[0-9]+]], 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG13:[0-9]+]], 104, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG14:[0-9]+]], 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG15:[0-9]+]], 112, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG16:[0-9]+]], 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG17:[0-9]+]], 120, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[REG18:[0-9]+]], 128, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r[[REG19:[0-9]+]], 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG20:[0-9]+]], 132, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG21:[0-9]+]], 136, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[REG22:[0-9]+]], 140, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG23:[0-9]+]], 144, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[REG24:[0-9]+]], 148, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG25:[0-9]+]], 152, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[REG26:[0-9]+]], 156, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG27:[0-9]+]], 160, $r1 :: (store (s32), align 8) -; 32BIT-DAG: STW killed renamable $r[[REG28:[0-9]+]], 164, $r1 :: (store (s32) into unknown-address + 4, basealign 8) -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, 
implicit $r9, implicit $r10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $r3 -; 32BIT-NEXT: ADJCALLSTACKUP 168, 0, implicit-def dead $r1, implicit $r1 - - -; 64BIT-DAG: ADJCALLSTACKDOWN 224, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: $x3 = LI8 1 -; 64BIT-DAG: $x4 = LI8 2 -; 64BIT-DAG: $x5 = LI8 3 -; 64BIT-DAG: $x6 = LI8 4 -; 64BIT-DAG: $x7 = LI8 5 -; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: STD killed renamable $x[[REG1:[0-9]+]], 112, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG2:[0-9]+]], 120, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG3:[0-9]+]], 128, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG4:[0-9]+]], 136, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG5:[0-9]+]], 144, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG6:[0-9]+]], 152, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG7:[0-9]+]], 160, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG8:[0-9]+]], 168, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG9:[0-9]+]], 176, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG10:[0-9]+]], 184, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG12:[0-9]+]], 192, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG13:[0-9]+]], 200, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG14:[0-9]+]], 208, $x1 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x[[REG15:[0-9]+]], 216, $x1 :: (store (s64)) -; 64BIT-DAG: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, 
implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def dead $x3 -; 64BIT-NEXT: ADJCALLSTACKUP 224, 0, implicit-def dead $r1, implicit $r1 - -; CHEKASM-LABEL: .mix_floats_caller: - -; ASM32PWR4: mflr 0 -; ASM32PWR4-NEXT: stwu 1, -176(1) -; ASM32PWR4-DAG: stw 0, 184(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 56(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 60(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 64(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 68(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 72(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 76(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 80(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 84(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 88(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 92(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 96(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 100(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 104(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 108(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 112(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 116(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 120(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 124(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 128(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 132(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 136(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 140(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 144(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 148(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 152(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 156(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 160(1) -; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 164(1) -; ASM32PWR4: bl .mix_floats - -; ASM64PWR4: mflr 0 -; ASM64PWR4-NEXT: stdu 1, -240(1) -; ASM64PWR4-DAG: std 0, 256(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 112(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 120(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 128(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 136(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 144(1) -; ASM64PWR4-DAG: 
std [[REG:[0-9]+]], 152(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 160(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 168(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 176(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 184(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 192(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 200(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 208(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 216(1) -; ASM64PWR4: bl .mix_floats From 9557fcca563dba3dd31769c297bb3b97d6e614f9 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Fri, 1 Dec 2023 09:32:22 +0100 Subject: [PATCH 09/72] [libc] Fix lint message (#73956) --- libc/src/__support/CMakeLists.txt | 2 ++ libc/src/__support/str_to_num_result.h | 13 ++++++++----- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 3 +-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index a76b22960f5a50..decd6ed2dbd2bd 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -79,6 +79,8 @@ add_header_library( str_to_num_result HDRS str_to_num_result.h + DEPENDS + libc.src.__support.macros.attributes ) add_header_library( diff --git a/libc/src/__support/str_to_num_result.h b/libc/src/__support/str_to_num_result.h index 9ba704c690655e..b32fbdeeb580fe 100644 --- a/libc/src/__support/str_to_num_result.h +++ b/libc/src/__support/str_to_num_result.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_NUM_RESULT_H #define LLVM_LIBC_SRC___SUPPORT_STR_TO_NUM_RESULT_H +#include "src/__support/macros/attributes.h" // LIBC_INLINE + #include namespace LIBC_NAMESPACE { @@ -18,15 +20,16 @@ template struct StrToNumResult { int error; ptrdiff_t parsed_len; - constexpr StrToNumResult(T value) : value(value), error(0), parsed_len(0) {} - constexpr StrToNumResult(T value, ptrdiff_t parsed_len) + LIBC_INLINE constexpr StrToNumResult(T value) + : value(value), error(0), parsed_len(0) {} + LIBC_INLINE constexpr StrToNumResult(T value, ptrdiff_t 
parsed_len) : value(value), error(0), parsed_len(parsed_len) {} - constexpr StrToNumResult(T value, ptrdiff_t parsed_len, int error) + LIBC_INLINE constexpr StrToNumResult(T value, ptrdiff_t parsed_len, int error) : value(value), error(error), parsed_len(parsed_len) {} - constexpr bool has_error() { return error != 0; } + LIBC_INLINE constexpr bool has_error() { return error != 0; } - constexpr operator T() { return value; } + LIBC_INLINE constexpr operator T() { return value; } }; } // namespace LIBC_NAMESPACE diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index a0a6a4366ea753..fdd620a4d415c0 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -474,8 +474,7 @@ libc_support_library( libc_support_library( name = "__support_str_to_num_result", hdrs = ["src/__support/str_to_num_result.h"], - deps = [ - ], + deps = [":__support_macros_attributes"], ) libc_support_library( From 1726b65e4c273d55dd54838a742b03caff4abcdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Fri, 1 Dec 2023 08:45:13 +0000 Subject: [PATCH 10/72] [MLIR][Vector] Refactor tests for contract -> OP transforms (4/N) (#73807) This patch refactors tests for: vector.contract -> vector.outerproduct for matvec operations (b += Ax). Summary of changes: * add 2 missing cases (masked + scalable) when the operation kind is `maxf`. This is a part of a larger effort to add cases with scalable vectors to tests for the Vector dialect. Implements #72834. 
--- ...act-to-outerproduct-matvec-transforms.mlir | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-matvec-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-matvec-transforms.mlir index e84a43feaff39d..8fed1f8fb34154 100644 --- a/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-matvec-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-matvec-transforms.mlir @@ -186,6 +186,52 @@ func.func @matvec_mk_k_m_max(%A: vector<2x2xf32>, return %0 : vector<2xf32> } +// CHECK-LABEL: func.func @masked_matvec_mk_k_m_max( +// CHECK-SAME: %{{.*}}: vector<2x3xf32>, +// CHECK-SAME: %{{.*}}: vector<3xf32>, +// CHECK-SAME: %{{.*}}: vector<2xf32>, +// CHECK-SAME: %[[IN_MASK:.*]]: vector<2x3xi1>) -> vector<2xf32> +// CHECK: %[[T_MASK:.*]] = vector.transpose %[[IN_MASK]], [1, 0] : vector<2x3xi1> to vector<3x2xi1> +// CHECK: %[[MASK0:.*]] = vector.extract %[[T_MASK]][0] : vector<2xi1> from vector<3x2xi1> +// CHECK: vector.mask %[[MASK0]] { vector.outerproduct {{.*}} {kind = #vector.kind} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> + +// CHECK: %[[MASK1:.*]] = vector.extract %[[T_MASK]][1] : vector<2xi1> from vector<3x2xi1> +// CHECK: vector.mask %[[MASK1]] { vector.outerproduct {{.*}} {kind = #vector.kind} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> + +// CHECK: %[[MASK2:.*]] = vector.extract %[[T_MASK]][2] : vector<2xi1> from vector<3x2xi1> +// CHECK: vector.mask %[[MASK2]] { vector.outerproduct {{.*}} {kind = #vector.kind} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> +func.func @masked_matvec_mk_k_m_max(%A: vector<2x3xf32>, + %x: vector<3xf32>, + %b: vector<2xf32>, + %m: vector<2x3xi1>) -> vector<2xf32> { + %0 = vector.mask %m { vector.contract #matvecmax_trait %A, %x, %b + : vector<2x3xf32>, vector<3xf32> into vector<2xf32> } : vector<2x3xi1> -> vector<2xf32> + return %0 : vector<2xf32> +} + +// CHECK-LABEL: func.func 
@masked_matvec_mk_k_m_max_scalable_parallel_dim( +// CHECK-SAME: %{{.*}}: vector<[2]x3xf32>, +// CHECK-SAME: %{{.*}}: vector<3xf32>, +// CHECK-SAME: %{{.*}}: vector<[2]xf32>, +// CHECK-SAME: %[[IN_MASK:.*]]: vector<[2]x3xi1>) -> vector<[2]xf32> +// CHECK: %[[T_MASK:.*]] = vector.transpose %[[IN_MASK]], [1, 0] : vector<[2]x3xi1> to vector<3x[2]xi1> +// CHECK: %[[MASK0:.*]] = vector.extract %[[T_MASK]][0] : vector<[2]xi1> from vector<3x[2]xi1> +// CHECK: vector.mask %[[MASK0]] { vector.outerproduct {{.*}} {kind = #vector.kind} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> + +// CHECK: %[[MASK1:.*]] = vector.extract %[[T_MASK]][1] : vector<[2]xi1> from vector<3x[2]xi1> +// CHECK: vector.mask %[[MASK1]] { vector.outerproduct {{.*}} {kind = #vector.kind} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> + +// CHECK: %[[MASK2:.*]] = vector.extract %[[T_MASK]][2] : vector<[2]xi1> from vector<3x[2]xi1> +// CHECK: vector.mask %[[MASK2]] { vector.outerproduct {{.*}} {kind = #vector.kind} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> +func.func @masked_matvec_mk_k_m_max_scalable_parallel_dim(%A: vector<[2]x3xf32>, + %x: vector<3xf32>, + %b: vector<[2]xf32>, + %m: vector<[2]x3xi1>) -> vector<[2]xf32> { + %0 = vector.mask %m { vector.contract #matvecmax_trait %A, %x, %b + : vector<[2]x3xf32>, vector<3xf32> into vector<[2]xf32> } : vector<[2]x3xi1> -> vector<[2]xf32> + return %0 : vector<[2]xf32> +} + // ============================================================================ // Matvec 2 (plain + masked + scalable) // ============================================================================ From 2c976a1fac5c0d6fe1cd7c3637f3d16cc378f52b Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Fri, 1 Dec 2023 09:47:26 +0100 Subject: [PATCH 11/72] [libc] Fix _Float16 detection for x86 (#73947) --- libc/src/__support/macros/properties/CMakeLists.txt | 1 + libc/src/__support/macros/properties/float.h | 3 ++- 
utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libc/src/__support/macros/properties/CMakeLists.txt b/libc/src/__support/macros/properties/CMakeLists.txt index e37cdb78bfa2c5..ee87ce68c9da37 100644 --- a/libc/src/__support/macros/properties/CMakeLists.txt +++ b/libc/src/__support/macros/properties/CMakeLists.txt @@ -31,5 +31,6 @@ add_header_library( DEPENDS .architectures .compiler + .cpu_features .os ) diff --git a/libc/src/__support/macros/properties/float.h b/libc/src/__support/macros/properties/float.h index 7e00ddc8f0cd32..4bafc3777a4714 100644 --- a/libc/src/__support/macros/properties/float.h +++ b/libc/src/__support/macros/properties/float.h @@ -13,6 +13,7 @@ #include "src/__support/macros/properties/architectures.h" #include "src/__support/macros/properties/compiler.h" +#include "src/__support/macros/properties/cpu_features.h" #include "src/__support/macros/properties/os.h" #include // LDBL_MANT_DIG @@ -30,7 +31,7 @@ #endif // float16 support. 
-#if defined(LIBC_TARGET_ARCH_IS_X86_64) +#if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2) #if (defined(LIBC_COMPILER_CLANG_VER) && (LIBC_COMPILER_CLANG_VER >= 1500)) || \ (defined(LIBC_COMPILER_GCC_VER) && (LIBC_COMPILER_GCC_VER >= 1201)) #define LIBC_COMPILER_HAS_C23_FLOAT16 diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index fdd620a4d415c0..d53ca202101537 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -86,6 +86,7 @@ libc_support_library( deps = [ ":__support_macros_properties_architectures", ":__support_macros_properties_compiler", + ":__support_macros_properties_cpu_features", ":__support_macros_properties_os", ], ) From d55692d60d218f402ce107520daabed15f2d9ef6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 10 Nov 2023 22:49:21 +0900 Subject: [PATCH 12/72] Reapply "ValueTracking: Identify implied fp classes by general fcmp (#66505)" This reverts commit 96a0d714d58e48c363ee6abbbcdfd7a6ce646ac1. Avoid assert with dynamic denormal-fp-math We don't recognize compares with 0 as an exact class test if we don't know the denormal mode. We could try to do better here, but it's probably not worth it. 
Fixes asserts reported after 1adce7d8e47e2438f99f91607760b825e5e3cc37 --- llvm/include/llvm/Analysis/ValueTracking.h | 21 + llvm/lib/Analysis/ValueTracking.cpp | 185 +++++++- .../Attributor/nofpclass-implied-by-fcmp.ll | 446 +++++++++--------- .../assume-fcmp-constant-implies-class.ll | 270 ++++------- 4 files changed, 500 insertions(+), 422 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index f353eec8c89bb9..82c87edd6297cd 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -214,6 +214,27 @@ std::pair fcmpToClassTest(CmpInst::Predicate Pred, const APFloat *ConstRHS, bool LookThroughSrc = true); +/// Compute the possible floating-point classes that \p LHS could be based on an +/// fcmp returning true. Returns { TestedValue, ClassesIfTrue, ClassesIfFalse } +/// +/// If the compare returns an exact class test, ClassesIfTrue == ~ClassesIfFalse +/// +/// This is a less exact version of fcmpToClassTest (e.g. fcmpToClassTest will +/// only succeed for a test of x > 0 implies positive, but not x > 1). +/// +/// If \p LookThroughSrc is true, consider the input value when computing the +/// mask. This may look through sign bit operations. +/// +/// If \p LookThroughSrc is false, ignore the source value (i.e. the first tuple +/// element will always be LHS). +/// +std::tuple +fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, + const APFloat *ConstRHS, bool LookThroughSrc = true); +std::tuple +fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, + Value *RHS, bool LookThroughSrc = true); + struct KnownFPClass { /// Floating-point classes the value could be one of. 
FPClassTest KnownFPClasses = fcAllFlags; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index ef8fa5826deb94..9cfe7315a7a4dc 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4164,6 +4164,147 @@ llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, return {Src, Mask}; } +std::tuple +llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, + const APFloat *ConstRHS, bool LookThroughSrc) { + auto [Val, ClassMask] = + fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc); + if (Val) + return {Val, ClassMask, ~ClassMask}; + + FPClassTest RHSClass = ConstRHS->classify(); + + // If we see a zero here, we are using dynamic denormal-fp-math, and can't + // treat comparisons to 0 as an exact class test. + // + // TODO: We could do better and still recognize non-equality cases. + if (RHSClass == fcPosZero || RHSClass == fcNegZero) + return {nullptr, fcAllFlags, fcAllFlags}; + + assert((RHSClass == fcPosNormal || RHSClass == fcNegNormal || + RHSClass == fcPosSubnormal || RHSClass == fcNegSubnormal) && + "should have been recognized as an exact class test"); + + const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass; + const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass; + + assert(IsNegativeRHS == ConstRHS->isNegative()); + assert(IsPositiveRHS == !ConstRHS->isNegative()); + + Value *Src = LHS; + const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src))); + + if (IsFabs) + RHSClass = llvm::inverse_fabs(RHSClass); + + if (Pred == FCmpInst::FCMP_OEQ) + return {Src, RHSClass, fcAllFlags}; + + if (Pred == FCmpInst::FCMP_UEQ) { + FPClassTest Class = RHSClass | fcNan; + return {Src, Class, ~fcNan}; + } + + if (Pred == FCmpInst::FCMP_ONE) + return {Src, ~fcNan, RHSClass}; + + if (Pred == FCmpInst::FCMP_UNE) + return {Src, fcAllFlags, RHSClass}; + + if (IsNegativeRHS) { + // TODO: Handle fneg(fabs) + if (IsFabs) { + // 
fabs(x) o> -k -> fcmp ord x, x + // fabs(x) u> -k -> true + // fabs(x) o< -k -> false + // fabs(x) u< -k -> fcmp uno x, x + switch (Pred) { + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OGE: + return {Src, ~fcNan, fcNan}; + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UGE: + return {Src, fcAllFlags, fcNone}; + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_OLE: + return {Src, fcNone, fcAllFlags}; + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_ULE: + return {Src, fcNan, ~fcNan}; + default: + break; + } + + return {nullptr, fcAllFlags, fcAllFlags}; + } + + FPClassTest ClassesLE = fcNegInf | fcNegNormal; + FPClassTest ClassesGE = fcPositive | fcNegZero | fcNegSubnormal; + + if (ConstRHS->isDenormal()) + ClassesLE |= fcNegSubnormal; + else + ClassesGE |= fcNegNormal; + + switch (Pred) { + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OGE: + return {Src, ClassesGE, ~ClassesGE | RHSClass}; + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UGE: + return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass}; + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_OLE: + return {Src, ClassesLE, ~ClassesLE | RHSClass}; + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_ULE: + return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass}; + default: + break; + } + } else if (IsPositiveRHS) { + FPClassTest ClassesGE = fcPosNormal | fcPosInf; + FPClassTest ClassesLE = fcNegative | fcPosZero | fcPosSubnormal; + if (ConstRHS->isDenormal()) + ClassesGE |= fcPosSubnormal; // above a denormal k may still be denormal + else + ClassesLE |= fcPosNormal; // below a normal k may still be normal + + if (IsFabs) { + ClassesGE = llvm::inverse_fabs(ClassesGE); + ClassesLE = llvm::inverse_fabs(ClassesLE); + } + + switch (Pred) { + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OGE: + return {Src, ClassesGE, ~ClassesGE | RHSClass}; + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UGE: + return {Src, ClassesGE | fcNan, ~(ClassesGE | fcNan) | RHSClass}; + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_OLE: + return {Src, ClassesLE, ~ClassesLE | RHSClass}; + case 
FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_ULE: + return {Src, ClassesLE | fcNan, ~(ClassesLE | fcNan) | RHSClass}; + default: + break; + } + } + + return {nullptr, fcAllFlags, fcAllFlags}; +} + +std::tuple +llvm::fcmpImpliesClass(CmpInst::Predicate Pred, const Function &F, Value *LHS, + Value *RHS, bool LookThroughSrc) { + const APFloat *ConstRHS; + if (!match(RHS, m_APFloatAllowUndef(ConstRHS))) + return {nullptr, fcAllFlags, fcNone}; + return fcmpImpliesClass(Pred, F, LHS, ConstRHS, LookThroughSrc); +} + static FPClassTest computeKnownFPClassFromAssumes(const Value *V, const SimplifyQuery &Q) { FPClassTest KnownFromAssume = fcAllFlags; @@ -4188,18 +4329,21 @@ static FPClassTest computeKnownFPClassFromAssumes(const Value *V, Value *LHS, *RHS; uint64_t ClassVal = 0; if (match(I->getArgOperand(0), m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) { - auto [TestedValue, TestedMask] = - fcmpToClassTest(Pred, *F, LHS, RHS, true); - // First see if we can fold in fabs/fneg into the test. - if (TestedValue == V) - KnownFromAssume &= TestedMask; - else { - // Try again without the lookthrough if we found a different source - // value. - auto [TestedValue, TestedMask] = - fcmpToClassTest(Pred, *F, LHS, RHS, false); - if (TestedValue == V) - KnownFromAssume &= TestedMask; + const APFloat *CRHS; + if (match(RHS, m_APFloat(CRHS))) { + // First see if we can fold in fabs/fneg into the test. + auto [CmpVal, MaskIfTrue, MaskIfFalse] = + fcmpImpliesClass(Pred, *F, LHS, CRHS, true); + if (CmpVal == V) + KnownFromAssume &= MaskIfTrue; + else { + // Try again without the lookthrough if we found a different source + // value. 
+ auto [CmpVal, MaskIfTrue, MaskIfFalse] = + fcmpImpliesClass(Pred, *F, LHS, CRHS, false); + if (CmpVal == V) + KnownFromAssume &= MaskIfTrue; + } } } else if (match(I->getArgOperand(0), m_Intrinsic( @@ -4347,7 +4491,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest FilterRHS = fcAllFlags; Value *TestedValue = nullptr; - FPClassTest TestedMask = fcNone; + FPClassTest MaskIfTrue = fcAllFlags; + FPClassTest MaskIfFalse = fcAllFlags; uint64_t ClassVal = 0; const Function *F = cast(Op)->getFunction(); CmpInst::Predicate Pred; @@ -4359,20 +4504,22 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, // TODO: In some degenerate cases we can infer something if we try again // without looking through sign operations. bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; - std::tie(TestedValue, TestedMask) = - fcmpToClassTest(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg); + std::tie(TestedValue, MaskIfTrue, MaskIfFalse) = + fcmpImpliesClass(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg); } else if (match(Cond, m_Intrinsic( m_Value(TestedValue), m_ConstantInt(ClassVal)))) { - TestedMask = static_cast(ClassVal); + FPClassTest TestedMask = static_cast(ClassVal); + MaskIfTrue = TestedMask; + MaskIfFalse = ~TestedMask; } if (TestedValue == LHS) { // match !isnan(x) ? x : y - FilterLHS = TestedMask; - } else if (TestedValue == RHS) { + FilterLHS = MaskIfTrue; + } else if (TestedValue == RHS) { // && IsExactClass // match !isnan(x) ? 
y : x - FilterRHS = ~TestedMask; + FilterRHS = MaskIfFalse; } KnownFPClass Known2; diff --git a/llvm/test/Transforms/Attributor/nofpclass-implied-by-fcmp.ll b/llvm/test/Transforms/Attributor/nofpclass-implied-by-fcmp.ll index ea594398c58014..d19b0ee3dc2dd5 100644 --- a/llvm/test/Transforms/Attributor/nofpclass-implied-by-fcmp.ll +++ b/llvm/test/Transforms/Attributor/nofpclass-implied-by-fcmp.ll @@ -11,8 +11,8 @@ declare void @llvm.assume(i1 noundef) ; can't be +inf define float @clamp_is_ogt_1_to_1(float %arg) { -; CHECK-LABEL: define float @clamp_is_ogt_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-LABEL: define nofpclass(pinf) float @clamp_is_ogt_1_to_1( +; CHECK-SAME: float nofpclass(pinf) [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[IS_OGT_1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGT_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -23,8 +23,8 @@ define float @clamp_is_ogt_1_to_1(float %arg) { } define float @clamp_is_ogt_1_to_1_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_ogt_1_to_1_commute( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(pinf) float @clamp_is_ogt_1_to_1_commute( +; CHECK-SAME: float nofpclass(pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULE_1:%.*]] = fcmp ule float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULE_1]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -36,8 +36,8 @@ define float @clamp_is_ogt_1_to_1_commute(float %arg) { ; can't be +inf or nan define float @clamp_is_ugt_1_to_1(float %arg) { -; CHECK-LABEL: define float @clamp_is_ugt_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan pinf) float @clamp_is_ugt_1_to_1( +; CHECK-SAME: float nofpclass(nan pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGT_1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = 
select i1 [[IS_UGT_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -49,8 +49,8 @@ define float @clamp_is_ugt_1_to_1(float %arg) { ; can't be +inf or nan define float @clamp_is_ugt_1_to_1_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_ugt_1_to_1_commute( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan pinf) float @clamp_is_ugt_1_to_1_commute( +; CHECK-SAME: float nofpclass(nan pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLE_1:%.*]] = fcmp ole float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLE_1]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -62,8 +62,8 @@ define float @clamp_is_ugt_1_to_1_commute(float %arg) { ; can't be +inf define float @clamp_is_oge_1_to_1(float %arg) { -; CHECK-LABEL: define float @clamp_is_oge_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(pinf) float @clamp_is_oge_1_to_1( +; CHECK-SAME: float nofpclass(pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGE_1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGE_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -74,8 +74,8 @@ define float @clamp_is_oge_1_to_1(float %arg) { } define float @clamp_is_oge_1_to_1_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_oge_1_to_1_commute( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(pinf) float @clamp_is_oge_1_to_1_commute( +; CHECK-SAME: float nofpclass(pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULT_1:%.*]] = fcmp ult float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULT_1]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -87,8 +87,8 @@ define float @clamp_is_oge_1_to_1_commute(float %arg) { ; can't be +inf or nan define float @clamp_is_uge_1_to_1(float %arg) { -; CHECK-LABEL: define float 
@clamp_is_uge_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan pinf) float @clamp_is_uge_1_to_1( +; CHECK-SAME: float nofpclass(nan pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGT_1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -100,8 +100,8 @@ define float @clamp_is_uge_1_to_1(float %arg) { ; can't be negative, zero, or denormal define float @clamp_is_olt_1_to_1(float %arg) { -; CHECK-LABEL: define float @clamp_is_olt_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf zero sub nnorm) float @clamp_is_olt_1_to_1( +; CHECK-SAME: float nofpclass(ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLT_1:%.*]] = fcmp olt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLT_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -113,8 +113,8 @@ define float @clamp_is_olt_1_to_1(float %arg) { ; can't be negative, zero, or denormal define float @clamp_is_olt_1_to_1_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_olt_1_to_1_commute( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf zero sub nnorm) float @clamp_is_olt_1_to_1_commute( +; CHECK-SAME: float nofpclass(ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGE_1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGE_1]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -126,8 +126,8 @@ define float @clamp_is_olt_1_to_1_commute(float %arg) { ; can't be negative or zero, nan or denormal define float @clamp_is_ult_1_to_1(float %arg) { -; CHECK-LABEL: define float @clamp_is_ult_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf zero sub nnorm) float @clamp_is_ult_1_to_1( +; 
CHECK-SAME: float nofpclass(nan ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULT_1:%.*]] = fcmp ult float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULT_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -139,8 +139,8 @@ define float @clamp_is_ult_1_to_1(float %arg) { ; can't be negative or zero, nan or denormal define float @clamp_is_ult_1_to_1_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_ult_1_to_1_commute( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf zero sub nnorm) float @clamp_is_ult_1_to_1_commute( +; CHECK-SAME: float nofpclass(nan ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGE_1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGE_1]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -152,8 +152,8 @@ define float @clamp_is_ult_1_to_1_commute(float %arg) { ; can't be negative, zero or denormal define float @clamp_is_ole_1_to_1(float %arg) { -; CHECK-LABEL: define float @clamp_is_ole_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf zero sub nnorm) float @clamp_is_ole_1_to_1( +; CHECK-SAME: float nofpclass(ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLE_1:%.*]] = fcmp ole float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLE_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -165,8 +165,8 @@ define float @clamp_is_ole_1_to_1(float %arg) { ; can't be negative or zero, nan or denormal define float @clamp_is_ule_1_to_1(float %arg) { -; CHECK-LABEL: define float @clamp_is_ule_1_to_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf zero sub nnorm) float @clamp_is_ule_1_to_1( +; CHECK-SAME: float nofpclass(nan ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULE_1:%.*]] = fcmp 
ule float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULE_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -178,8 +178,8 @@ define float @clamp_is_ule_1_to_1(float %arg) { ; can't be negative or denormal define float @clamp_is_olt_1_to_0(float %arg) { -; CHECK-LABEL: define float @clamp_is_olt_1_to_0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_is_olt_1_to_0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLT_1:%.*]] = fcmp olt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLT_1]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -191,8 +191,8 @@ define float @clamp_is_olt_1_to_0(float %arg) { ; can't be negative, nan or denormal define float @clamp_is_ult_1_to_0(float %arg) { -; CHECK-LABEL: define float @clamp_is_ult_1_to_0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_is_ult_1_to_0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULT_1:%.*]] = fcmp olt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULT_1]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -204,8 +204,8 @@ define float @clamp_is_ult_1_to_0(float %arg) { ; can't be negative or denormal define float @clamp_is_ole_1_to_0(float %arg) { -; CHECK-LABEL: define float @clamp_is_ole_1_to_0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_is_ole_1_to_0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLE_1:%.*]] = fcmp ole float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLE_1]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -217,8 +217,8 @@ define float 
@clamp_is_ole_1_to_0(float %arg) { ; can't be negative or denormal define float @clamp_is_ole_1_to_0_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_ole_1_to_0_commute( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_is_ole_1_to_0_commute( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGT_1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_1]], float [[ARG]], float 0.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -230,8 +230,8 @@ define float @clamp_is_ole_1_to_0_commute(float %arg) { ; can't be negative or denormal define float @clamp_is_ule_1_to_0(float %arg) { -; CHECK-LABEL: define float @clamp_is_ule_1_to_0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_is_ule_1_to_0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULE_1:%.*]] = fcmp ole float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULE_1]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -243,8 +243,8 @@ define float @clamp_is_ule_1_to_0(float %arg) { ; can't be positive, zero or denormal define float @clamp_is_ogt_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define float @clamp_is_ogt_neg1_to_neg1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(pinf zero sub pnorm) float @clamp_is_ogt_neg1_to_neg1( +; CHECK-SAME: float nofpclass(pinf zero sub pnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGT_NEG1:%.*]] = fcmp ogt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGT_NEG1]], float -1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -256,8 +256,8 @@ define float @clamp_is_ogt_neg1_to_neg1(float %arg) { ; can't be positive, zero, nan or denormal define float @clamp_is_ugt_neg1_to_neg1(float 
%arg) { -; CHECK-LABEL: define float @clamp_is_ugt_neg1_to_neg1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan pinf zero sub pnorm) float @clamp_is_ugt_neg1_to_neg1( +; CHECK-SAME: float nofpclass(nan pinf zero sub pnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGT_NEG1:%.*]] = fcmp ugt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_NEG1]], float -1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -269,8 +269,8 @@ define float @clamp_is_ugt_neg1_to_neg1(float %arg) { ; can't be positive or denormal define float @clamp_is_ogt_neg1_to_0(float %arg) { -; CHECK-LABEL: define float @clamp_is_ogt_neg1_to_0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(pinf nzero sub pnorm) float @clamp_is_ogt_neg1_to_0( +; CHECK-SAME: float nofpclass(pinf nzero sub pnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGT_NEG1:%.*]] = fcmp ogt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGT_NEG1]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -282,8 +282,8 @@ define float @clamp_is_ogt_neg1_to_0(float %arg) { ; can't be positive, nan or denormal define float @clamp_is_ugt_neg1_to_0(float %arg) { -; CHECK-LABEL: define float @clamp_is_ugt_neg1_to_0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan pinf nzero sub pnorm) float @clamp_is_ugt_neg1_to_0( +; CHECK-SAME: float nofpclass(nan pinf nzero sub pnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGT_NEG1:%.*]] = fcmp ugt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_NEG1]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -295,8 +295,8 @@ define float @clamp_is_ugt_neg1_to_0(float %arg) { ; can't be -inf define float @clamp_is_olt_neg1_to_neg1_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_olt_neg1_to_neg1_commute( -; CHECK-SAME: float [[ARG:%.*]]) 
#[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf) float @clamp_is_olt_neg1_to_neg1_commute( +; CHECK-SAME: float nofpclass(ninf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGE_NEG1:%.*]] = fcmp uge float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGE_NEG1]], float [[ARG]], float -1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -308,8 +308,8 @@ define float @clamp_is_olt_neg1_to_neg1_commute(float %arg) { ; can't be -inf define float @clamp_is_olt_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define float @clamp_is_olt_neg1_to_neg1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf) float @clamp_is_olt_neg1_to_neg1( +; CHECK-SAME: float nofpclass(ninf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLT_NEG1]], float -1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -321,8 +321,8 @@ define float @clamp_is_olt_neg1_to_neg1(float %arg) { ; can't be -inf or nan define float @clamp_is_ult_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define float @clamp_is_ult_neg1_to_neg1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf) float @clamp_is_ult_neg1_to_neg1( +; CHECK-SAME: float nofpclass(nan ninf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULT_NEG1]], float -1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -334,8 +334,8 @@ define float @clamp_is_ult_neg1_to_neg1(float %arg) { ; can't be -inf or nan define float @clamp_is_ult_neg1_to_neg1_commute(float %arg) { -; CHECK-LABEL: define float @clamp_is_ult_neg1_to_neg1_commute( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf) float @clamp_is_ult_neg1_to_neg1_commute( +; CHECK-SAME: float nofpclass(nan ninf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: 
[[IS_OGE_NEG1:%.*]] = fcmp oge float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGE_NEG1]], float [[ARG]], float -1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -351,8 +351,8 @@ define float @clamp_is_ult_neg1_to_neg1_commute(float %arg) { ; Must be 1, only posnormal define float @fcmp_oeq_1_else_1(float %arg) { -; CHECK-LABEL: define float @fcmp_oeq_1_else_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @fcmp_oeq_1_else_1( +; CHECK-SAME: float nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OEQ_1:%.*]] = fcmp oeq float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OEQ_1]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -364,8 +364,8 @@ define float @fcmp_oeq_1_else_1(float %arg) { ; Don't know anything define float @fcmp_one_1_else_1(float %arg) { -; CHECK-LABEL: define float @fcmp_one_1_else_1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan) float @fcmp_one_1_else_1( +; CHECK-SAME: float nofpclass(nan) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ONE_1:%.*]] = fcmp one float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ONE_1]], float [[ARG]], float 1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -377,8 +377,8 @@ define float @fcmp_one_1_else_1(float %arg) { ; must be 1 define float @fcmp_one_1_1_else_arg(float %arg) { -; CHECK-LABEL: define float @fcmp_one_1_1_else_arg( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @fcmp_one_1_1_else_arg( +; CHECK-SAME: float nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ONE_1:%.*]] = fcmp one float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ONE_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -390,8 +390,8 @@ define float @fcmp_one_1_1_else_arg(float 
%arg) { ; must be 1 define float @fcmp_une_1_1_else_arg(float %arg) { -; CHECK-LABEL: define float @fcmp_une_1_1_else_arg( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @fcmp_une_1_1_else_arg( +; CHECK-SAME: float nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UNE_1:%.*]] = fcmp une float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UNE_1]], float 1.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -403,8 +403,8 @@ define float @fcmp_une_1_1_else_arg(float %arg) { ; Must be -1, only negnormal define float @fcmp_oeq_neg1_else_neg1(float %arg) { -; CHECK-LABEL: define float @fcmp_oeq_neg1_else_neg1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub pnorm) float @fcmp_oeq_neg1_else_neg1( +; CHECK-SAME: float nofpclass(nan inf zero sub pnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OEQ_NEG1:%.*]] = fcmp oeq float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OEQ_NEG1]], float [[ARG]], float -1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -416,8 +416,8 @@ define float @fcmp_oeq_neg1_else_neg1(float %arg) { ; Don't know anything define float @fcmp_one_neg1_else_neg1(float %arg) { -; CHECK-LABEL: define float @fcmp_one_neg1_else_neg1( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan) float @fcmp_one_neg1_else_neg1( +; CHECK-SAME: float nofpclass(nan) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ONE_NEG1:%.*]] = fcmp one float [[ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ONE_NEG1]], float [[ARG]], float -1.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -512,8 +512,8 @@ define float @if_fcmp_one_0_1_else_arg(float %arg) { } define float @if_fcmp_one_1_arg_else_0(float %arg) { -; CHECK-LABEL: define float @if_fcmp_one_1_arg_else_0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define 
nofpclass(nan) float @if_fcmp_one_1_arg_else_0( +; CHECK-SAME: float nofpclass(nan) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ONE_1:%.*]] = fcmp one float [[ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ONE_1]], float [[ARG]], float 0.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -524,8 +524,8 @@ define float @if_fcmp_one_1_arg_else_0(float %arg) { } define float @fcmp_fabs_oeq_1_else_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @fcmp_fabs_oeq_1_else_1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @fcmp_fabs_oeq_1_else_1( +; CHECK-SAME: float nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: [[FABS_IS_OEQ_1:%.*]] = fcmp oeq float [[FABS_ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_OEQ_1]], float [[ARG]], float 1.000000e+00 @@ -552,8 +552,8 @@ define float @fcmp_fabs_oeq_1_0_else_arg(float %arg) { } define float @fcmp_fabs_ueq_1_0_else_arg(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @fcmp_fabs_ueq_1_0_else_arg( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @fcmp_fabs_ueq_1_0_else_arg( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_UEQ_1:%.*]] = fcmp ueq float [[FABS_ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_UEQ_1]], float 0.000000e+00, float [[ARG]] @@ -566,8 +566,8 @@ define float @fcmp_fabs_ueq_1_0_else_arg(float %arg) { } define float @fcmp_fabs_one_1_arg_else_0(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @fcmp_fabs_one_1_arg_else_0( -; 
CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @fcmp_fabs_one_1_arg_else_0( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_ONE_1:%.*]] = fcmp one float [[FABS_ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_ONE_1]], float [[ARG]], float 0.000000e+00 @@ -594,8 +594,8 @@ define float @fcmp_fabs_une_1_arg_else_0(float %arg) { } define float @fcmp_fabs_one_1_0_else_arg(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @fcmp_fabs_one_1_0_else_arg( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf nzero sub nnorm) float @fcmp_fabs_one_1_0_else_arg( +; CHECK-SAME: float nofpclass(nan inf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_ONE_1:%.*]] = fcmp one float [[FABS_ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_ONE_1]], float 0.000000e+00, float [[ARG]] @@ -608,8 +608,8 @@ define float @fcmp_fabs_one_1_0_else_arg(float %arg) { } define float @fcmp_fabs_une_1_0_else_arg(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @fcmp_fabs_une_1_0_else_arg( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf nzero sub nnorm) float @fcmp_fabs_une_1_0_else_arg( +; CHECK-SAME: float nofpclass(nan inf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_UNE_1:%.*]] = fcmp une float [[FABS_ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_UNE_1]], float 0.000000e+00, float [[ARG]] @@ -622,8 
+622,8 @@ define float @fcmp_fabs_une_1_0_else_arg(float %arg) { } define float @fcmp_fabs_one_1_neg2_else_arg(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub) float @fcmp_fabs_one_1_neg2_else_arg( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub) float @fcmp_fabs_one_1_neg2_else_arg( +; CHECK-SAME: float nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_ONE_1:%.*]] = fcmp one float [[FABS_ARG]], 1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_ONE_1]], float -2.000000e+00, float [[ARG]] @@ -640,8 +640,8 @@ define float @fcmp_fabs_one_1_neg2_else_arg(float %arg) { ;--------------------------------------------------------------------- define float @clamp_olt_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_olt_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero nsub norm) float @clamp_olt_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(ninf nzero nsub norm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLT_LARGEST_DENORMAL:%.*]] = fcmp olt float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLT_LARGEST_DENORMAL]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -652,8 +652,8 @@ define float @clamp_olt_largest_denormal_0.0(float %arg) { } define float @clamp_ole_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_ole_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero nsub norm) float @clamp_ole_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(ninf nzero nsub norm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OLE_LARGEST_DENORMAL:%.*]] = fcmp ole float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 
[[IS_OLE_LARGEST_DENORMAL]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -664,8 +664,8 @@ define float @clamp_ole_largest_denormal_0.0(float %arg) { } define float @clamp_ult_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_ult_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub norm) float @clamp_ult_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub norm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULT_LARGEST_DENORMAL:%.*]] = fcmp ult float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULT_LARGEST_DENORMAL]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -676,8 +676,8 @@ define float @clamp_ult_largest_denormal_0.0(float %arg) { } define float @clamp_ule_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_ule_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub norm) float @clamp_ule_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub norm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_ULE_LARGEST_DENORMAL:%.*]] = fcmp ule float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ULE_LARGEST_DENORMAL]], float 0.000000e+00, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -688,8 +688,8 @@ define float @clamp_ule_largest_denormal_0.0(float %arg) { } define float @clamp_ogt_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_ogt_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_ogt_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGT_LARGEST_DENORMAL:%.*]] = fcmp ugt float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 
[[IS_OGT_LARGEST_DENORMAL]], float [[ARG]], float 0.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -700,8 +700,8 @@ define float @clamp_ogt_largest_denormal_0.0(float %arg) { } define float @clamp_oge_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_oge_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero sub nnorm) float @clamp_oge_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(nan ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGE_LARGEST_DENORMAL:%.*]] = fcmp oge float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGE_LARGEST_DENORMAL]], float [[ARG]], float 0.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -712,8 +712,8 @@ define float @clamp_oge_largest_denormal_0.0(float %arg) { } define float @clamp_ugt_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_ugt_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_ugt_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGT_LARGEST_DENORMAL:%.*]] = fcmp ugt float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_LARGEST_DENORMAL]], float [[ARG]], float 0.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -724,8 +724,8 @@ define float @clamp_ugt_largest_denormal_0.0(float %arg) { } define float @clamp_uge_largest_denormal_0.0(float %arg) { -; CHECK-LABEL: define float @clamp_uge_largest_denormal_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_uge_largest_denormal_0.0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGE_LARGEST_DENORMAL:%.*]] = fcmp uge float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGE_LARGEST_DENORMAL]], float 
[[ARG]], float 0.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -736,8 +736,8 @@ define float @clamp_uge_largest_denormal_0.0(float %arg) { } define float @fcmp_oeq_largest_denormal_arg_else_0.0(float %arg) { -; CHECK-LABEL: define float @fcmp_oeq_largest_denormal_arg_else_0.0( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf nzero nsub norm) float @fcmp_oeq_largest_denormal_arg_else_0.0( +; CHECK-SAME: float nofpclass(nan inf nzero nsub norm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OEQ_LARGEST_DENORMAL:%.*]] = fcmp oeq float [[ARG]], 0x380FFFFFC0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OEQ_LARGEST_DENORMAL]], float [[ARG]], float 0.000000e+00 ; CHECK-NEXT: ret float [[SELECT]] @@ -893,8 +893,8 @@ define float @clamp_fabs_value_ule_1_to_1_copysign(float %arg) { ; Can't be +inf define float @clamp_is_ogt_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define float @clamp_is_ogt_largest_normal_to_largest_normal( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(pinf) float @clamp_is_ogt_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGT_LARGEST_NORMAL:%.*]] = fcmp ogt float [[ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGT_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -906,8 +906,8 @@ define float @clamp_is_ogt_largest_normal_to_largest_normal(float %arg) { ; Can't be +inf define float @clamp_is_oge_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define float @clamp_is_oge_largest_normal_to_largest_normal( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(pinf) float @clamp_is_oge_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_OGE_LARGEST_NORMAL:%.*]] = fcmp oge float [[ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: 
[[SELECT:%.*]] = select i1 [[IS_OGE_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -919,8 +919,8 @@ define float @clamp_is_oge_largest_normal_to_largest_normal(float %arg) { ; Can't be +inf or nan define float @clamp_is_ugt_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define float @clamp_is_ugt_largest_normal_to_largest_normal( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan pinf) float @clamp_is_ugt_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(nan pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGT_LARGEST_NORMAL:%.*]] = fcmp ugt float [[ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -932,8 +932,8 @@ define float @clamp_is_ugt_largest_normal_to_largest_normal(float %arg) { ; Can't be +inf or nan define float @clamp_is_uge_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define float @clamp_is_uge_largest_normal_to_largest_normal( -; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan pinf) float @clamp_is_uge_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(nan pinf) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[IS_UGE_LARGEST_NORMAL:%.*]] = fcmp uge float [[ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGE_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] ; CHECK-NEXT: ret float [[SELECT]] @@ -945,8 +945,8 @@ define float @clamp_is_uge_largest_normal_to_largest_normal(float %arg) { ; Can't be +inf or -inf define float @clamp_fabs_is_ogt_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_is_ogt_largest_normal_to_largest_normal( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) 
float @clamp_fabs_is_ogt_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_OGT_LARGEST_NORMAL:%.*]] = fcmp ogt float [[FABS_ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGT_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] @@ -960,8 +960,8 @@ define float @clamp_fabs_is_ogt_largest_normal_to_largest_normal(float %arg) { ; Can't be +inf or -inf define float @clamp_fabs_is_oge_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_is_oge_largest_normal_to_largest_normal( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @clamp_fabs_is_oge_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_OGE_LARGEST_NORMAL:%.*]] = fcmp oge float [[FABS_ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGE_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] @@ -975,8 +975,8 @@ define float @clamp_fabs_is_oge_largest_normal_to_largest_normal(float %arg) { ; Can't be +inf or -inf or nan define float @clamp_fabs_is_ugt_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_is_ugt_largest_normal_to_largest_normal( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) float @clamp_fabs_is_ugt_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(nan inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; 
CHECK-NEXT: [[IS_UGT_LARGEST_NORMAL:%.*]] = fcmp ugt float [[FABS_ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] @@ -990,8 +990,8 @@ define float @clamp_fabs_is_ugt_largest_normal_to_largest_normal(float %arg) { ; Can't be +inf or -inf or nan define float @clamp_fabs_is_uge_largest_normal_to_largest_normal(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_is_uge_largest_normal_to_largest_normal( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) float @clamp_fabs_is_uge_largest_normal_to_largest_normal( +; CHECK-SAME: float nofpclass(nan inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_UGT_LARGEST_NORMAL:%.*]] = fcmp uge float [[FABS_ARG]], 0x47EFFFFFE0000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UGT_LARGEST_NORMAL]], float 0x47EFFFFFE0000000, float [[ARG]] @@ -1009,8 +1009,8 @@ define float @clamp_fabs_is_uge_largest_normal_to_largest_normal(float %arg) { ; can't be negative or positive subnormal define float @clamp_fabs_ogt_smallest_normal_to_zero(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_ogt_smallest_normal_to_zero( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @clamp_fabs_ogt_smallest_normal_to_zero( +; CHECK-SAME: float nofpclass(inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_OGT_SMALLEST_NORMAL:%.*]] = fcmp ogt float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OGT_SMALLEST_NORMAL]], float 0.000000e+00, float [[ARG]] @@ -1055,8 +1055,8 @@ define float 
@clamp_fabs_olt_smallest_normal_to_zero(float %arg) { ; can't be negative or subnormal define float @clamp_fabs_ole_smallest_normal_to_zero(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_ole_smallest_normal_to_zero( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_fabs_ole_smallest_normal_to_zero( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_OLE_SMALLEST_NORMAL:%.*]] = fcmp ole float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLE_SMALLEST_NORMAL]], float 0.000000e+00, float [[ARG]] @@ -1083,8 +1083,8 @@ define float @clamp_fabs_is_is_olt_smallest_normal_to_0(float %arg) { } define float @clamp_fabs_is_is_ole_smallest_normal_to_0(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_is_is_ole_smallest_normal_to_0( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(ninf nzero sub nnorm) float @clamp_fabs_is_is_ole_smallest_normal_to_0( +; CHECK-SAME: float nofpclass(ninf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_OLE_SMALLEST_NORMAL:%.*]] = fcmp ole float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_OLE_SMALLEST_NORMAL]], float 0.000000e+00, float [[ARG]] @@ -1111,8 +1111,8 @@ define float @clamp_fabs_oeq_smallest_normal_to_zero(float %arg) { } define float @clamp_fabs_one_smallest_normal_to_zero(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_one_smallest_normal_to_zero( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf 
nzero sub nnorm) float @clamp_fabs_one_smallest_normal_to_zero( +; CHECK-SAME: float nofpclass(nan inf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_ONE_SMALLEST_NORMAL:%.*]] = fcmp one float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_ONE_SMALLEST_NORMAL]], float 0.000000e+00, float [[ARG]] @@ -1125,8 +1125,8 @@ define float @clamp_fabs_one_smallest_normal_to_zero(float %arg) { } define float @clamp_fabs_ueq_smallest_normal_to_zero(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_ueq_smallest_normal_to_zero( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @clamp_fabs_ueq_smallest_normal_to_zero( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_UEQ_SMALLEST_NORMAL:%.*]] = fcmp ueq float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[IS_UEQ_SMALLEST_NORMAL]], float 0.000000e+00, float [[ARG]] @@ -1139,8 +1139,8 @@ define float @clamp_fabs_ueq_smallest_normal_to_zero(float %arg) { } define float @clamp_fabs_une_smallest_normal_to_zero(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @clamp_fabs_une_smallest_normal_to_zero( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf nzero sub nnorm) float @clamp_fabs_une_smallest_normal_to_zero( +; CHECK-SAME: float nofpclass(nan inf nzero sub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_UNE_SMALLEST_NORMAL:%.*]] = fcmp une float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 
[[IS_UNE_SMALLEST_NORMAL]], float 0.000000e+00, float [[ARG]] @@ -1179,8 +1179,8 @@ define float @clamp_fabs_ole_neg1_to_neg1(float %arg) { } define float @clamp_fabs_ult_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub) float @clamp_fabs_ult_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub) float @clamp_fabs_ult_neg1_to_neg1( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_ULT_NEG1:%.*]] = fcmp ult float [[FABS_ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_ULT_NEG1]], float -1.000000e+00, float [[ARG]] @@ -1193,8 +1193,8 @@ define float @clamp_fabs_ult_neg1_to_neg1(float %arg) { } define float @clamp_fabs_ule_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub) float @clamp_fabs_ule_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub) float @clamp_fabs_ule_neg1_to_neg1( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_ULE_NEG1:%.*]] = fcmp ule float [[FABS_ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_ULE_NEG1]], float -1.000000e+00, float [[ARG]] @@ -1207,8 +1207,8 @@ define float @clamp_fabs_ule_neg1_to_neg1(float %arg) { } define float @clamp_fabs_ogt_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub) float @clamp_fabs_ogt_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(inf zero sub pnorm) float @clamp_fabs_ogt_neg1_to_neg1( +; CHECK-SAME: float nofpclass(inf zero sub norm) 
[[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_OGT_NEG1:%.*]] = fcmp ogt float [[FABS_ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_OGT_NEG1]], float -1.000000e+00, float [[ARG]] @@ -1221,8 +1221,8 @@ define float @clamp_fabs_ogt_neg1_to_neg1(float %arg) { } define float @clamp_fabs_oge_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub) float @clamp_fabs_oge_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(inf zero sub pnorm) float @clamp_fabs_oge_neg1_to_neg1( +; CHECK-SAME: float nofpclass(inf zero sub norm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_OGE_NEG1:%.*]] = fcmp oge float [[FABS_ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_OGE_NEG1]], float -1.000000e+00, float [[ARG]] @@ -1236,7 +1236,7 @@ define float @clamp_fabs_oge_neg1_to_neg1(float %arg) { define float @clamp_fabs_ugt_neg1_to_neg1(float %arg) { ; CHECK-LABEL: define noundef nofpclass(nan inf zero sub pnorm) float @clamp_fabs_ugt_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-SAME: float nofpclass(all) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret float -1.000000e+00 ; %fabs.arg = call float @llvm.fabs.f32(float %arg) @@ -1247,7 +1247,7 @@ define float @clamp_fabs_ugt_neg1_to_neg1(float %arg) { define float @clamp_fabs_uge_neg1_to_neg1(float %arg) { ; CHECK-LABEL: define noundef nofpclass(nan inf zero sub pnorm) float @clamp_fabs_uge_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-SAME: float nofpclass(all) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret float -1.000000e+00 ; %fabs.arg = call float @llvm.fabs.f32(float %arg) @@ -1268,8 +1268,8 @@ define float 
@clamp_fabs_oeq_neg1_to_neg1(float %arg) { } define float @clamp_fabs_ueq_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub) float @clamp_fabs_ueq_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub) float @clamp_fabs_ueq_neg1_to_neg1( +; CHECK-SAME: float nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_UEQ_NEG1:%.*]] = fcmp ueq float [[FABS_ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_UEQ_NEG1]], float -1.000000e+00, float [[ARG]] @@ -1282,8 +1282,8 @@ define float @clamp_fabs_ueq_neg1_to_neg1(float %arg) { } define float @clamp_fabs_one_neg1_to_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub) float @clamp_fabs_one_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub pnorm) float @clamp_fabs_one_neg1_to_neg1( +; CHECK-SAME: float nofpclass(all) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[FABS_IS_ONE_NEG1:%.*]] = fcmp one float [[FABS_ARG]], -1.000000e+00 ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[FABS_IS_ONE_NEG1]], float -1.000000e+00, float [[ARG]] @@ -1297,7 +1297,7 @@ define float @clamp_fabs_one_neg1_to_neg1(float %arg) { define float @clamp_fabs_une_neg1_to_neg1(float %arg) { ; CHECK-LABEL: define noundef nofpclass(nan inf zero sub pnorm) float @clamp_fabs_une_neg1_to_neg1( -; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR2]] { +; CHECK-SAME: float nofpclass(all) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret float -1.000000e+00 ; %fabs.arg = call float @llvm.fabs.f32(float %arg) @@ -1311,8 +1311,8 @@ define float @clamp_fabs_une_neg1_to_neg1(float %arg) { 
;--------------------------------------------------------------------- define float @ret_assumed_ogt_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ogt_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-LABEL: define nofpclass(nan ninf zero sub nnorm) float @ret_assumed_ogt_1( +; CHECK-SAME: float returned nofpclass(nan ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[OGT_1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OGT_1]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: ret float [[ARG]] @@ -1323,8 +1323,8 @@ define float @ret_assumed_ogt_1(float %arg) { } define float @ret_assumed_oge_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_oge_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf zero sub nnorm) float @ret_assumed_oge_1( +; CHECK-SAME: float returned nofpclass(nan ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OGE_1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OGE_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1335,8 +1335,8 @@ define float @ret_assumed_oge_1(float %arg) { } define float @ret_assumed_olt_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_olt_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan pinf) float @ret_assumed_olt_1( +; CHECK-SAME: float returned nofpclass(nan pinf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OLT_1:%.*]] = fcmp olt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OLT_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1347,8 +1347,8 @@ define float @ret_assumed_olt_1(float %arg) { } define float @ret_assumed_ole_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ole_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan pinf) float @ret_assumed_ole_1( +; CHECK-SAME: float 
returned nofpclass(nan pinf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OLE_1:%.*]] = fcmp ole float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OLE_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1359,8 +1359,8 @@ define float @ret_assumed_ole_1(float %arg) { } define float @ret_assumed_ugt_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ugt_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(ninf zero sub nnorm) float @ret_assumed_ugt_1( +; CHECK-SAME: float returned nofpclass(ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[UGT_1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UGT_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1371,8 +1371,8 @@ define float @ret_assumed_ugt_1(float %arg) { } define float @ret_assumed_uge_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_uge_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(ninf zero sub nnorm) float @ret_assumed_uge_1( +; CHECK-SAME: float returned nofpclass(ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[UGE_1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UGE_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1383,8 +1383,8 @@ define float @ret_assumed_uge_1(float %arg) { } define float @ret_assumed_ult_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ult_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(pinf) float @ret_assumed_ult_1( +; CHECK-SAME: float returned nofpclass(pinf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ULT_1:%.*]] = fcmp ult float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ULT_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1395,8 +1395,8 @@ define float @ret_assumed_ult_1(float %arg) { } define float @ret_assumed_ule_1(float %arg) { -; CHECK-LABEL: 
define float @ret_assumed_ule_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(pinf) float @ret_assumed_ule_1( +; CHECK-SAME: float returned nofpclass(pinf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ULE_1:%.*]] = fcmp ule float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ULE_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1407,8 +1407,8 @@ define float @ret_assumed_ule_1(float %arg) { } define float @ret_assumed_fabs_ogt_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_ogt_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf zero sub nnorm) float @ret_assumed_fabs_ogt_1( +; CHECK-SAME: float returned nofpclass(nan ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[OGT_1:%.*]] = fcmp ogt float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OGT_1]]) #[[ATTR5]] @@ -1421,8 +1421,8 @@ define float @ret_assumed_fabs_ogt_1(float %arg) { } define float @ret_assumed_fabs_oge_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_oge_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf zero sub nnorm) float @ret_assumed_fabs_oge_1( +; CHECK-SAME: float returned nofpclass(nan ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[OGE_1:%.*]] = fcmp oge float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OGE_1]]) #[[ATTR5]] @@ -1435,8 +1435,8 @@ define float @ret_assumed_fabs_oge_1(float %arg) { } define float @ret_assumed_fabs_olt_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float 
@ret_assumed_fabs_olt_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) float @ret_assumed_fabs_olt_1( +; CHECK-SAME: float returned nofpclass(nan inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[OLT_1:%.*]] = fcmp olt float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OLT_1]]) #[[ATTR5]] @@ -1449,8 +1449,8 @@ define float @ret_assumed_fabs_olt_1(float %arg) { } define float @ret_assumed_fabs_ole_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_ole_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) float @ret_assumed_fabs_ole_1( +; CHECK-SAME: float returned nofpclass(nan inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[OLE_1:%.*]] = fcmp olt float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OLE_1]]) #[[ATTR5]] @@ -1463,8 +1463,8 @@ define float @ret_assumed_fabs_ole_1(float %arg) { } define float @ret_assumed_fabs_ugt_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_ugt_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(ninf zero sub nnorm) float @ret_assumed_fabs_ugt_1( +; CHECK-SAME: float returned nofpclass(ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[UGT_1:%.*]] = fcmp ugt float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UGT_1]]) #[[ATTR5]] @@ -1477,8 +1477,8 @@ define float @ret_assumed_fabs_ugt_1(float 
%arg) { } define float @ret_assumed_fabs_uge_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_uge_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(ninf zero sub nnorm) float @ret_assumed_fabs_uge_1( +; CHECK-SAME: float returned nofpclass(ninf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[UGE_1:%.*]] = fcmp ugt float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UGE_1]]) #[[ATTR5]] @@ -1491,8 +1491,8 @@ define float @ret_assumed_fabs_uge_1(float %arg) { } define float @ret_assumed_fabs_ult_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_ult_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_assumed_fabs_ult_1( +; CHECK-SAME: float returned nofpclass(inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[ULT_1:%.*]] = fcmp ult float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ULT_1]]) #[[ATTR5]] @@ -1505,8 +1505,8 @@ define float @ret_assumed_fabs_ult_1(float %arg) { } define float @ret_assumed_fabs_ule_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_ule_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_assumed_fabs_ule_1( +; CHECK-SAME: float returned nofpclass(inf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[ULE_1:%.*]] = fcmp ule float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void 
@llvm.assume(i1 noundef [[ULE_1]]) #[[ATTR5]] @@ -1519,8 +1519,8 @@ define float @ret_assumed_fabs_ule_1(float %arg) { } define float @ret_assumed_ogt_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ogt_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf) float @ret_assumed_ogt_neg1( +; CHECK-SAME: float returned nofpclass(nan ninf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OGT_NEG1:%.*]] = fcmp ogt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OGT_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1531,8 +1531,8 @@ define float @ret_assumed_ogt_neg1(float %arg) { } define float @ret_assumed_oge_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_oge_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf) float @ret_assumed_oge_neg1( +; CHECK-SAME: float returned nofpclass(nan ninf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OGE_NEG1:%.*]] = fcmp ogt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OGE_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1543,8 +1543,8 @@ define float @ret_assumed_oge_neg1(float %arg) { } define float @ret_assumed_olt_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_olt_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan pinf zero sub pnorm) float @ret_assumed_olt_neg1( +; CHECK-SAME: float returned nofpclass(nan pinf zero sub pnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OLT_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1555,8 +1555,8 @@ define float @ret_assumed_olt_neg1(float %arg) { } define float @ret_assumed_ole_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ole_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define 
nofpclass(nan pinf zero sub pnorm) float @ret_assumed_ole_neg1( +; CHECK-SAME: float returned nofpclass(nan pinf zero sub pnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OLE_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1567,8 +1567,8 @@ define float @ret_assumed_ole_neg1(float %arg) { } define float @ret_assumed_ugt_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ugt_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(ninf) float @ret_assumed_ugt_neg1( +; CHECK-SAME: float returned nofpclass(ninf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[UGT_NEG1:%.*]] = fcmp ugt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UGT_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1579,8 +1579,8 @@ define float @ret_assumed_ugt_neg1(float %arg) { } define float @ret_assumed_uge_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_uge_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(ninf) float @ret_assumed_uge_neg1( +; CHECK-SAME: float returned nofpclass(ninf) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[UGE_NEG1:%.*]] = fcmp uge float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UGE_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1591,8 +1591,8 @@ define float @ret_assumed_uge_neg1(float %arg) { } define float @ret_assumed_ult_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ult_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(pinf zero sub pnorm) float @ret_assumed_ult_neg1( +; CHECK-SAME: float returned nofpclass(pinf zero sub pnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ULT_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] 
@@ -1603,8 +1603,8 @@ define float @ret_assumed_ult_neg1(float %arg) { } define float @ret_assumed_ule_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ule_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(pinf zero sub pnorm) float @ret_assumed_ule_neg1( +; CHECK-SAME: float returned nofpclass(pinf zero sub pnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ULE_NEG1:%.*]] = fcmp ule float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ULE_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1615,8 +1615,8 @@ define float @ret_assumed_ule_neg1(float %arg) { } define float @ret_assumed_oeq_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_oeq_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @ret_assumed_oeq_1( +; CHECK-SAME: float returned nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[OEQ_1:%.*]] = fcmp oeq float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OEQ_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1627,8 +1627,8 @@ define float @ret_assumed_oeq_1(float %arg) { } define float @ret_assumed_ueq_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_ueq_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(inf zero sub nnorm) float @ret_assumed_ueq_1( +; CHECK-SAME: float returned nofpclass(inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[UEQ_1:%.*]] = fcmp ueq float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UEQ_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1639,8 +1639,8 @@ define float @ret_assumed_ueq_1(float %arg) { } define float @ret_assumed_one_1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_one_1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan) float @ret_assumed_one_1( +; CHECK-SAME: 
float returned nofpclass(nan) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ONE_1:%.*]] = fcmp one float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ONE_1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1651,8 +1651,8 @@ define float @ret_assumed_one_1(float %arg) { } define float @ret_assumed_one_neg1(float %arg) { -; CHECK-LABEL: define float @ret_assumed_one_neg1( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan) float @ret_assumed_one_neg1( +; CHECK-SAME: float returned nofpclass(nan) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ONE_NEG1:%.*]] = fcmp one float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ONE_NEG1]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -1687,8 +1687,8 @@ define float @ret_assumed_une_1(float %arg) { } define float @ret_assumed_fabs_oeq_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_oeq_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @ret_assumed_fabs_oeq_1( +; CHECK-SAME: float returned nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[OEQ_1:%.*]] = fcmp oeq float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[OEQ_1]]) #[[ATTR5]] @@ -1701,8 +1701,8 @@ define float @ret_assumed_fabs_oeq_1(float %arg) { } define float @ret_assumed_fabs_ueq_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_ueq_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(inf zero sub nnorm) float @ret_assumed_fabs_ueq_1( +; CHECK-SAME: float returned nofpclass(inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float 
@llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[UEQ_1:%.*]] = fcmp ueq float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UEQ_1]]) #[[ATTR5]] @@ -1715,8 +1715,8 @@ define float @ret_assumed_fabs_ueq_1(float %arg) { } define float @ret_assumed_fabs_one_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_one_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_assumed_fabs_one_1( +; CHECK-SAME: float returned nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[ONE_1:%.*]] = fcmp one float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ONE_1]]) #[[ATTR5]] @@ -1729,8 +1729,8 @@ define float @ret_assumed_fabs_one_1(float %arg) { } define float @ret_assumed_fabs_une_1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_une_1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_assumed_fabs_une_1( +; CHECK-SAME: float returned nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[UNE_1:%.*]] = fcmp one float [[ARG_FABS]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UNE_1]]) #[[ATTR5]] @@ -1743,8 +1743,8 @@ define float @ret_assumed_fabs_une_1(float %arg) { } define float @ret_assumed_fabs_oeq_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_oeq_neg1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(all) float @ret_assumed_fabs_oeq_neg1( +; 
CHECK-SAME: float returned nofpclass(all) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: call void @llvm.assume(i1 noundef false) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] ; @@ -1755,8 +1755,8 @@ define float @ret_assumed_fabs_oeq_neg1(float %arg) { } define float @ret_assumed_fabs_ueq_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_ueq_neg1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(inf zero sub norm) float @ret_assumed_fabs_ueq_neg1( +; CHECK-SAME: float returned nofpclass(inf zero sub norm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[UEQ_NEG1:%.*]] = fcmp ueq float [[ARG_FABS]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[UEQ_NEG1]]) #[[ATTR5]] @@ -1769,8 +1769,8 @@ define float @ret_assumed_fabs_ueq_neg1(float %arg) { } define float @ret_assumed_fabs_one_neg1(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_assumed_fabs_one_neg1( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_assumed_fabs_one_neg1( +; CHECK-SAME: float returned nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[ARG_FABS:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[ONE_NEG1:%.*]] = fcmp one float [[ARG_FABS]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[ONE_NEG1]]) #[[ATTR5]] @@ -2228,8 +2228,8 @@ define float @ret_assumed_uge_known_negative(float %arg, float %unknown) { ;--------------------------------------------------------------------- define float @assume_oeq_smallest_normal(float %arg) { -; CHECK-LABEL: define float @assume_oeq_smallest_normal( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan inf zero 
sub nnorm) float @assume_oeq_smallest_normal( +; CHECK-SAME: float returned nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[IS_OEQ_SMALLEST_NORMAL:%.*]] = fcmp oeq float [[ARG]], 0x3810000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[IS_OEQ_SMALLEST_NORMAL]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -2240,8 +2240,8 @@ define float @assume_oeq_smallest_normal(float %arg) { } define float @assume_one_smallest_normal(float %arg) { -; CHECK-LABEL: define float @assume_one_smallest_normal( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan) float @assume_one_smallest_normal( +; CHECK-SAME: float returned nofpclass(nan) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[IS_ONE_SMALLEST_NORMAL:%.*]] = fcmp one float [[ARG]], 0x3810000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[IS_ONE_SMALLEST_NORMAL]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -2252,8 +2252,8 @@ define float @assume_one_smallest_normal(float %arg) { } define float @assume_ueq_smallest_normal(float %arg) { -; CHECK-LABEL: define float @assume_ueq_smallest_normal( -; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(inf zero sub nnorm) float @assume_ueq_smallest_normal( +; CHECK-SAME: float returned nofpclass(inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[IS_UEQ_SMALLEST_NORMAL:%.*]] = fcmp ueq float [[ARG]], 0x3810000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[IS_UEQ_SMALLEST_NORMAL]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] @@ -2300,8 +2300,8 @@ define float @assume_uno_smallest_normal(float %arg) { } define float @assume_fabs_oeq_smallest_normal(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @assume_fabs_oeq_smallest_normal( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @assume_fabs_oeq_smallest_normal( 
+; CHECK-SAME: float returned nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_OEQ_SMALLEST_NORMAL:%.*]] = fcmp oeq float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[IS_OEQ_SMALLEST_NORMAL]]) #[[ATTR5]] @@ -2314,8 +2314,8 @@ define float @assume_fabs_oeq_smallest_normal(float %arg) { } define float @assume_fabs_one_smallest_normal(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @assume_fabs_one_smallest_normal( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @assume_fabs_one_smallest_normal( +; CHECK-SAME: float returned nofpclass(nan ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_ONE_SMALLEST_NORMAL:%.*]] = fcmp one float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[IS_ONE_SMALLEST_NORMAL]]) #[[ATTR5]] @@ -2328,8 +2328,8 @@ define float @assume_fabs_one_smallest_normal(float %arg) { } define float @assume_fabs_ueq_smallest_normal(float %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @assume_fabs_ueq_smallest_normal( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(inf zero sub nnorm) float @assume_fabs_ueq_smallest_normal( +; CHECK-SAME: float returned nofpclass(inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[FABS_ARG:%.*]] = call float @llvm.fabs.f32(float [[ARG]]) #[[ATTR4]] ; CHECK-NEXT: [[IS_UEQ_SMALLEST_NORMAL:%.*]] = fcmp ueq float [[FABS_ARG]], 0x3810000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[IS_UEQ_SMALLEST_NORMAL]]) #[[ATTR5]] @@ -2384,8 +2384,8 @@ define float @assume_fabs_uno_smallest_normal(float 
%arg) { } define float @assume_oeq_smallest_normal_known_pos(float nofpclass(ninf nsub nnorm nzero) %arg) { -; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @assume_oeq_smallest_normal_known_pos( -; CHECK-SAME: float returned nofpclass(ninf nzero nsub nnorm) [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @assume_oeq_smallest_normal_known_pos( +; CHECK-SAME: float returned nofpclass(nan inf zero sub nnorm) [[ARG:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[IS_OEQ_SMALLEST_NORMAL:%.*]] = fcmp oeq float [[ARG]], 0x3810000000000000 ; CHECK-NEXT: call void @llvm.assume(i1 noundef [[IS_OEQ_SMALLEST_NORMAL]]) #[[ATTR5]] ; CHECK-NEXT: ret float [[ARG]] diff --git a/llvm/test/Transforms/InstSimplify/assume-fcmp-constant-implies-class.ll b/llvm/test/Transforms/InstSimplify/assume-fcmp-constant-implies-class.ll index 7970f3ce6bf09d..8d5ac063108c23 100644 --- a/llvm/test/Transforms/InstSimplify/assume-fcmp-constant-implies-class.ll +++ b/llvm/test/Transforms/InstSimplify/assume-fcmp-constant-implies-class.ll @@ -17,8 +17,7 @@ define i1 @assume_olt_neg1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -31,8 +30,7 @@ define i1 @assume_olt_neg1__ogt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -45,8 +43,7 @@ define i1 @assume_olt_neg1__oge_0(float %arg) { ; CHECK-SAME: float 
[[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -59,8 +56,7 @@ define i1 @assume_olt_neg1__olt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -73,8 +69,7 @@ define i1 @assume_olt_neg1__ole_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ole float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -87,8 +82,7 @@ define i1 @assume_olt_neg1__one_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -101,8 +95,7 @@ define i1 @assume_olt_neg1__ord_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float 
%arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -115,8 +108,7 @@ define i1 @assume_olt_neg1__ueq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -129,8 +121,7 @@ define i1 @assume_olt_neg1__ugt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -143,8 +134,7 @@ define i1 @assume_olt_neg1__uge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -157,8 +147,7 @@ define i1 @assume_olt_neg1__ult_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -171,8 +160,7 @@ define i1 @assume_olt_neg1__ule_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; 
CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -185,8 +173,7 @@ define i1 @assume_olt_neg1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -199,8 +186,7 @@ define i1 @assume_olt_neg1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -217,8 +203,7 @@ define i1 @assume_ole_neg1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -231,8 +216,7 @@ define i1 @assume_ole_neg1__ogt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -245,8 +229,7 @@ define i1 @assume_ole_neg1__oge_0(float %arg) { ; CHECK-SAME: float 
[[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -259,8 +242,7 @@ define i1 @assume_ole_neg1__olt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -273,8 +255,7 @@ define i1 @assume_ole_neg1__ole_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ole float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -287,8 +268,7 @@ define i1 @assume_ole_neg1__one_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -301,8 +281,7 @@ define i1 @assume_ole_neg1__ord_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.neg1 = fcmp ole 
float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -315,8 +294,7 @@ define i1 @assume_ole_neg1__ueq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -329,8 +307,7 @@ define i1 @assume_ole_neg1__ugt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -343,8 +320,7 @@ define i1 @assume_ole_neg1__uge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -357,8 +333,7 @@ define i1 @assume_ole_neg1__ult_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -371,8 +346,7 @@ define i1 @assume_ole_neg1__ule_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; 
CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -385,8 +359,7 @@ define i1 @assume_ole_neg1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -399,8 +372,7 @@ define i1 @assume_ole_neg1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_NEG1:%.*]] = fcmp ole float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.neg1 = fcmp ole float %arg, -1.0 call void @llvm.assume(i1 %ole.neg1) @@ -501,8 +473,7 @@ define i1 @assume_ogt_neg1__ord_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_NEG1:%.*]] = fcmp ogt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.neg1 = fcmp ogt float %arg, -1.0 call void @llvm.assume(i1 %ogt.neg1) @@ -599,8 +570,7 @@ define i1 @assume_ogt_neg1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_NEG1:%.*]] = fcmp ogt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ogt.neg1 = fcmp ogt float %arg, -1.0 call void @llvm.assume(i1 %ogt.neg1) @@ -701,8 +671,7 @@ define i1 @assume_oge_neg1__ord_0(float %arg) { ; CHECK-SAME: float 
[[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_NEG1:%.*]] = fcmp oge float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.neg1 = fcmp oge float %arg, -1.0 call void @llvm.assume(i1 %oge.neg1) @@ -799,8 +768,7 @@ define i1 @assume_oge_neg1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_NEG1:%.*]] = fcmp oge float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.neg1 = fcmp oge float %arg, -1.0 call void @llvm.assume(i1 %oge.neg1) @@ -1217,8 +1185,7 @@ define i1 @assume_ule_neg1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULE_NEG1:%.*]] = fcmp ule float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ule.neg1 = fcmp ule float %arg, -1.0 call void @llvm.assume(i1 %ule.neg1) @@ -1231,8 +1198,7 @@ define i1 @assume_ule_neg1__ogt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULE_NEG1:%.*]] = fcmp ule float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ule.neg1 = fcmp ule float %arg, -1.0 call void @llvm.assume(i1 %ule.neg1) @@ -1245,8 +1211,7 @@ define i1 @assume_ule_neg1__oge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULE_NEG1:%.*]] = fcmp ule float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ule.neg1 = 
fcmp ule float %arg, -1.0 call void @llvm.assume(i1 %ule.neg1) @@ -1357,8 +1322,7 @@ define i1 @assume_ule_neg1__ult_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULE_NEG1:%.*]] = fcmp ule float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ule.neg1 = fcmp ule float %arg, -1.0 call void @llvm.assume(i1 %ule.neg1) @@ -1371,8 +1335,7 @@ define i1 @assume_ule_neg1__ule_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULE_NEG1:%.*]] = fcmp ule float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ule.neg1 = fcmp ule float %arg, -1.0 call void @llvm.assume(i1 %ule.neg1) @@ -1385,8 +1348,7 @@ define i1 @assume_ule_neg1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULE_NEG1:%.*]] = fcmp ule float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULE_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ule.neg1 = fcmp ule float %arg, -1.0 call void @llvm.assume(i1 %ule.neg1) @@ -1417,8 +1379,7 @@ define i1 @assume_ult_neg1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ult.neg1 = fcmp ult float %arg, -1.0 call void @llvm.assume(i1 %ult.neg1) @@ -1431,8 +1392,7 @@ define i1 @assume_ult_neg1__ogt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 
[[ULT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ult.neg1 = fcmp ult float %arg, -1.0 call void @llvm.assume(i1 %ult.neg1) @@ -1445,8 +1405,7 @@ define i1 @assume_ult_neg1__oge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ult.neg1 = fcmp ult float %arg, -1.0 call void @llvm.assume(i1 %ult.neg1) @@ -1557,8 +1516,7 @@ define i1 @assume_ult_neg1__ult_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ult.neg1 = fcmp ult float %arg, -1.0 call void @llvm.assume(i1 %ult.neg1) @@ -1571,8 +1529,7 @@ define i1 @assume_ult_neg1__ule_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ult.neg1 = fcmp ult float %arg, -1.0 call void @llvm.assume(i1 %ult.neg1) @@ -1585,8 +1542,7 @@ define i1 @assume_ult_neg1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[ULT_NEG1:%.*]] = fcmp ult float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[ULT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ult.neg1 = fcmp ult float %arg, -1.0 call void @llvm.assume(i1 %ult.neg1) @@ -1824,8 +1780,7 @@ define i1 @assume_olt_pos1__ord_0(float 
%arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_POS1:%.*]] = fcmp olt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.pos1 = fcmp olt float %arg, 1.0 call void @llvm.assume(i1 %olt.pos1) @@ -1922,8 +1877,7 @@ define i1 @assume_olt_pos1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_POS1:%.*]] = fcmp olt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.pos1 = fcmp olt float %arg, 1.0 call void @llvm.assume(i1 %olt.pos1) @@ -2024,8 +1978,7 @@ define i1 @assume_ole_pos1__ord_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_POS1:%.*]] = fcmp ole float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ole.pos1 = fcmp ole float %arg, 1.0 call void @llvm.assume(i1 %ole.pos1) @@ -2122,8 +2075,7 @@ define i1 @assume_ole_pos1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLE_POS1:%.*]] = fcmp ole float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ole.pos1 = fcmp ole float %arg, 1.0 call void @llvm.assume(i1 %ole.pos1) @@ -2140,8 +2092,7 @@ define i1 @assume_ogt_pos1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 
false ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2154,8 +2105,7 @@ define i1 @assume_ogt_pos1__ogt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2168,8 +2118,7 @@ define i1 @assume_ogt_pos1__oge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2182,8 +2131,7 @@ define i1 @assume_ogt_pos1__olt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2196,8 +2144,7 @@ define i1 @assume_ogt_pos1__ole_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ole float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2210,8 +2157,7 @@ define i1 @assume_ogt_pos1__one_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void 
@llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2224,8 +2170,7 @@ define i1 @assume_ogt_pos1__ord_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2238,8 +2183,7 @@ define i1 @assume_ogt_pos1__ueq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2252,8 +2196,7 @@ define i1 @assume_ogt_pos1__ugt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2266,8 +2209,7 @@ define i1 @assume_ogt_pos1__uge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2280,8 +2222,7 @@ define i1 
@assume_ogt_pos1__ult_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2294,8 +2235,7 @@ define i1 @assume_ogt_pos1__ule_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2308,8 +2248,7 @@ define i1 @assume_ogt_pos1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2322,8 +2261,7 @@ define i1 @assume_ogt_pos1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGT_POS1:%.*]] = fcmp ogt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ogt.pos1 = fcmp ogt float %arg, 1.0 call void @llvm.assume(i1 %ogt.pos1) @@ -2340,8 +2278,7 @@ define i1 @assume_oge_pos1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 
[[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2354,8 +2291,7 @@ define i1 @assume_oge_pos1__ogt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2368,8 +2304,7 @@ define i1 @assume_oge_pos1__oge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2382,8 +2317,7 @@ define i1 @assume_oge_pos1__olt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2396,8 +2330,7 @@ define i1 @assume_oge_pos1__ole_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ole float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2410,8 +2343,7 @@ define i1 @assume_oge_pos1__one_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 
1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2424,8 +2356,7 @@ define i1 @assume_oge_pos1__ord_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2438,8 +2369,7 @@ define i1 @assume_oge_pos1__ueq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2452,8 +2382,7 @@ define i1 @assume_oge_pos1__ugt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2466,8 +2395,7 @@ define i1 @assume_oge_pos1__uge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2480,8 +2408,7 
@@ define i1 @assume_oge_pos1__ult_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2494,8 +2421,7 @@ define i1 @assume_oge_pos1__ule_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2508,8 +2434,7 @@ define i1 @assume_oge_pos1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2522,8 +2447,7 @@ define i1 @assume_oge_pos1__uno_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OGE_POS1:%.*]] = fcmp oge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %oge.pos1 = fcmp oge float %arg, 1.0 call void @llvm.assume(i1 %oge.pos1) @@ -2540,8 +2464,7 @@ define i1 @assume_ugt_pos1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGT_POS1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; 
CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ugt.pos1 = fcmp ugt float %arg, 1.0 call void @llvm.assume(i1 %ugt.pos1) @@ -2582,8 +2505,7 @@ define i1 @assume_ugt_pos1__olt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGT_POS1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ugt.pos1 = fcmp ugt float %arg, 1.0 call void @llvm.assume(i1 %ugt.pos1) @@ -2596,8 +2518,7 @@ define i1 @assume_ugt_pos1__ole_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGT_POS1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ole float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %ugt.pos1 = fcmp ugt float %arg, 1.0 call void @llvm.assume(i1 %ugt.pos1) @@ -2652,8 +2573,7 @@ define i1 @assume_ugt_pos1__ugt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGT_POS1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ugt.pos1 = fcmp ugt float %arg, 1.0 call void @llvm.assume(i1 %ugt.pos1) @@ -2666,8 +2586,7 @@ define i1 @assume_ugt_pos1__uge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGT_POS1:%.*]] = fcmp ugt float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ugt.pos1 = fcmp ugt float %arg, 1.0 call void @llvm.assume(i1 %ugt.pos1) @@ -2708,8 +2627,7 @@ define i1 @assume_ugt_pos1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGT_POS1:%.*]] = fcmp ugt float 
[[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGT_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %ugt.pos1 = fcmp ugt float %arg, 1.0 call void @llvm.assume(i1 %ugt.pos1) @@ -2740,8 +2658,7 @@ define i1 @assume_uge_pos1__oeq_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGE_POS1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %uge.pos1 = fcmp uge float %arg, 1.0 call void @llvm.assume(i1 %uge.pos1) @@ -2782,8 +2699,7 @@ define i1 @assume_uge_pos1__olt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGE_POS1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %uge.pos1 = fcmp uge float %arg, 1.0 call void @llvm.assume(i1 %uge.pos1) @@ -2796,8 +2712,7 @@ define i1 @assume_uge_pos1__ole_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGE_POS1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ole float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %uge.pos1 = fcmp uge float %arg, 1.0 call void @llvm.assume(i1 %uge.pos1) @@ -2852,8 +2767,7 @@ define i1 @assume_uge_pos1__ugt_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGE_POS1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %uge.pos1 = fcmp uge float %arg, 1.0 call void @llvm.assume(i1 %uge.pos1) @@ 
-2866,8 +2780,7 @@ define i1 @assume_uge_pos1__uge_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGE_POS1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %uge.pos1 = fcmp uge float %arg, 1.0 call void @llvm.assume(i1 %uge.pos1) @@ -2908,8 +2821,7 @@ define i1 @assume_uge_pos1__une_0(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[UGE_POS1:%.*]] = fcmp uge float [[ARG]], 1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[UGE_POS1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ARG]], 0.000000e+00 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %uge.pos1 = fcmp uge float %arg, 1.0 call void @llvm.assume(i1 %uge.pos1) @@ -2940,8 +2852,7 @@ define i1 @assume_olt_neg1__oeq_inf(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ARG]], 0x7FF0000000000000 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) @@ -2954,8 +2865,7 @@ define i1 @assume_olt_neg1__one_inf(float %arg) { ; CHECK-SAME: float [[ARG:%.*]]) { ; CHECK-NEXT: [[OLT_NEG1:%.*]] = fcmp olt float [[ARG]], -1.000000e+00 ; CHECK-NEXT: call void @llvm.assume(i1 [[OLT_NEG1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[ARG]], 0x7FF0000000000000 -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %olt.neg1 = fcmp olt float %arg, -1.0 call void @llvm.assume(i1 %olt.neg1) From 5a4ca51a91ff28b1d6bdde5403144c29b86e4b54 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Fri, 1 Dec 2023 10:03:02 +0100 Subject: [PATCH 13/72] [mlir] notify insertion of parent op first when cloning (#73806) When cloning an operation with a region, the builder was 
currently notifying about the insertion of the cloned operations inside the region before the cloned operation itself. When using cloning inside rewrite pass, this could cause issues if a pattern is expected to be applied on a cloned parent operation before trying to apply patterns on the cloned operations it contains (the patterns are attempted in order of notifications for the cloned operations). --- mlir/lib/IR/Builders.cpp | 7 ++++--- mlir/test/IR/test-clone.mlir | 23 +++++++++++++++++++---- mlir/test/lib/IR/TestClone.cpp | 8 ++++++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/mlir/lib/IR/Builders.cpp b/mlir/lib/IR/Builders.cpp index ab20f4863e11c2..2cabfcd24d3559 100644 --- a/mlir/lib/IR/Builders.cpp +++ b/mlir/lib/IR/Builders.cpp @@ -527,7 +527,8 @@ LogicalResult OpBuilder::tryFold(Operation *op, Operation *OpBuilder::clone(Operation &op, IRMapping &mapper) { Operation *newOp = op.clone(mapper); - // The `insert` call below handles the notification for inserting `newOp` + newOp = insert(newOp); + // The `insert` call above handles the notification for inserting `newOp` // itself. But if `newOp` has any regions, we need to notify the listener // about any ops that got inserted inside those regions as part of cloning. 
if (listener) { @@ -535,9 +536,9 @@ Operation *OpBuilder::clone(Operation &op, IRMapping &mapper) { listener->notifyOperationInserted(walkedOp); }; for (Region ®ion : newOp->getRegions()) - region.walk(walkFn); + region.walk(walkFn); } - return insert(newOp); + return newOp; } Operation *OpBuilder::clone(Operation &op) { diff --git a/mlir/test/IR/test-clone.mlir b/mlir/test/IR/test-clone.mlir index 575098b642e8ea..0c07593aef32d9 100644 --- a/mlir/test/IR/test-clone.mlir +++ b/mlir/test/IR/test-clone.mlir @@ -1,20 +1,35 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(test-clone))" -split-input-file +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(test-clone))" | FileCheck %s module { func.func @fixpoint(%arg1 : i32) -> i32 { %r = "test.use"(%arg1) ({ - "test.yield"(%arg1) : (i32) -> () + %r2 = "test.use2"(%arg1) ({ + "test.yield2"(%arg1) : (i32) -> () + }) : (i32) -> i32 + "test.yield"(%r2) : (i32) -> () }) : (i32) -> i32 return %r : i32 } } +// CHECK: notifyOperationInserted: test.use +// CHECK-NEXT: notifyOperationInserted: test.use2 +// CHECK-NEXT: notifyOperationInserted: test.yield2 +// CHECK-NEXT: notifyOperationInserted: test.yield +// CHECK-NEXT: notifyOperationInserted: func.return + // CHECK: func @fixpoint(%[[arg0:.+]]: i32) -> i32 { // CHECK-NEXT: %[[i0:.+]] = "test.use"(%[[arg0]]) ({ -// CHECK-NEXT: "test.yield"(%arg0) : (i32) -> () +// CHECK-NEXT: %[[r2:.+]] = "test.use2"(%[[arg0]]) ({ +// CHECK-NEXT: "test.yield2"(%[[arg0]]) : (i32) -> () +// CHECK-NEXT: }) : (i32) -> i32 +// CHECK-NEXT: "test.yield"(%[[r2]]) : (i32) -> () // CHECK-NEXT: }) : (i32) -> i32 // CHECK-NEXT: %[[i1:.+]] = "test.use"(%[[i0]]) ({ -// CHECK-NEXT: "test.yield"(%[[i0]]) : (i32) -> () +// CHECK-NEXT: %[[r2:.+]] = "test.use2"(%[[i0]]) ({ +// CHECK-NEXT: "test.yield2"(%[[i0]]) : (i32) -> () +// CHECK-NEXT: }) : (i32) -> i32 +// CHECK-NEXT: "test.yield"(%[[r2]]) : (i32) -> () // CHECK-NEXT: }) : 
(i32) -> i32 // CHECK-NEXT: return %[[i1]] : i32 // CHECK-NEXT: } diff --git a/mlir/test/lib/IR/TestClone.cpp b/mlir/test/lib/IR/TestClone.cpp index 70238608a67c2b..13a0cfeb402a9c 100644 --- a/mlir/test/lib/IR/TestClone.cpp +++ b/mlir/test/lib/IR/TestClone.cpp @@ -14,6 +14,12 @@ using namespace mlir; namespace { +struct DumpNotifications : public OpBuilder::Listener { + void notifyOperationInserted(Operation *op) override { + llvm::outs() << "notifyOperationInserted: " << op->getName() << "\n"; + } +}; + /// This is a test pass which clones the body of a function. Specifically /// this pass replaces f(x) to instead return f(f(x)) in which the cloned body /// takes the result of the first operation return as an input. @@ -50,6 +56,8 @@ struct ClonePass } OpBuilder builder(op->getContext()); + DumpNotifications dumpNotifications; + builder.setListener(&dumpNotifications); builder.setInsertionPointToEnd(®ionEntry); SmallVector toClone; for (Operation &inst : regionEntry) From a224ddc9b4458b1b9cf0a758c974a554f0f17dc4 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Fri, 1 Dec 2023 10:21:50 +0100 Subject: [PATCH 14/72] [mlir][nvvm] Introduce `cp.async.bulk.commit.group` This PR introduced `cp.async.bulk.commit.group` op. 
--- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 9 +++++++++ mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 54826f4196993d..ecad1a16eb6c59 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -1420,6 +1420,15 @@ def NVVM_MmaOp : NVVM_Op<"mma.sync", [AttrSizedOperandSegments]> { // NVVM TMA Ops //===----------------------------------------------------------------------===// +def NVVM_CpAsyncBulkCommitGroupOp : NVVM_PTXBuilder_Op<"cp.async.bulk.commit.group">, + Arguments<(ins )> { + let assemblyFormat = "attr-dict"; + let extraClassDefinition = [{ + std::string $cppClass::getPtx() { return std::string("cp.async.bulk.commit_group;"); } + }]; +} + + def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp : NVVM_Op<"cp.async.bulk.tensor.shared.cluster.global", [DeclareOpInterfaceMethods, diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 7da4e98c40e54b..5482cc194192dd 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -621,3 +621,11 @@ func.func @set_max_register() { nvvm.setmaxregister decrease 40 func.return } + +// ----- + +func.func @cp_bulk_commit() { + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "cp.async.bulk.commit_group;" + nvvm.cp.async.bulk.commit.group + func.return +} From 4d1dc7770a6411b87cc488dd982c034f1b4ff7a7 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 1 Dec 2023 09:22:13 +0000 Subject: [PATCH 15/72] AMDGPU/load-global-i32: regenerate test using UTC (NFC) (#73962) Fix the RUN lines so that UTC runs cleanly, and regenerate the test load-global-i32.ll using utils/update_llc_test_checks.py. 
--- llvm/test/CodeGen/AMDGPU/load-global-i32.ll | 4935 +++++++++++++++++-- 1 file changed, 4462 insertions(+), 473 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll index c4d9b4b2bb5ebb..55f0773f7e05ae 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll @@ -1,113 +1,825 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=GCNX3-HSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=GCNX3-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false 
-mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCNX3-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCNX3-NOHSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX900-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX908-HSA %s -; FUNC-LABEL: {{^}}global_load_i32: -; GCN-NOHSA: buffer_load_dword v{{[0-9]+}} -; GCN-HSA: {{flat|global}}_load_dword - -; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 define amdgpu_kernel void @global_load_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dword v2, v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, 
s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: flat_store_dword v[0:1], v2 +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dword v1, v0, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: global_store_dword v0, v1, s[0:1] +; GCN-HSA-NEXT: s_endpgm entry: %ld = load i32, ptr addrspace(1) %in store i32 %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v2i32: -; GCN-NOHSA: buffer_load_dwordx2 -; GCN-HSA: {{flat|global}}_load_dwordx2 - -; EG: VTX_READ_64 define amdgpu_kernel void @global_load_v2i32(ptr addrspace(1) %out, ptr addrspace(1) 
%in) #0 { +; SI-NOHSA-LABEL: global_load_v2i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v2i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v2i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v2i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v2i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <2 x i32>, ptr addrspace(1) %in store <2 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v3i32: -; SI-NOHSA: buffer_load_dwordx4 -; GCNX3-NOHSA: buffer_load_dwordx3 -; GCNX3-HSA: {{flat|global}}_load_dwordx3 - -; EG: VTX_READ_128 define amdgpu_kernel void @global_load_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v3i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 +; SI-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v3i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; 
GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx3 v[0:2], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v3i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx3 v[0:2], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v3i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, +; EG-NEXT: MOV * T2.X, T0.Z, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LSHR * T3.X, PV.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v3i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0 +; GCN-HSA-NEXT: s_waitcnt 
lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx3 v[0:2], v3, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <3 x i32>, ptr addrspace(1) %in store <3 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v4i32: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - -; EG: VTX_READ_128 define amdgpu_kernel void @global_load_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v4i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v4i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v4i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; 
GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v4i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v4i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <4 x i32>, ptr addrspace(1) %in store <4 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v8i32: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - -; EG: VTX_READ_128 -; EG: VTX_READ_128 define amdgpu_kernel void @global_load_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v8i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt 
lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(1) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(1) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v8i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s0 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v11, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v8i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: 
buffer_load_dwordx4 v[4:7], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v8i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T2.X, PV.W, literal.x, +; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v8i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(1) +; GCN-HSA-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(1) +; GCN-HSA-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <8 x i32>, ptr addrspace(1) %in store <8 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v9i32: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dword -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: 
{{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dword define amdgpu_kernel void @global_load_v9i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v9i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dword v8, off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v9i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s4 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: flat_load_dword v14, v[8:9] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; 
GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s0 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v11, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dword v[12:13], v14 +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v9i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_load_dword v8, off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v9i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 8, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 2 @8 +; EG-NEXT: ALU 1, @23, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.X, T0.X, 1 +; EG-NEXT: CF_END +; 
EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_128 T4.XYZW, T2.X, 0, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T2.X, 16, #1 +; EG-NEXT: VTX_READ_32 T3.X, T3.X, 32, #1 +; EG-NEXT: ALU clause starting at 14: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) +; EG-NEXT: LSHR T1.X, PV.W, literal.x, +; EG-NEXT: MOV * T2.X, KC0[2].Z, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: MOV * T3.X, PS, +; EG-NEXT: ALU clause starting at 23: +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v9i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dword v9, v8, s[2:3] offset:32 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dword v8, v9, s[0:1] offset:32 +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <9 x i32>, ptr addrspace(1) %in store <9 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v10i32: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx2 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx2 define amdgpu_kernel void @global_load_v10i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v10i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v10i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: flat_load_dwordx2 v[8:9], v[8:9] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v11, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, s0 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v15, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s2 
+; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx2 v[14:15], v[8:9] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v10i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v10i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 2 @8 +; EG-NEXT: ALU 7, @15, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T4.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 32, #1 +; EG-NEXT: ALU clause 
starting at 14: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 15: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T3.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) +; EG-NEXT: LSHR T4.X, PV.W, literal.x, +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v10i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v10, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v10, s[2:3] +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v10, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx2 v[8:9], v10, s[2:3] offset:32 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:32 +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <10 x i32>, ptr addrspace(1) %in store <10 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v11i32: -; SI-NOHSA: buffer_load_dwordx4 -; SI-NOHSA: buffer_load_dwordx4 -; SI-NOHSA: buffer_load_dwordx4 -; GCNX3-NOHSA: buffer_load_dwordx4 -; GCNX3-NOHSA: buffer_load_dwordx4 -; GCNX3-NOHSA: buffer_load_dwordx3 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx3 define amdgpu_kernel void @global_load_v11i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v11i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: 
s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:40 +; SI-NOHSA-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v11i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: flat_load_dwordx3 v[8:10], v[8:9] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v11, s0 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v15, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; 
GCNX3-HSA-NEXT: flat_store_dwordx4 v[11:12], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[13:14], v[4:7] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx3 v[15:16], v[8:10] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v11i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx3 v[8:10], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx3 v[8:10], off, s[4:7], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v11i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 2 @8 +; EG-NEXT: ALU 12, @15, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T7.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T6.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 32, #1 +; EG-NEXT: ALU clause starting at 14: +; EG-NEXT: MOV * T0.X, 
KC0[2].Z, +; EG-NEXT: ALU clause starting at 15: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T3.X, PV.W, literal.x, +; EG-NEXT: MOV * T4.X, T0.Z, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 40(5.605194e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T5.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) +; EG-NEXT: LSHR T6.X, PV.W, literal.x, +; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v11i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v11, s[2:3] +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v11, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx3 v[8:10], v11, s[2:3] offset:32 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v11, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v11, v[4:7], s[0:1] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx3 v11, v[8:10], s[0:1] offset:32 +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <11 x i32>, ptr addrspace(1) %in store <11 x i32> %ld, ptr addrspace(1) %out @@ -115,533 +827,3810 @@ entry: } -; FUNC-LABEL: {{^}}global_load_v12i32: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 define amdgpu_kernel void @global_load_v12i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v12i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; 
SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v12i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s0 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v15, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v17, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s0 +; GCNX3-HSA-NEXT: s_waitcnt 
vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[14:15], v[4:7] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v12i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v12i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 7, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 2 @8 +; EG-NEXT: ALU 1, @22, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_128 T3.XYZW, T2.X, 0, #1 +; EG-NEXT: VTX_READ_128 T4.XYZW, T2.X, 16, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T2.X, 32, #1 +; EG-NEXT: ALU clause starting at 14: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; 
EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) +; EG-NEXT: LSHR T1.X, PV.W, literal.x, +; EG-NEXT: MOV * T2.X, KC0[2].Z, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ALU clause starting at 22: +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v12i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v12, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v12, s[2:3] +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v12, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx4 v[8:11], v12, s[2:3] offset:32 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:32 +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <12 x i32>, ptr addrspace(1) %in store <12 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v16i32: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - -; EG: VTX_READ_128 -; EG: VTX_READ_128 -; EG: VTX_READ_128 -; EG: VTX_READ_128 define amdgpu_kernel void @global_load_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v16i32: +; SI-NOHSA: ; %bb.0: ; %entry +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 
s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:48 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(3) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(3) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:48 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(3) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(3) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v16i32: +; GCNX3-HSA: ; %bb.0: ; %entry +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s4 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 48 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; 
GCNX3-HSA-NEXT: v_mov_b32_e32 v19, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 48 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v17, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s0 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v23, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v22, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[20:21], v[4:7] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[18:19], v[8:11] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[22:23], v[12:15] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v16i32: +; GCNX3-NOHSA: ; %bb.0: ; %entry +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:48 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, 
s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v16i32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 11, @16, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 3 @8 +; EG-NEXT: ALU 1, @28, KC0[], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T7.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T2.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_128 T4.XYZW, T3.X, 32, #1 +; EG-NEXT: VTX_READ_128 T5.XYZW, T3.X, 48, #1 +; EG-NEXT: VTX_READ_128 T6.XYZW, T3.X, 0, #1 +; EG-NEXT: VTX_READ_128 T3.XYZW, T3.X, 16, #1 +; EG-NEXT: ALU clause starting at 16: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T2.X, PV.W, literal.x, +; EG-NEXT: MOV * T3.X, KC0[2].Z, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) +; EG-NEXT: ALU clause starting at 28: +; EG-NEXT: LSHR * T7.X, T0.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v16i32: +; GCN-HSA: ; %bb.0: ; %entry +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v16, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v16, s[2:3] offset:32 +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v16, s[2:3] offset:48 +; GCN-HSA-NEXT: global_load_dwordx4 v[8:11], v16, s[2:3] +; GCN-HSA-NEXT: global_load_dwordx4 v[12:15], v16, s[2:3] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(3) +; 
GCN-HSA-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:32 +; GCN-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-HSA-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:48 +; GCN-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-HSA-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GCN-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-HSA-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GCN-HSA-NEXT: s_endpgm entry: %ld = load <16 x i32>, ptr addrspace(1) %in store <16 x i32> %ld, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_zextload_i32_to_i64: -; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]], -; GCN-HSA-DAG: {{flat|global}}_load_dword v[[LO:[0-9]+]], -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} - -; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] - -; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY define amdgpu_kernel void @global_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_zextload_i32_to_i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: v_mov_b32_e32 v1, 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_zextload_i32_to_i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dword v0, v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s0 +; 
GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_zextload_i32_to_i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v1, 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_zextload_i32_to_i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: MOV * T0.Y, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_zextload_i32_to_i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dword v0, v1, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load i32, ptr addrspace(1) %in %ext = zext i32 %ld to i64 store i64 %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: 
{{^}}global_sextload_i32_to_i64: -; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]] -; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]] -; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] -; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] - - -; EG: MEM_RAT -; EG: VTX_READ_32 -; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal. -; EG: 31 define amdgpu_kernel void @global_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_sextload_i32_to_i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_sextload_i32_to_i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dword v0, v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCNX3-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_sextload_i32_to_i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, 
s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_sextload_i32_to_i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, +; EG-NEXT: ASHR * T0.Y, T0.X, literal.y, +; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) +; +; GCN-HSA-LABEL: global_sextload_i32_to_i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dword v0, v2, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-HSA-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load i32, ptr addrspace(1) %in %ext = sext i32 %ld to i64 store i64 %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64: -; GCN-NOHSA: buffer_load_dword -; GCN-NOHSA: buffer_store_dwordx2 - -; GCN-HSA: {{flat|global}}_load_dword -; GCN-HSA: {{flat|global}}_store_dwordx2 define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_zextload_v1i32_to_v1i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: v_mov_b32_e32 v1, 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_zextload_v1i32_to_v1i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dword v0, v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_zextload_v1i32_to_v1i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v1, 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_zextload_v1i32_to_v1i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 2, @9, 
KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: MOV * T0.Y, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_zextload_v1i32_to_v1i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dword v0, v1, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <1 x i32>, ptr addrspace(1) %in %ext = zext <1 x i32> %ld to <1 x i64> store <1 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64: -; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]] -; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]] -; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] -; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]] define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_sextload_v1i32_to_v1i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NOHSA-NEXT: buffer_store_dwordx2 
v[0:1], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_sextload_v1i32_to_v1i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dword v0, v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCNX3-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_sextload_v1i32_to_v1i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dword v0, off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_sextload_v1i32_to_v1i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x, +; EG-NEXT: ASHR * T0.Y, T0.X, literal.y, +; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) +; +; GCN-HSA-LABEL: global_sextload_v1i32_to_v1i64: +; GCN-HSA: ; %bb.0: +; 
GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dword v0, v2, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-HSA-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <1 x i32>, ptr addrspace(1) %in %ext = sext <1 x i32> %ld to <1 x i64> store <1 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64: -; GCN-NOHSA: buffer_load_dwordx2 -; GCN-NOHSA: buffer_store_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx2 -; GCN-HSA: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_zextload_v2i32_to_v2i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v1, 0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v3, v1 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v5 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_zextload_v2i32_to_v2i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 
v5, s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, v1 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_zextload_v2i32_to_v2i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx2 v[2:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v1, 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v0, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v2, v3 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v3, v1 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_zextload_v2i32_to_v2i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: MOV T1.X, T0.X, +; EG-NEXT: MOV T1.Y, 0.0, +; EG-NEXT: MOV T1.Z, T0.Y, +; EG-NEXT: MOV T1.W, 0.0, +; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_zextload_v2i32_to_v2i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: 
global_load_dwordx2 v[2:3], v1, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v2 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v3 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <2 x i32>, ptr addrspace(1) %in %ext = zext <2 x i32> %ld to <2 x i64> store <2 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64: -; GCN-NOHSA: buffer_load_dwordx2 -; GCN-HSA: {{flat|global}}_load_dwordx2 - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_sextload_v2i32_to_v2i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v1 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v1 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_sextload_v2i32_to_v2i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s1 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: 
v_ashrrev_i32_e32 v3, 31, v1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v1 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_sextload_v2i32_to_v2i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v1 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v2, v1 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_sextload_v2i32_to_v2i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: ASHR * T1.W, T0.Y, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR * T1.Y, T0.X, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T1.X, T0.X, +; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: MOV * T1.Z, T0.Y, +; +; GCN-HSA-LABEL: global_sextload_v2i32_to_v2i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, 0 +; GCN-HSA-NEXT: 
s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v1 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v1 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-HSA-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <2 x i32>, ptr addrspace(1) %in %ext = sext <2 x i32> %ld to <2 x i64> store <2 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_zextload_v4i32_to_v4i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v5, 0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v7, v5 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v2 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v3 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v1 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_zextload_v4i32_to_v4i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: 
v_mov_b32_e32 v5, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v7, v5 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s2 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v6, v3 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] +; GCNX3-HSA-NEXT: s_nop 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v6, v1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s1 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_zextload_v4i32_to_v4i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v5, 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v7, v5 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v4, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v6, v3 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v4, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v6, v1 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_zextload_v4i32_to_v4i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: 
TEX 0 @6 +; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: MOV T1.X, T0.Z, +; EG-NEXT: MOV T1.Y, 0.0, +; EG-NEXT: MOV * T2.X, T0.X, +; EG-NEXT: MOV T2.Y, 0.0, +; EG-NEXT: MOV T1.Z, T0.W, +; EG-NEXT: MOV T1.W, 0.0, +; EG-NEXT: MOV * T2.Z, T0.Y, +; EG-NEXT: MOV * T2.W, 0.0, +; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) +; EG-NEXT: LSHR * T3.X, PV.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_zextload_v4i32_to_v4i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v1, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v7 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:16 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v5 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <4 x i32>, ptr addrspace(1) %in %ext = zext <4 x i32> %ld to <4 x i64> store <4 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; 
GCN-HSA-DAG: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_sextload_v4i32_to_v4i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v6, 31, v1 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v8, 31, v2 +; SI-NOHSA-NEXT: v_mov_b32_e32 v7, v2 +; SI-NOHSA-NEXT: v_mov_b32_e32 v9, v3 +; SI-NOHSA-NEXT: v_mov_b32_e32 v3, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v5, v1 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[7:10], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_sextload_v4i32_to_v4i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v11, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v7, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, v3 +; GCNX3-HSA-NEXT: 
v_ashrrev_i32_e32 v6, 31, v1 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, v1 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[13:14], v[7:10] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[11:12], v[3:6] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_sextload_v4i32_to_v4i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v8, 31, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v7, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v9, v3 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v6, 31, v1 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v3, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v5, v1 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[7:10], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_sextload_v4i32_to_v4i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 15, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: ASHR * T1.W, T0.Y, literal.x, +; EG-NEXT: 
31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T2.X, KC0[2].Y, literal.x, +; EG-NEXT: ASHR T1.Y, T0.X, literal.y, +; EG-NEXT: ASHR T3.W, T0.W, literal.y, +; EG-NEXT: MOV * T1.X, T0.X, +; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) +; EG-NEXT: ASHR * T3.Y, T0.Z, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T3.X, T0.Z, +; EG-NEXT: MOV T1.Z, T0.Y, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: MOV * T3.Z, T0.W, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_sextload_v4i32_to_v4i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v11, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v3 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v2 +; GCN-HSA-NEXT: v_mov_b32_e32 v7, v2 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, v3 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 31, v1 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v4, 31, v0 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, v0 +; GCN-HSA-NEXT: v_mov_b32_e32 v5, v1 +; GCN-HSA-NEXT: global_store_dwordx4 v11, v[7:10], s[0:1] offset:16 +; GCN-HSA-NEXT: global_store_dwordx4 v11, v[3:6], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <4 x i32>, ptr addrspace(1) %in %ext = sext <4 x i32> %ld to <4 x i64> store <4 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: 
{{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_zextload_v8i32_to_v8i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v9, 0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v11, v9 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(1) +; SI-NOHSA-NEXT: v_mov_b32_e32 v8, v2 +; SI-NOHSA-NEXT: v_mov_b32_e32 v10, v3 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v8, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v10, v1 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v8, v6 +; SI-NOHSA-NEXT: v_mov_b32_e32 v10, v7 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v8, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v10, v5 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_zextload_v8i32_to_v8i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v11, v9 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, 
s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v15, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 48 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v17, s3 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v3 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] +; GCNX3-HSA-NEXT: s_nop 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v1 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v7 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v5 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_zextload_v8i32_to_v8i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: 
buffer_load_dwordx4 v[4:7], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v9, 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v11, v9 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v8, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v10, v3 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v8, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v10, v1 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v8, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v10, v7 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v8, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v10, v5 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_zextload_v8i32_to_v8i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @8 +; EG-NEXT: ALU 26, @13, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T7.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T6.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 12: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 13: +; EG-NEXT: MOV T2.X, T1.Z, +; EG-NEXT: MOV T2.Y, 0.0, +; EG-NEXT: MOV * T3.X, T1.X, +; EG-NEXT: MOV * T3.Y, 0.0, +; EG-NEXT: MOV T4.X, T0.Z, +; EG-NEXT: MOV T4.Y, 0.0, +; EG-NEXT: MOV * T5.X, T0.X, +; EG-NEXT: MOV T5.Y, 0.0, +; EG-NEXT: MOV T2.Z, T1.W, +; EG-NEXT: MOV T2.W, 0.0, +; EG-NEXT: MOV * T3.Z, T1.Y, +; EG-NEXT: 
MOV * T3.W, 0.0, +; EG-NEXT: MOV T4.Z, T0.W, +; EG-NEXT: MOV T4.W, 0.0, +; EG-NEXT: MOV * T5.Z, T0.Y, +; EG-NEXT: MOV * T5.W, 0.0, +; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) +; EG-NEXT: LSHR T1.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) +; EG-NEXT: LSHR T6.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) +; EG-NEXT: LSHR * T7.X, PV.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_zextload_v8i32_to_v8i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v1, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx4 v[8:11], v1, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(1) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v7 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:48 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v5 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:32 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v10 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v11 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:16 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v8 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v9 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <8 x i32>, ptr addrspace(1) %in %ext = zext <8 x i32> %ld to <8 x i64> store <8 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 - -; GCN-HSA: 
{{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_sextload_v8i32_to_v8i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(1) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v8, 31, v0 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v14, 31, v3 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v12, 31, v2 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v18, 31, v5 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v16, 31, v4 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v22, 31, v7 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v20, 31, v6 +; SI-NOHSA-NEXT: v_mov_b32_e32 v19, v6 +; SI-NOHSA-NEXT: v_mov_b32_e32 v21, v7 +; SI-NOHSA-NEXT: v_mov_b32_e32 v15, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v17, v5 +; SI-NOHSA-NEXT: v_mov_b32_e32 v11, v2 +; SI-NOHSA-NEXT: 
v_mov_b32_e32 v13, v3 +; SI-NOHSA-NEXT: v_mov_b32_e32 v7, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v9, v1 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[19:22], off, s[4:7], 0 offset:48 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[15:18], off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[7:10], off, s[4:7], 0 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_sextload_v8i32_to_v8i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v19, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 48 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v17, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s0 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, s3 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v23, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v22, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v1 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v0 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v3 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, v3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v1 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[16:17], 
v[8:11] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v5 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v7 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v7 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v5 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[20:21], v[8:11] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_sextload_v8i32_to_v8i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s2, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s3 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s7 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_mov_b32 s0, s4 +; GCNX3-NOHSA-NEXT: s_mov_b32 s1, s5 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v22, 31, v7 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v20, 31, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v19, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v21, v7 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v8, 31, v0 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v14, 31, v3 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v12, 31, v2 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v18, 31, v5 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v16, 31, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v15, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v17, v5 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v11, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v13, v3 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v7, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v9, v1 
+; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_sextload_v8i32_to_v8i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @8 +; EG-NEXT: ALU 31, @13, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T0.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T2.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 12: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 13: +; EG-NEXT: LSHR T2.X, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) +; EG-NEXT: LSHR T3.X, PV.W, literal.x, +; EG-NEXT: ADD_INT T2.W, KC0[2].Y, literal.y, +; EG-NEXT: ASHR * T4.W, T0.Y, literal.z, +; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T5.X, PV.W, literal.x, +; EG-NEXT: ASHR T4.Y, T0.X, literal.y, +; EG-NEXT: ASHR T6.W, T0.W, literal.y, +; EG-NEXT: MOV * T4.X, T0.X, +; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) +; EG-NEXT: ASHR T6.Y, T0.Z, literal.x, +; EG-NEXT: ASHR * T7.W, T1.Y, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T6.X, T0.Z, +; EG-NEXT: ASHR T7.Y, T1.X, literal.x, +; EG-NEXT: MOV T4.Z, T0.Y, +; EG-NEXT: ASHR T8.W, T1.W, literal.x, +; EG-NEXT: MOV * T7.X, T1.X, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T8.Y, T1.Z, literal.x, +; EG-NEXT: MOV * T6.Z, T0.W, +; EG-NEXT: 31(4.344025e-44), 
0(0.000000e+00) +; EG-NEXT: MOV T8.X, T1.Z, +; EG-NEXT: MOV T7.Z, T1.Y, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: MOV * T8.Z, T1.W, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_sextload_v8i32_to_v8i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v23, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v23, s[2:3] +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v23, s[2:3] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(1) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 31, v7 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v20, 31, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v19, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v21, v7 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v0 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v3 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 31, v2 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v5 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v15, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v17, v5 +; GCN-HSA-NEXT: v_mov_b32_e32 v11, v2 +; GCN-HSA-NEXT: v_mov_b32_e32 v13, v3 +; GCN-HSA-NEXT: v_mov_b32_e32 v7, v0 +; GCN-HSA-NEXT: v_mov_b32_e32 v9, v1 +; GCN-HSA-NEXT: global_store_dwordx4 v23, v[19:22], s[0:1] offset:48 +; GCN-HSA-NEXT: global_store_dwordx4 v23, v[15:18], s[0:1] offset:32 +; GCN-HSA-NEXT: global_store_dwordx4 v23, v[11:14], s[0:1] offset:16 +; GCN-HSA-NEXT: global_store_dwordx4 v23, v[7:10], s[0:1] +; GCN-HSA-NEXT: s_endpgm %ld = load <8 x i32>, ptr addrspace(1) %in %ext = sext <8 x i32> %ld to <8 x i64> store <8 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 - -; 
GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_sextload_v16i32_to_v16i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s2, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s2 +; SI-NOHSA-NEXT: s_mov_b32 s11, s3 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s6 +; SI-NOHSA-NEXT: s_mov_b32 s9, s7 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(3) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v19, 31, v3 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v17, 31, v2 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v23, 31, v1 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v21, 31, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v20, v0 +; SI-NOHSA-NEXT: 
v_mov_b32_e32 v22, v1 +; SI-NOHSA-NEXT: v_mov_b32_e32 v16, v2 +; SI-NOHSA-NEXT: v_mov_b32_e32 v18, v3 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(2) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v7 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v6 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v27, 31, v5 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v25, 31, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v24, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v26, v5 +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v6 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v7 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(1) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v7, 31, v11 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v5, 31, v10 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v31, 31, v9 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v29, 31, v8 +; SI-NOHSA-NEXT: v_mov_b32_e32 v28, v8 +; SI-NOHSA-NEXT: v_mov_b32_e32 v30, v9 +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v10 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v11 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v11, 31, v15 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v9, 31, v14 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v35, 31, v13 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v33, 31, v12 +; SI-NOHSA-NEXT: v_mov_b32_e32 v32, v12 +; SI-NOHSA-NEXT: v_mov_b32_e32 v34, v13 +; SI-NOHSA-NEXT: v_mov_b32_e32 v8, v14 +; SI-NOHSA-NEXT: v_mov_b32_e32 v10, v15 +; SI-NOHSA-NEXT: s_mov_b32 s0, s4 +; SI-NOHSA-NEXT: s_mov_b32 s1, s5 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:64 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:32 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: 
global_sextload_v16i32_to_v16i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 48 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 32 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v23, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v22, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x60 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v25, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v24, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x70 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v27, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v26, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 64 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, s0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v9 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v9 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x50 +; GCNX3-HSA-NEXT: 
s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v29, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 32 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v11 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v11 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(4) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v15 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v14 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v13 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v12 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v12 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v13 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v14 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v15 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 48 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v23, s3 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[24:25], v[16:19] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(5) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v5 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v7 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v6 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, v5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v7 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v22, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v31, s1 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[28:29], v[8:11] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v30, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(6) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v1 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v10, v1 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v5, 
31, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v6, v3 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[22:23], v[8:11] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[30:31], v[4:7] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_sextload_v16i32_to_v16i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s2, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s3 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s7 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s0, s4 +; GCNX3-NOHSA-NEXT: s_mov_b32 s1, s5 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v19, 31, v3 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v27, 31, v5 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v25, 31, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v24, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v26, v5 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v17, 31, v2 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v23, 31, v7 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v21, 31, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v20, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v22, v7 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v5, 31, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v4, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v6, v1 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v3 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v11 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v10 +; 
GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v31, 31, v9 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v29, 31, v8 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v8 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v9 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v0, v10 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v2, v11 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(0) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v11, 31, v15 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v9, 31, v14 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v35, 31, v13 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v33, 31, v12 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v32, v12 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v34, v13 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v8, v14 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v10, v15 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:112 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_sextload_v16i32_to_v16i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 3 @12 +; EG-NEXT: ALU 64, @21, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T11.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T9.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T8.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T6.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T4.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: 
Fetch clause starting at 12: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 48, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 32, #1 +; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 20: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 21: +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T4.X, PV.W, literal.x, +; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T6.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) +; EG-NEXT: LSHR T7.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) +; EG-NEXT: LSHR T8.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) +; EG-NEXT: LSHR T9.X, PV.W, literal.x, +; EG-NEXT: ADD_INT T4.W, KC0[2].Y, literal.y, +; EG-NEXT: ASHR * T10.W, T0.W, literal.z, +; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T11.X, PV.W, literal.x, +; EG-NEXT: ASHR T10.Y, T0.Z, literal.y, +; EG-NEXT: ASHR T12.W, T0.Y, literal.y, +; EG-NEXT: MOV * T10.X, T0.Z, +; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) +; EG-NEXT: ASHR T12.Y, T0.X, literal.x, +; EG-NEXT: ASHR * T13.W, T3.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T12.X, T0.X, +; EG-NEXT: ASHR T13.Y, T3.Z, literal.x, +; EG-NEXT: MOV T10.Z, T0.W, +; EG-NEXT: ASHR T14.W, T3.Y, literal.x, +; EG-NEXT: MOV * T13.X, T3.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T14.Y, T3.X, literal.x, +; EG-NEXT: MOV T12.Z, T0.Y, +; EG-NEXT: ASHR * T0.W, T2.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV 
T14.X, T3.X, +; EG-NEXT: ASHR T0.Y, T2.Z, literal.x, +; EG-NEXT: MOV T13.Z, T3.W, +; EG-NEXT: ASHR T15.W, T2.Y, literal.x, +; EG-NEXT: MOV * T0.X, T2.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T15.Y, T2.X, literal.x, +; EG-NEXT: MOV T14.Z, T3.Y, +; EG-NEXT: ASHR * T3.W, T1.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T15.X, T2.X, +; EG-NEXT: ASHR T3.Y, T1.Z, literal.x, +; EG-NEXT: MOV T0.Z, T2.W, +; EG-NEXT: ASHR T16.W, T1.Y, literal.x, +; EG-NEXT: MOV * T3.X, T1.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T16.Y, T1.X, literal.x, +; EG-NEXT: MOV * T15.Z, T2.Y, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T16.X, T1.X, +; EG-NEXT: MOV T3.Z, T1.W, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, +; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) +; EG-NEXT: LSHR T1.X, PV.W, literal.x, +; EG-NEXT: MOV * T16.Z, T1.Y, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_sextload_v16i32_to_v16i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v36, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v36, s[2:3] offset:32 +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v36, s[2:3] offset:48 +; GCN-HSA-NEXT: global_load_dwordx4 v[8:11], v36, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx4 v[12:15], v36, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v3 +; GCN-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 31, v5 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 31, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v24, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v26, v5 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v2 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 31, v7 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 31, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v20, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v22, v7 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GCN-HSA-NEXT: 
v_ashrrev_i32_e32 v5, 31, v0 +; GCN-HSA-NEXT: v_mov_b32_e32 v4, v0 +; GCN-HSA-NEXT: v_mov_b32_e32 v6, v1 +; GCN-HSA-NEXT: v_mov_b32_e32 v16, v2 +; GCN-HSA-NEXT: v_mov_b32_e32 v18, v3 +; GCN-HSA-NEXT: s_waitcnt vmcnt(1) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v11 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v10 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v31, 31, v9 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v29, 31, v8 +; GCN-HSA-NEXT: v_mov_b32_e32 v28, v8 +; GCN-HSA-NEXT: v_mov_b32_e32 v30, v9 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v10 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v11 +; GCN-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v15 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v14 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v35, 31, v13 +; GCN-HSA-NEXT: v_ashrrev_i32_e32 v33, 31, v12 +; GCN-HSA-NEXT: v_mov_b32_e32 v32, v12 +; GCN-HSA-NEXT: v_mov_b32_e32 v34, v13 +; GCN-HSA-NEXT: v_mov_b32_e32 v8, v14 +; GCN-HSA-NEXT: v_mov_b32_e32 v10, v15 +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[24:27], s[0:1] offset:96 +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[20:23], s[0:1] offset:112 +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1] offset:64 +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:80 +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[28:31], s[0:1] offset:32 +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] offset:48 +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[32:35], s[0:1] +; GCN-HSA-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:16 +; GCN-HSA-NEXT: s_endpgm %ld = load <16 x i32>, ptr addrspace(1) %in %ext = sext <16 x i32> %ld to <16 x i64> store <16 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: 
{{flat|global}}_load_dwordx4 - -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 - -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 -; GCN-HSA: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_zextload_v16i32_to_v16i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s2, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s2 +; SI-NOHSA-NEXT: s_mov_b32 s11, s3 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s6 +; SI-NOHSA-NEXT: s_mov_b32 s9, s7 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48 +; SI-NOHSA-NEXT: v_mov_b32_e32 v5, 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: v_mov_b32_e32 v7, v5 +; SI-NOHSA-NEXT: s_mov_b32 s0, s4 +; SI-NOHSA-NEXT: s_mov_b32 s1, s5 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(3) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v1 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v2 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v3 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:112 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(4) expcnt(0) +; 
SI-NOHSA-NEXT: v_mov_b32_e32 v4, v8 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v9 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v10 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v11 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(4) expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v16 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v17 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v18 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v19 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:48 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v12 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v13 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v14 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v15 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_zextload_v16i32_to_v16i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v17, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v19, v17 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s6, s2, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s7, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 48 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s7 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s6 +; GCNX3-HSA-NEXT: flat_load_dwordx4 
v[8:11], v[8:9] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s4 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v23, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v22, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x60 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v25, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v24, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x70 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v27, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v26, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 64 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, s0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x50 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v3 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s2 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s3 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(4) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v5 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 32 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[24:25], v[16:19] +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v7 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[26:27], v[16:19] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, s3 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(5) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v9 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 48 +; GCNX3-HSA-NEXT: flat_store_dwordx4 
v[0:1], v[16:19] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v11 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[2:3], v[16:19] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(6) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v12 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v13 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, v14 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v18, v15 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_zextload_v16i32_to_v16i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s2, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s3 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s7 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v17, 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v19, v17 +; GCNX3-NOHSA-NEXT: s_mov_b32 s0, s4 +; GCNX3-NOHSA-NEXT: s_mov_b32 s1, s5 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v1 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v3 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(4) +; GCNX3-NOHSA-NEXT: 
v_mov_b32_e32 v16, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v5 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:64 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v7 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(5) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v8 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v9 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v10 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v11 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(6) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v12 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v13 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v14 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v15 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_zextload_v16i32_to_v16i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 3 @12 +; EG-NEXT: ALU 55, @21, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T15.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T14.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T13.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T12.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T2.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 12: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 48, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 0, #1 +; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 16, #1 +; 
EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 32, #1 +; EG-NEXT: ALU clause starting at 20: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 21: +; EG-NEXT: MOV T4.X, T1.X, +; EG-NEXT: MOV T4.Y, 0.0, +; EG-NEXT: MOV * T5.X, T1.Z, +; EG-NEXT: MOV * T5.Y, 0.0, +; EG-NEXT: MOV T6.X, T0.X, +; EG-NEXT: MOV T6.Y, 0.0, +; EG-NEXT: MOV * T7.X, T0.Z, +; EG-NEXT: MOV * T7.Y, 0.0, +; EG-NEXT: MOV T8.X, T3.X, +; EG-NEXT: MOV T8.Y, 0.0, +; EG-NEXT: MOV * T9.X, T3.Z, +; EG-NEXT: MOV * T9.Y, 0.0, +; EG-NEXT: MOV T10.X, T2.X, +; EG-NEXT: MOV T10.Y, 0.0, +; EG-NEXT: MOV * T11.X, T2.Z, +; EG-NEXT: MOV T11.Y, 0.0, +; EG-NEXT: MOV T4.Z, T1.Y, +; EG-NEXT: MOV T4.W, 0.0, +; EG-NEXT: MOV * T5.Z, T1.W, +; EG-NEXT: MOV * T5.W, 0.0, +; EG-NEXT: MOV T6.Z, T0.Y, +; EG-NEXT: MOV T6.W, 0.0, +; EG-NEXT: MOV * T7.Z, T0.W, +; EG-NEXT: MOV * T7.W, 0.0, +; EG-NEXT: MOV T8.Z, T3.Y, +; EG-NEXT: MOV T8.W, 0.0, +; EG-NEXT: MOV * T9.Z, T3.W, +; EG-NEXT: MOV * T9.W, 0.0, +; EG-NEXT: MOV T10.Z, T2.Y, +; EG-NEXT: MOV T10.W, 0.0, +; EG-NEXT: MOV * T11.Z, T2.W, +; EG-NEXT: MOV T11.W, 0.0, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PS, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T2.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) +; EG-NEXT: LSHR T3.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) +; EG-NEXT: LSHR T12.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) +; EG-NEXT: LSHR T13.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) +; EG-NEXT: LSHR T14.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * 
T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) +; EG-NEXT: LSHR * T15.X, PV.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_zextload_v16i32_to_v16i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v1, s[2:3] offset:48 +; GCN-HSA-NEXT: global_load_dwordx4 v[8:11], v1, s[2:3] offset:32 +; GCN-HSA-NEXT: global_load_dwordx4 v[12:15], v1, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx4 v[16:19], v1, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v5 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:96 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v7 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:112 +; GCN-HSA-NEXT: s_waitcnt vmcnt(4) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v8 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v9 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:64 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v10 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v11 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:80 +; GCN-HSA-NEXT: s_waitcnt vmcnt(5) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v12 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v13 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:32 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v14 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v15 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:48 +; GCN-HSA-NEXT: s_waitcnt vmcnt(6) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v16 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v17 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v18 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v19 +; GCN-HSA-NEXT: 
global_store_dwordx4 v1, v[0:3], s[0:1] offset:16 +; GCN-HSA-NEXT: s_endpgm %ld = load <16 x i32>, ptr addrspace(1) %in %ext = zext <16 x i32> %ld to <16 x i64> store <16 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64: - -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA-DAG: buffer_load_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 -; GCN-DAG: v_ashrrev_i32 - -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 - -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 - 
-; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 - -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_sextload_v32i32_to_v32i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; SI-NOHSA-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; SI-NOHSA-NEXT: s_mov_b32 s14, -1 +; SI-NOHSA-NEXT: s_mov_b32 s15, 0xe8f000 +; SI-NOHSA-NEXT: s_add_u32 s12, s12, s3 +; SI-NOHSA-NEXT: s_addc_u32 s13, s13, 0 +; SI-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s2, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s2 +; SI-NOHSA-NEXT: s_mov_b32 s11, s3 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s6 +; SI-NOHSA-NEXT: s_mov_b32 s9, s7 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:96 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:112 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 
offset:80 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:64 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:48 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(7) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v47, 31, v31 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v45, 31, v30 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(6) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v39, 31, v15 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v37, 31, v14 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v43, 31, v13 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v41, 31, v12 +; SI-NOHSA-NEXT: v_mov_b32_e32 v40, v12 +; SI-NOHSA-NEXT: v_mov_b32_e32 v42, v13 +; SI-NOHSA-NEXT: v_mov_b32_e32 v36, v14 +; SI-NOHSA-NEXT: v_mov_b32_e32 v38, v15 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v35, 31, v29 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v33, 31, v28 +; SI-NOHSA-NEXT: v_mov_b32_e32 v32, v28 +; SI-NOHSA-NEXT: v_mov_b32_e32 v34, v29 +; SI-NOHSA-NEXT: v_mov_b32_e32 v44, v30 +; SI-NOHSA-NEXT: v_mov_b32_e32 v46, v31 +; SI-NOHSA-NEXT: buffer_store_dword v44, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dword v45, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill +; SI-NOHSA-NEXT: buffer_store_dword v46, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill +; SI-NOHSA-NEXT: buffer_store_dword v47, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v15, 31, v7 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v13, 31, v6 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v47, 31, v5 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v45, 31, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v44, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v46, v5 +; SI-NOHSA-NEXT: v_mov_b32_e32 v12, v6 +; SI-NOHSA-NEXT: v_mov_b32_e32 v14, v7 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; 
SI-NOHSA-NEXT: v_ashrrev_i32_e32 v5, 31, v2 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v51, 31, v1 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v49, 31, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v48, v0 +; SI-NOHSA-NEXT: v_mov_b32_e32 v50, v1 +; SI-NOHSA-NEXT: v_mov_b32_e32 v4, v2 +; SI-NOHSA-NEXT: v_mov_b32_e32 v6, v3 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v19 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v18 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v55, 31, v17 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v53, 31, v16 +; SI-NOHSA-NEXT: v_mov_b32_e32 v52, v16 +; SI-NOHSA-NEXT: v_mov_b32_e32 v54, v17 +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v18 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v19 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v19, 31, v23 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v17, 31, v22 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v59, 31, v21 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v57, 31, v20 +; SI-NOHSA-NEXT: v_mov_b32_e32 v56, v20 +; SI-NOHSA-NEXT: v_mov_b32_e32 v58, v21 +; SI-NOHSA-NEXT: v_mov_b32_e32 v16, v22 +; SI-NOHSA-NEXT: v_mov_b32_e32 v18, v23 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v23, 31, v27 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v21, 31, v26 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v63, 31, v25 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v61, 31, v24 +; SI-NOHSA-NEXT: v_mov_b32_e32 v60, v24 +; SI-NOHSA-NEXT: v_mov_b32_e32 v62, v25 +; SI-NOHSA-NEXT: v_mov_b32_e32 v20, v26 +; SI-NOHSA-NEXT: v_mov_b32_e32 v22, v27 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v27, 31, v11 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v25, 31, v10 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v31, 31, v9 +; SI-NOHSA-NEXT: v_ashrrev_i32_e32 v29, 31, v8 +; SI-NOHSA-NEXT: v_mov_b32_e32 v28, v8 +; SI-NOHSA-NEXT: v_mov_b32_e32 v30, v9 +; SI-NOHSA-NEXT: v_mov_b32_e32 v24, v10 +; SI-NOHSA-NEXT: v_mov_b32_e32 v26, v11 +; SI-NOHSA-NEXT: s_mov_b32 s0, s4 +; SI-NOHSA-NEXT: s_mov_b32 s1, s5 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:224 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:240 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[32:35], 
off, s[0:3], 0 offset:192 +; SI-NOHSA-NEXT: buffer_load_dword v8, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; SI-NOHSA-NEXT: buffer_load_dword v9, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload +; SI-NOHSA-NEXT: buffer_load_dword v10, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; SI-NOHSA-NEXT: buffer_load_dword v11, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload +; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:160 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:128 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:144 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:96 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:64 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:32 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:16 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_sextload_v32i32_to_v32i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[28:31], v[0:1] +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 0x70 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[24:27], v[0:1] +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 0x60 +; GCNX3-HSA-NEXT: s_addc_u32 
s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 0x50 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[20:23], v[0:1] +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 64 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s5 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 48 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s4 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[16:19], v[8:9] +; GCNX3-HSA-NEXT: s_add_u32 s6, s2, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s7, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s7 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s6 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v37, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v36, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v35, 31, v29 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v33, 31, v28 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v32, v28 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v34, v29 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v29, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xe0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[36:37], v[32:35] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v37, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v36, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xf0 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v35, 31, v31 +; 
GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v33, 31, v30 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v32, v30 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v34, v31 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[28:29], v[32:35] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(8) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v31, 31, v25 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v33, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v32, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xc0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v35, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v34, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xd0 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v29, 31, v24 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, v24 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v30, v25 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[36:37], v[28:31] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v37, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v36, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xa0 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v31, 31, v27 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v29, 31, v26 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, v26 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v30, v27 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[32:33], v[28:31] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v33, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v32, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xb0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v39, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v38, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x80 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(9) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v27, 31, v21 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v25, 31, v20 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v24, v20 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v26, v21 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v31, 31, v23 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v29, 31, v22 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, v22 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v30, v23 +; GCNX3-HSA-NEXT: 
flat_store_dwordx4 v[34:35], v[24:27] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[36:37], v[28:31] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(10) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v23, 31, v15 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v21, 31, v14 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v27, 31, v13 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v25, 31, v12 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v24, v12 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v26, v13 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, v14 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v22, v15 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(9) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v5 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v14, v5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x90 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[32:33], v[24:27] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[38:39], v[20:23] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x60 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v26, 31, v7 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v24, 31, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v23, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v25, v7 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[23:26] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(12) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, v16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v15, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x70 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v17 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v6, v17 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[15:16], v[4:7] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v15, s2 +; GCNX3-HSA-NEXT: s_add_u32 
s2, s0, 64 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v26, 31, v19 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v24, 31, v18 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v23, v18 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v25, v19 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[15:16], v[23:26] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(12) +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v9 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v15, v8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v17, v9 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x50 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[15:18] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 32 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v1 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v12, 31, v0 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v11 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v6, v11 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v11, v0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, v1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 48 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[0:1], v[11:14] +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s0 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v22, 31, v3 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v20, 31, v2 +; GCNX3-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, v10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v19, v2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, v3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s1 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[0:1], v[19:22] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_sextload_v32i32_to_v32i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 
+; GCNX3-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s2, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s3 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s7 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:96 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:112 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:80 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:64 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_mov_b32 s0, s4 +; GCNX3-NOHSA-NEXT: s_mov_b32 s1, s5 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v35, 31, v11 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v33, 31, v10 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(6) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v39, 31, v15 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v37, 31, v14 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v43, 31, v13 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v41, 31, v12 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v40, v12 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v42, v13 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v36, v14 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v38, v15 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v15, 31, v9 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v13, 31, v8 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v12, v8 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v14, v9 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v32, v10 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v34, v11 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(5) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v11, 31, v7 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v9, 31, v6 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v47, 31, v5 +; 
GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v45, 31, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v44, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v46, v5 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v8, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v10, v7 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(4) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v5, 31, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v4, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v6, v3 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(3) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v3, 31, v19 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v2, v19 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v19, 31, v23 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v51, 31, v1 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v49, 31, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v48, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v50, v1 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v1, 31, v18 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v55, 31, v17 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v53, 31, v16 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v52, v16 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v54, v17 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v0, v18 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v17, 31, v22 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v59, 31, v21 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v57, 31, v20 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v56, v20 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v58, v21 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v16, v22 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v18, v23 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(1) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v22, 31, v27 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v20, 31, v26 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:224 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:240 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v42, 31, v25 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v40, 31, v24 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(2) +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v38, 31, v31 +; GCNX3-NOHSA-NEXT: 
v_ashrrev_i32_e32 v36, 31, v30 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v37, v31 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v15, 31, v29 +; GCNX3-NOHSA-NEXT: v_ashrrev_i32_e32 v13, 31, v28 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v12, v28 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v14, v29 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:208 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:160 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:128 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:144 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:96 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:64 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v35, v30 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v39, v24 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v41, v25 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v19, v26 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v21, v27 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[39:42], off, s[0:3], 0 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_sextload_v32i32_to_v32i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 33, @36, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 7 @20 +; EG-NEXT: ALU 96, @70, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T12.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T23.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, 
T20.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T19.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T10.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T9.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T8.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T7.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T6.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T4.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T2.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 20: +; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 112, #1 +; EG-NEXT: VTX_READ_128 T13.XYZW, T11.X, 96, #1 +; EG-NEXT: VTX_READ_128 T14.XYZW, T11.X, 80, #1 +; EG-NEXT: VTX_READ_128 T15.XYZW, T11.X, 64, #1 +; EG-NEXT: VTX_READ_128 T16.XYZW, T11.X, 48, #1 +; EG-NEXT: VTX_READ_128 T17.XYZW, T11.X, 32, #1 +; EG-NEXT: VTX_READ_128 T18.XYZW, T11.X, 16, #1 +; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 +; EG-NEXT: ALU clause starting at 36: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T2.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) +; EG-NEXT: LSHR T3.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) +; EG-NEXT: LSHR T4.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) +; EG-NEXT: LSHR T5.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) +; EG-NEXT: LSHR T6.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) +; EG-NEXT: LSHR T7.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) +; EG-NEXT: LSHR T8.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) +; EG-NEXT: LSHR T9.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) +; EG-NEXT: LSHR T10.X, PV.W, literal.x, +; EG-NEXT: MOV * T11.X, KC0[2].Z, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ALU clause starting at 70: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) +; EG-NEXT: LSHR T19.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) +; EG-NEXT: LSHR T20.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) +; EG-NEXT: LSHR T21.X, PV.W, literal.x, +; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, +; EG-NEXT: ASHR * T22.W, T11.W, literal.z, +; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T23.X, PV.W, literal.x, +; EG-NEXT: ASHR T22.Y, T11.Z, literal.y, +; EG-NEXT: ASHR T24.W, T11.Y, literal.y, +; EG-NEXT: MOV * T22.X, T11.Z, +; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) +; EG-NEXT: ASHR T24.Y, T11.X, literal.x, +; EG-NEXT: ASHR * T25.W, T18.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T24.X, T11.X, +; EG-NEXT: ASHR T25.Y, T18.Z, literal.x, +; EG-NEXT: MOV T22.Z, T11.W, +; EG-NEXT: ASHR T26.W, T18.Y, literal.x, +; EG-NEXT: MOV * T25.X, T18.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T26.Y, T18.X, literal.x, +; EG-NEXT: MOV T24.Z, T11.Y, +; 
EG-NEXT: ASHR * T11.W, T17.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T26.X, T18.X, +; EG-NEXT: ASHR T11.Y, T17.Z, literal.x, +; EG-NEXT: MOV T25.Z, T18.W, +; EG-NEXT: ASHR T27.W, T17.Y, literal.x, +; EG-NEXT: MOV * T11.X, T17.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T27.Y, T17.X, literal.x, +; EG-NEXT: MOV T26.Z, T18.Y, +; EG-NEXT: ASHR * T18.W, T16.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T27.X, T17.X, +; EG-NEXT: ASHR T18.Y, T16.Z, literal.x, +; EG-NEXT: MOV T11.Z, T17.W, +; EG-NEXT: ASHR T28.W, T16.Y, literal.x, +; EG-NEXT: MOV * T18.X, T16.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T28.Y, T16.X, literal.x, +; EG-NEXT: MOV T27.Z, T17.Y, +; EG-NEXT: ASHR * T17.W, T15.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T28.X, T16.X, +; EG-NEXT: ASHR T17.Y, T15.Z, literal.x, +; EG-NEXT: MOV T18.Z, T16.W, +; EG-NEXT: ASHR T29.W, T15.Y, literal.x, +; EG-NEXT: MOV * T17.X, T15.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T29.Y, T15.X, literal.x, +; EG-NEXT: MOV T28.Z, T16.Y, +; EG-NEXT: ASHR * T16.W, T14.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T29.X, T15.X, +; EG-NEXT: ASHR T16.Y, T14.Z, literal.x, +; EG-NEXT: MOV T17.Z, T15.W, +; EG-NEXT: ASHR T30.W, T14.Y, literal.x, +; EG-NEXT: MOV * T16.X, T14.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T30.Y, T14.X, literal.x, +; EG-NEXT: MOV T29.Z, T15.Y, +; EG-NEXT: ASHR * T15.W, T13.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T30.X, T14.X, +; EG-NEXT: ASHR T15.Y, T13.Z, literal.x, +; EG-NEXT: MOV T16.Z, T14.W, +; EG-NEXT: ASHR T31.W, T13.Y, literal.x, +; EG-NEXT: MOV * T15.X, T13.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T31.Y, T13.X, literal.x, +; EG-NEXT: MOV T30.Z, T14.Y, +; EG-NEXT: ASHR * T14.W, T12.W, literal.x, +; EG-NEXT: 31(4.344025e-44), 
0(0.000000e+00) +; EG-NEXT: MOV T31.X, T13.X, +; EG-NEXT: ASHR T14.Y, T12.Z, literal.x, +; EG-NEXT: MOV T15.Z, T13.W, +; EG-NEXT: ASHR T32.W, T12.Y, literal.x, +; EG-NEXT: MOV * T14.X, T12.Z, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: ASHR T32.Y, T12.X, literal.x, +; EG-NEXT: MOV * T31.Z, T13.Y, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; EG-NEXT: MOV T32.X, T12.X, +; EG-NEXT: MOV T14.Z, T12.W, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) +; EG-NEXT: LSHR T12.X, PV.W, literal.x, +; EG-NEXT: MOV * T32.Z, T12.Y, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-GFX900-HSA-LABEL: global_sextload_v32i32_to_v32i64: +; GCN-GFX900-HSA: ; %bb.0: +; GCN-GFX900-HSA-NEXT: s_mov_b64 s[10:11], s[2:3] +; GCN-GFX900-HSA-NEXT: s_mov_b64 s[8:9], s[0:1] +; GCN-GFX900-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v8, 0 +; GCN-GFX900-HSA-NEXT: s_add_u32 s8, s8, s7 +; GCN-GFX900-HSA-NEXT: s_addc_u32 s9, s9, 0 +; GCN-GFX900-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] offset:96 +; GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:112 +; GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[9:12], v8, s[2:3] offset:80 +; GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[13:16], v8, s[2:3] offset:64 +; GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[17:20], v8, s[2:3] offset:48 +; GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[21:24], v8, s[2:3] offset:32 +; GCN-GFX900-HSA-NEXT: s_waitcnt vmcnt(5) +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v28, 31, v3 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v26, 31, v2 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v25, v2 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v27, v3 +; GCN-GFX900-HSA-NEXT: s_waitcnt vmcnt(4) +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v32, 31, v7 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v30, 31, v6 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v36, 31, v5 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 
v34, 31, v4 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v33, v4 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v35, v5 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v29, v6 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v31, v7 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v0 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v4, v0 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v6, v1 +; GCN-GFX900-HSA-NEXT: buffer_store_dword v25, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN-GFX900-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-GFX900-HSA-NEXT: buffer_store_dword v26, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill +; GCN-GFX900-HSA-NEXT: buffer_store_dword v27, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill +; GCN-GFX900-HSA-NEXT: buffer_store_dword v28, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v28, 31, v12 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v26, 31, v11 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v40, 31, v10 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v38, 31, v9 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v37, v9 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v39, v10 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v25, v11 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v27, v12 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v12, 31, v16 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v15 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v44, 31, v14 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v42, 31, v13 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v41, v13 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v43, v14 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v9, v15 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v11, v16 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v20 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v19 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v48, 31, v18 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v46, 31, v17 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v45, v17 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v47, v18 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v13, v19 +; 
GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[49:52], v8, s[2:3] offset:16 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v15, v20 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v20, 31, v24 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v23 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v56, 31, v22 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v54, 31, v21 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v53, v21 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v55, v22 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v17, v23 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v19, v24 +; GCN-GFX900-HSA-NEXT: global_load_dwordx4 v[21:24], v8, s[2:3] +; GCN-GFX900-HSA-NEXT: s_nop 0 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[33:36], s[0:1] offset:224 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[29:32], s[0:1] offset:240 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:192 +; GCN-GFX900-HSA-NEXT: buffer_load_dword v32, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-GFX900-HSA-NEXT: s_nop 0 +; GCN-GFX900-HSA-NEXT: buffer_load_dword v33, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload +; GCN-GFX900-HSA-NEXT: buffer_load_dword v34, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload +; GCN-GFX900-HSA-NEXT: buffer_load_dword v35, off, s[8:11], 0 offset:16 ; 4-byte Folded Reload +; GCN-GFX900-HSA-NEXT: s_waitcnt vmcnt(8) +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v60, 31, v52 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v58, 31, v51 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v50 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v49 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v0, v49 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v2, v50 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v57, v51 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v59, v52 +; GCN-GFX900-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v31, 31, v24 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v29, 31, v23 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v22 +; GCN-GFX900-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v21 +; GCN-GFX900-HSA-NEXT: 
v_mov_b32_e32 v4, v21 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v6, v22 +; GCN-GFX900-HSA-NEXT: s_waitcnt vmcnt(0) +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[32:35], s[0:1] offset:208 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[37:40], s[0:1] offset:160 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[25:28], s[0:1] offset:176 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[41:44], s[0:1] offset:128 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[9:12], s[0:1] offset:144 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[45:48], s[0:1] offset:96 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[13:16], s[0:1] offset:112 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[53:56], s[0:1] offset:64 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[17:20], s[0:1] offset:80 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[57:60], s[0:1] offset:48 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v28, v23 +; GCN-GFX900-HSA-NEXT: v_mov_b32_e32 v30, v24 +; GCN-GFX900-HSA-NEXT: global_store_dwordx4 v8, v[28:31], s[0:1] offset:16 +; GCN-GFX900-HSA-NEXT: s_endpgm +; +; GCN-GFX908-HSA-LABEL: global_sextload_v32i32_to_v32i64: +; GCN-GFX908-HSA: ; %bb.0: +; GCN-GFX908-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v8, 0 +; GCN-GFX908-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] offset:96 +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:112 +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[9:12], v8, s[2:3] offset:80 +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[13:16], v8, s[2:3] offset:64 +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[17:20], v8, s[2:3] offset:48 +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[21:24], v8, s[2:3] offset:32 +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[49:52], v8, s[2:3] offset:16 +; GCN-GFX908-HSA-NEXT: 
s_waitcnt vmcnt(6) +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v25, v2 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v28, 31, v3 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v26, 31, v2 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v27, v3 +; GCN-GFX908-HSA-NEXT: v_accvgpr_write_b32 a0, v25 +; GCN-GFX908-HSA-NEXT: v_accvgpr_write_b32 a1, v26 +; GCN-GFX908-HSA-NEXT: v_accvgpr_write_b32 a2, v27 +; GCN-GFX908-HSA-NEXT: v_accvgpr_write_b32 a3, v28 +; GCN-GFX908-HSA-NEXT: s_waitcnt vmcnt(4) +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v28, 31, v12 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v26, 31, v11 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v40, 31, v10 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v38, 31, v9 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v37, v9 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v39, v10 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v25, v11 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v27, v12 +; GCN-GFX908-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v12, 31, v16 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v15 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v44, 31, v14 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v42, 31, v13 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v41, v13 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v43, v14 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v9, v15 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v11, v16 +; GCN-GFX908-HSA-NEXT: s_waitcnt vmcnt(2) +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v20 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v19 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v48, 31, v18 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v46, 31, v17 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v45, v17 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v47, v18 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v13, v19 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v15, v20 +; GCN-GFX908-HSA-NEXT: s_waitcnt vmcnt(1) +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v20, 31, v24 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v23 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v56, 31, v22 +; 
GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v54, 31, v21 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v53, v21 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v55, v22 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v17, v23 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v19, v24 +; GCN-GFX908-HSA-NEXT: global_load_dwordx4 v[21:24], v8, s[2:3] +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v32, 31, v7 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v36, 31, v5 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v34, 31, v4 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v33, v4 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v35, v5 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v30, 31, v6 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v29, v6 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v31, v7 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[33:36], s[0:1] offset:224 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[29:32], s[0:1] offset:240 +; GCN-GFX908-HSA-NEXT: v_accvgpr_read_b32 v35, a3 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v1 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v0 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v4, v0 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v6, v1 +; GCN-GFX908-HSA-NEXT: v_accvgpr_read_b32 v34, a2 +; GCN-GFX908-HSA-NEXT: v_accvgpr_read_b32 v33, a1 +; GCN-GFX908-HSA-NEXT: v_accvgpr_read_b32 v32, a0 +; GCN-GFX908-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v60, 31, v52 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v58, 31, v51 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v50 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v49 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v0, v49 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v2, v50 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v57, v51 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v59, v52 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:192 +; GCN-GFX908-HSA-NEXT: s_waitcnt vmcnt(3) +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v31, 31, v24 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v29, 31, v23 +; GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v22 +; 
GCN-GFX908-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v21 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v4, v21 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v6, v22 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[32:35], s[0:1] offset:208 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[37:40], s[0:1] offset:160 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[25:28], s[0:1] offset:176 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[41:44], s[0:1] offset:128 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[9:12], s[0:1] offset:144 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[45:48], s[0:1] offset:96 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[13:16], s[0:1] offset:112 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[53:56], s[0:1] offset:64 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[17:20], s[0:1] offset:80 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[57:60], s[0:1] offset:48 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v28, v23 +; GCN-GFX908-HSA-NEXT: v_mov_b32_e32 v30, v24 +; GCN-GFX908-HSA-NEXT: global_store_dwordx4 v8, v[28:31], s[0:1] offset:16 +; GCN-GFX908-HSA-NEXT: s_endpgm %ld = load <32 x i32>, ptr addrspace(1) %in %ext = sext <32 x i32> %ld to <32 x i64> store <32 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: 
{{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_zextload_v32i32_to_v32i64: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s2, -1 +; SI-NOHSA-NEXT: v_mov_b32_e32 v1, 0 +; SI-NOHSA-NEXT: s_mov_b32 s10, s2 +; SI-NOHSA-NEXT: s_mov_b32 s11, s3 +; SI-NOHSA-NEXT: v_mov_b32_e32 v3, v1 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s8, s6 +; 
SI-NOHSA-NEXT: s_mov_b32 s9, s7 +; SI-NOHSA-NEXT: s_mov_b32 s0, s4 +; SI-NOHSA-NEXT: s_mov_b32 s1, s5 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:112 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:96 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:48 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(5) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v4 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v5 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:64 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[32:35], off, s[8:11], 0 offset:80 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v6 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v7 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(8) expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v8 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v9 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v10 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v11 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(4) expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v32 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v33 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v34 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v35 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v28 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v29 +; SI-NOHSA-NEXT: buffer_store_dwordx4 
v[0:3], off, s[0:3], 0 offset:128 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v30 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v31 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v24 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v25 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v26 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v27 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v20 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v21 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v22 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v23 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v16 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v17 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v18 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v19 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v12 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v13 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NOHSA-NEXT: s_waitcnt expcnt(0) +; SI-NOHSA-NEXT: v_mov_b32_e32 v0, v14 +; SI-NOHSA-NEXT: v_mov_b32_e32 v2, v15 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_zextload_v32i32_to_v32i64: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 16 +; GCNX3-HSA-NEXT: 
s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s6, s2, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s7, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s8, s2, 48 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[28:31], v[0:1] +; GCNX3-HSA-NEXT: s_addc_u32 s9, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s10, s2, 64 +; GCNX3-HSA-NEXT: s_addc_u32 s11, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s12, s2, 0x50 +; GCNX3-HSA-NEXT: s_addc_u32 s13, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s14, s2, 0x60 +; GCNX3-HSA-NEXT: s_addc_u32 s15, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 0x70 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[32:35], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s14 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s15 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[24:27], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s12 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s13 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[20:23], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s11 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[16:19], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s9 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s7 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s6 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, v1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v28 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v29 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v29, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, s0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[28:29], 
v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v29, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xe0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v30 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v31 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v31, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v30, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xf0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[28:29], v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v29, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xc0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(8) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v33 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[30:31], v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v31, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v30, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xd0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v33, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v32, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xa0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v34 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v35 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v35, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v34, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0xb0 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[28:29], v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v29, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x80 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(9) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v24 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v25 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v25, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v24, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x90 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[30:31], v[0:3] +; GCNX3-HSA-NEXT: s_nop 0 +; GCNX3-HSA-NEXT: 
v_mov_b32_e32 v0, v26 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v27 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v27, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v26, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x60 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[32:33], v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v31, s3 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(10) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v20 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v21 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[34:35], v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v30, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v22 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v23 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x70 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[28:29], v[0:3] +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(11) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v16 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v17 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] +; GCNX3-HSA-NEXT: s_nop 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v18 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v19 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[26:27], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(12) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v12 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v13 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 64 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[30:31], v[0:3] +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v14 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v15 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(12) +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v9 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s2 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x50 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s2 +; 
GCNX3-HSA-NEXT: s_add_u32 s2, s0, 32 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v11 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 48 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, v6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, v7 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s0 +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_zextload_v32i32_to_v32i64: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s3, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s2, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s3 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s7 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:112 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:96 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:80 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:64 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[32:35], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v29, 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v31, v29 +; GCNX3-NOHSA-NEXT: s_mov_b32 s0, s4 +; GCNX3-NOHSA-NEXT: s_mov_b32 s1, s5 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-NOHSA-NEXT: 
v_mov_b32_e32 v28, v0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v1 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:224 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v2 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v3 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:240 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(8) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v4 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v5 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:192 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v6 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v7 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:208 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(9) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v8 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v9 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:160 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v10 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v11 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:176 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(10) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v12 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v13 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:128 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v14 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v15 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:144 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(11) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v16 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v17 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:96 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v18 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v19 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(12) +; GCNX3-NOHSA-NEXT: 
v_mov_b32_e32 v28, v20 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v21 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v22 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v23 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(13) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v24 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v25 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v26 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v27 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:48 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(14) +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v32 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v33 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 +; GCNX3-NOHSA-NEXT: s_nop 0 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v28, v34 +; GCNX3-NOHSA-NEXT: v_mov_b32_e32 v30, v35 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_zextload_v32i32_to_v32i64: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 2 @22 +; EG-NEXT: ALU 10, @39, KC0[], KC1[] +; EG-NEXT: TEX 4 @28 +; EG-NEXT: ALU 100, @50, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T31.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T30.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T29.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T28.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T27.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T26.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T13.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T12.X, 0 +; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T11.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T10.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T2.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 22: +; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 112, #1 +; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 80, #1 +; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 96, #1 +; EG-NEXT: Fetch clause starting at 28: +; EG-NEXT: VTX_READ_128 T10.XYZW, T0.X, 0, #1 +; EG-NEXT: VTX_READ_128 T11.XYZW, T0.X, 16, #1 +; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 32, #1 +; EG-NEXT: VTX_READ_128 T13.XYZW, T0.X, 48, #1 +; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 64, #1 +; EG-NEXT: ALU clause starting at 38: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 39: +; EG-NEXT: MOV T4.X, T1.X, +; EG-NEXT: MOV T4.Y, 0.0, +; EG-NEXT: MOV * T5.X, T1.Z, +; EG-NEXT: MOV * T5.Y, 0.0, +; EG-NEXT: MOV T6.X, T3.X, +; EG-NEXT: MOV T6.Y, 0.0, +; EG-NEXT: MOV * T7.X, T3.Z, +; EG-NEXT: MOV * T7.Y, 0.0, +; EG-NEXT: MOV T8.X, T2.X, +; EG-NEXT: MOV T8.Y, 0.0, +; EG-NEXT: MOV * T9.X, T2.Z, +; EG-NEXT: ALU clause starting at 50: +; EG-NEXT: MOV * T9.Y, 0.0, +; EG-NEXT: MOV T14.X, T0.X, +; EG-NEXT: MOV T14.Y, 0.0, +; EG-NEXT: MOV * T15.X, T0.Z, +; EG-NEXT: MOV * T15.Y, 0.0, +; EG-NEXT: MOV T16.X, T13.X, +; EG-NEXT: MOV T16.Y, 0.0, +; EG-NEXT: MOV * T17.X, T13.Z, +; EG-NEXT: MOV * T17.Y, 0.0, +; EG-NEXT: MOV T18.X, T12.X, +; EG-NEXT: MOV T18.Y, 0.0, +; EG-NEXT: MOV * T19.X, T12.Z, +; EG-NEXT: MOV * T19.Y, 0.0, +; EG-NEXT: MOV T20.X, T11.X, +; EG-NEXT: MOV T20.Y, 0.0, +; EG-NEXT: MOV * T21.X, T11.Z, +; EG-NEXT: MOV * T21.Y, 0.0, +; EG-NEXT: MOV T22.X, T10.X, +; EG-NEXT: MOV T22.Y, 0.0, +; EG-NEXT: MOV * T23.X, T10.Z, +; EG-NEXT: MOV T23.Y, 0.0, +; EG-NEXT: MOV T4.Z, T1.Y, +; EG-NEXT: MOV T4.W, 0.0, +; EG-NEXT: MOV * T5.Z, T1.W, +; 
EG-NEXT: MOV * T5.W, 0.0, +; EG-NEXT: MOV T6.Z, T3.Y, +; EG-NEXT: MOV T6.W, 0.0, +; EG-NEXT: MOV * T7.Z, T3.W, +; EG-NEXT: MOV * T7.W, 0.0, +; EG-NEXT: MOV T8.Z, T2.Y, +; EG-NEXT: MOV T8.W, 0.0, +; EG-NEXT: MOV * T9.Z, T2.W, +; EG-NEXT: MOV * T9.W, 0.0, +; EG-NEXT: MOV T14.Z, T0.Y, +; EG-NEXT: MOV T14.W, 0.0, +; EG-NEXT: MOV * T15.Z, T0.W, +; EG-NEXT: MOV * T15.W, 0.0, +; EG-NEXT: MOV T16.Z, T13.Y, +; EG-NEXT: MOV T16.W, 0.0, +; EG-NEXT: MOV * T17.Z, T13.W, +; EG-NEXT: MOV * T17.W, 0.0, +; EG-NEXT: MOV T18.Z, T12.Y, +; EG-NEXT: MOV T18.W, 0.0, +; EG-NEXT: MOV * T19.Z, T12.W, +; EG-NEXT: MOV * T19.W, 0.0, +; EG-NEXT: MOV T20.Z, T11.Y, +; EG-NEXT: MOV T20.W, 0.0, +; EG-NEXT: MOV * T21.Z, T11.W, +; EG-NEXT: MOV * T21.W, 0.0, +; EG-NEXT: MOV T22.Z, T10.Y, +; EG-NEXT: MOV T22.W, 0.0, +; EG-NEXT: MOV * T23.Z, T10.W, +; EG-NEXT: MOV T23.W, 0.0, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PS, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T2.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) +; EG-NEXT: LSHR T3.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) +; EG-NEXT: LSHR T10.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) +; EG-NEXT: LSHR T11.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) +; EG-NEXT: LSHR T12.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) +; EG-NEXT: LSHR T13.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) +; EG-NEXT: 
LSHR T24.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) +; EG-NEXT: LSHR T25.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) +; EG-NEXT: LSHR T26.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) +; EG-NEXT: LSHR T27.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) +; EG-NEXT: LSHR T28.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) +; EG-NEXT: LSHR T29.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) +; EG-NEXT: LSHR T30.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) +; EG-NEXT: LSHR * T31.X, PV.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_zextload_v32i32_to_v32i64: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v1, s[2:3] offset:112 +; GCN-HSA-NEXT: global_load_dwordx4 v[8:11], v1, s[2:3] offset:96 +; GCN-HSA-NEXT: global_load_dwordx4 v[12:15], v1, s[2:3] offset:80 +; GCN-HSA-NEXT: global_load_dwordx4 v[16:19], v1, s[2:3] offset:64 +; GCN-HSA-NEXT: global_load_dwordx4 v[20:23], v1, s[2:3] offset:48 +; GCN-HSA-NEXT: global_load_dwordx4 v[24:27], v1, s[2:3] offset:32 +; GCN-HSA-NEXT: global_load_dwordx4 v[28:31], v1, s[2:3] offset:16 +; GCN-HSA-NEXT: global_load_dwordx4 v[32:35], v1, s[2:3] +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v4 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v5 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:224 +; 
GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v6 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v7 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:240 +; GCN-HSA-NEXT: s_waitcnt vmcnt(8) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v8 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v9 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:192 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v10 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v11 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:208 +; GCN-HSA-NEXT: s_waitcnt vmcnt(9) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v12 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v13 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:160 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v14 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v15 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:176 +; GCN-HSA-NEXT: s_waitcnt vmcnt(10) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v16 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v17 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:128 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v18 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v19 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:144 +; GCN-HSA-NEXT: s_waitcnt vmcnt(11) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v20 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v21 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:96 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v22 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v23 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:112 +; GCN-HSA-NEXT: s_waitcnt vmcnt(12) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v24 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v25 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:64 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v26 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v27 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:80 +; GCN-HSA-NEXT: s_waitcnt vmcnt(13) +; GCN-HSA-NEXT: v_mov_b32_e32 
v0, v28 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v29 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:32 +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v30 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v31 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:48 +; GCN-HSA-NEXT: s_waitcnt vmcnt(14) +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v32 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v33 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GCN-HSA-NEXT: s_nop 0 +; GCN-HSA-NEXT: v_mov_b32_e32 v0, v34 +; GCN-HSA-NEXT: v_mov_b32_e32 v2, v35 +; GCN-HSA-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] offset:16 +; GCN-HSA-NEXT: s_endpgm %ld = load <32 x i32>, ptr addrspace(1) %in %ext = zext <32 x i32> %ld to <32 x i64> store <32 x i64> %ext, ptr addrspace(1) %out ret void } -; FUNC-LABEL: {{^}}global_load_v32i32: -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 -; GCN-NOHSA: buffer_load_dwordx4 - -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 -; GCN-HSA: {{flat|global}}_load_dwordx4 - - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 -; 
GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA-DAG: buffer_store_dwordx4 - -; GCN-NOT: accvgpr - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 - -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 -; GCN-HSA-DAG: {{flat|global}}_store_dwordx4 define amdgpu_kernel void @global_load_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-NOHSA-LABEL: global_load_v32i32: +; SI-NOHSA: ; %bb.0: +; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; SI-NOHSA-NEXT: s_mov_b32 s6, -1 +; SI-NOHSA-NEXT: s_mov_b32 s10, s6 +; SI-NOHSA-NEXT: s_mov_b32 s11, s7 +; SI-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; SI-NOHSA-NEXT: s_mov_b32 s4, s0 +; SI-NOHSA-NEXT: s_mov_b32 s5, s1 +; SI-NOHSA-NEXT: s_mov_b32 s8, s2 +; SI-NOHSA-NEXT: s_mov_b32 s9, s3 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:112 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:96 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:80 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:64 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:32 +; SI-NOHSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:48 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(4) +; 
SI-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:96 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:112 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(4) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 offset:64 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:80 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(5) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 offset:32 +; SI-NOHSA-NEXT: s_waitcnt vmcnt(5) +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 offset:48 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NOHSA-NEXT: s_endpgm +; +; GCNX3-HSA-LABEL: global_load_v32i32: +; GCNX3-HSA: ; %bb.0: +; GCNX3-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCNX3-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: s_add_u32 s6, s2, 48 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v29, s5 +; GCNX3-HSA-NEXT: s_addc_u32 s7, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v28, s4 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 32 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s4 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 0x50 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v13, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v12, s4 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 64 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v17, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v16, s4 +; GCNX3-HSA-NEXT: s_add_u32 s4, s2, 0x70 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s6 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s7 +; GCNX3-HSA-NEXT: s_add_u32 s2, s2, 0x60 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; 
GCNX3-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GCNX3-HSA-NEXT: s_addc_u32 s3, s3, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v21, s5 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v25, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v20, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v24, s2 +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[20:23], v[20:21] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[24:27], v[24:25] +; GCNX3-HSA-NEXT: flat_load_dwordx4 v[28:31], v[28:29] +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 0x60 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v33, s1 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v32, s0 +; GCNX3-HSA-NEXT: s_add_u32 s4, s0, 0x70 +; GCNX3-HSA-NEXT: s_addc_u32 s5, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[32:33], v[0:3] +; GCNX3-HSA-NEXT: s_nop 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v0, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v1, s3 +; GCNX3-HSA-NEXT: s_add_u32 s2, s0, 64 +; GCNX3-HSA-NEXT: s_addc_u32 s3, s1, 0 +; GCNX3-HSA-NEXT: s_add_u32 s6, s0, 0x50 +; GCNX3-HSA-NEXT: s_addc_u32 s7, s1, 0 +; GCNX3-HSA-NEXT: s_add_u32 s8, s0, 32 +; GCNX3-HSA-NEXT: s_addc_u32 s9, s1, 0 +; GCNX3-HSA-NEXT: s_add_u32 s10, s0, 48 +; GCNX3-HSA-NEXT: s_addc_u32 s11, s1, 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s10 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s11 +; GCNX3-HSA-NEXT: s_add_u32 s0, s0, 16 +; GCNX3-HSA-NEXT: s_addc_u32 s1, s1, 0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GCNX3-HSA-NEXT: s_nop 0 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v6, s8 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v7, s9 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[6:7], v[8:11] +; GCNX3-HSA-NEXT: v_mov_b32_e32 v6, s6 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v2, s4 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v5, s3 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v7, s7 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v9, s1 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v3, s5 +; 
GCNX3-HSA-NEXT: v_mov_b32_e32 v4, s2 +; GCNX3-HSA-NEXT: v_mov_b32_e32 v8, s0 +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[6:7], v[12:15] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[2:3], v[20:23] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] +; GCNX3-HSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-HSA-NEXT: flat_store_dwordx4 v[8:9], v[28:31] +; GCNX3-HSA-NEXT: s_endpgm +; +; GCNX3-NOHSA-LABEL: global_load_v32i32: +; GCNX3-NOHSA: ; %bb.0: +; GCNX3-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCNX3-NOHSA-NEXT: s_mov_b32 s7, 0xf000 +; GCNX3-NOHSA-NEXT: s_mov_b32 s6, -1 +; GCNX3-NOHSA-NEXT: s_mov_b32 s10, s6 +; GCNX3-NOHSA-NEXT: s_mov_b32 s11, s7 +; GCNX3-NOHSA-NEXT: s_waitcnt lgkmcnt(0) +; GCNX3-NOHSA-NEXT: s_mov_b32 s8, s2 +; GCNX3-NOHSA-NEXT: s_mov_b32 s9, s3 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:112 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:96 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:80 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:64 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:32 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:48 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 +; GCNX3-NOHSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_mov_b32 s4, s0 +; GCNX3-NOHSA-NEXT: s_mov_b32 s5, s1 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(6) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:96 +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:112 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(6) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:64 +; GCNX3-NOHSA-NEXT: 
buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:80 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:32 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 offset:48 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 +; GCNX3-NOHSA-NEXT: s_waitcnt vmcnt(7) +; GCNX3-NOHSA-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 offset:16 +; GCNX3-NOHSA-NEXT: s_endpgm +; +; EG-LABEL: global_load_v32i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 23, @28, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 7 @12 +; EG-NEXT: ALU 1, @52, KC0[], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T15.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T6.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T5.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T4.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T3.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T2.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T1.X, 0 +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T0.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: Fetch clause starting at 12: +; EG-NEXT: VTX_READ_128 T8.XYZW, T7.X, 96, #1 +; EG-NEXT: VTX_READ_128 T9.XYZW, T7.X, 112, #1 +; EG-NEXT: VTX_READ_128 T10.XYZW, T7.X, 64, #1 +; EG-NEXT: VTX_READ_128 T11.XYZW, T7.X, 80, #1 +; EG-NEXT: VTX_READ_128 T12.XYZW, T7.X, 32, #1 +; EG-NEXT: VTX_READ_128 T13.XYZW, T7.X, 48, #1 +; EG-NEXT: VTX_READ_128 T14.XYZW, T7.X, 0, #1 +; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 16, #1 +; EG-NEXT: ALU clause starting at 28: +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T0.X, PV.W, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T2.X, PV.W, 
literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) +; EG-NEXT: LSHR T3.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) +; EG-NEXT: LSHR T4.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) +; EG-NEXT: LSHR T5.X, PV.W, literal.x, +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, +; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) +; EG-NEXT: LSHR T6.X, PV.W, literal.x, +; EG-NEXT: MOV * T7.X, KC0[2].Z, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, +; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) +; EG-NEXT: ALU clause starting at 52: +; EG-NEXT: LSHR * T15.X, T0.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-HSA-LABEL: global_load_v32i32: +; GCN-HSA: ; %bb.0: +; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-HSA-NEXT: v_mov_b32_e32 v32, 0 +; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) +; GCN-HSA-NEXT: global_load_dwordx4 v[0:3], v32, s[2:3] offset:96 +; GCN-HSA-NEXT: global_load_dwordx4 v[4:7], v32, s[2:3] offset:112 +; GCN-HSA-NEXT: global_load_dwordx4 v[8:11], v32, s[2:3] offset:64 +; GCN-HSA-NEXT: global_load_dwordx4 v[12:15], v32, s[2:3] offset:80 +; GCN-HSA-NEXT: global_load_dwordx4 v[16:19], v32, s[2:3] offset:32 +; GCN-HSA-NEXT: global_load_dwordx4 v[20:23], v32, s[2:3] offset:48 +; GCN-HSA-NEXT: global_load_dwordx4 v[24:27], v32, s[2:3] +; GCN-HSA-NEXT: global_load_dwordx4 v[28:31], v32, s[2:3] offset:16 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:96 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] offset:112 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:64 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] 
offset:80 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:48 +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] +; GCN-HSA-NEXT: s_waitcnt vmcnt(7) +; GCN-HSA-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:16 +; GCN-HSA-NEXT: s_endpgm %ld = load <32 x i32>, ptr addrspace(1) %in store <32 x i32> %ld, ptr addrspace(1) %out ret void From bb98227db19ae4d80af7a25a9423aae2aeaec61d Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Fri, 1 Dec 2023 10:30:15 +0100 Subject: [PATCH 16/72] [libc][NFC] Remove named_pair (#73952) `named_pair` does not provide enough value to deserve its own header. --- libc/src/__support/CMakeLists.txt | 8 -------- libc/src/__support/math_extras.h | 11 ++++++++--- libc/src/__support/named_pair.h | 18 ------------------ libc/src/__support/number_pair.h | 6 ++++-- .../llvm-project-overlay/libc/BUILD.bazel | 7 ------- 5 files changed, 12 insertions(+), 38 deletions(-) delete mode 100644 libc/src/__support/named_pair.h diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index decd6ed2dbd2bd..ba80965b5aaaf3 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -10,12 +10,6 @@ add_header_library( libc.src.__support.CPP.new ) -add_header_library( - named_pair - HDRS - named_pair.h -) - add_header_library( common HDRS @@ -40,7 +34,6 @@ add_header_library( HDRS math_extras.h DEPENDS - .named_pair libc.src.__support.CPP.type_traits libc.src.__support.macros.attributes libc.src.__support.macros.config @@ -187,7 +180,6 @@ add_header_library( HDRS number_pair.h DEPENDS - .named_pair libc.src.__support.CPP.type_traits ) diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index cc22aa49d02601..860cdda8586d1e 100644 --- 
a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -10,7 +10,6 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H #define LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H -#include "named_pair.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN @@ -18,7 +17,10 @@ namespace LIBC_NAMESPACE { // Add with carry -DEFINE_NAMED_PAIR_TEMPLATE(SumCarry, sum, carry); +template struct SumCarry { + T sum; + T carry; +}; // This version is always valid for constexpr. template @@ -91,7 +93,10 @@ add_with_carry(unsigned long long a, unsigned long long b, #endif // LIBC_HAS_BUILTIN(__builtin_addc) // Subtract with borrow -DEFINE_NAMED_PAIR_TEMPLATE(DiffBorrow, diff, borrow); +template struct DiffBorrow { + T diff; + T borrow; +}; // This version is always valid for constexpr. template diff --git a/libc/src/__support/named_pair.h b/libc/src/__support/named_pair.h deleted file mode 100644 index bd7dccf9810c7f..00000000000000 --- a/libc/src/__support/named_pair.h +++ /dev/null @@ -1,18 +0,0 @@ -//===-- Utilities for pairs of numbers. -------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC___SUPPORT_NAMED_PAIR_H -#define LLVM_LIBC_SRC___SUPPORT_NAMED_PAIR_H - -#define DEFINE_NAMED_PAIR_TEMPLATE(Name, FirstField, SecondField) \ - template struct Name { \ - T1 FirstField; \ - T2 SecondField; \ - } - -#endif // LLVM_LIBC_SRC___SUPPORT_NAMED_PAIR_H diff --git a/libc/src/__support/number_pair.h b/libc/src/__support/number_pair.h index 5e553d817994b4..12e730836af2c6 100644 --- a/libc/src/__support/number_pair.h +++ b/libc/src/__support/number_pair.h @@ -10,13 +10,15 @@ #define LLVM_LIBC_SRC___SUPPORT_NUMBER_PAIR_H #include "CPP/type_traits.h" -#include "named_pair.h" #include namespace LIBC_NAMESPACE { -DEFINE_NAMED_PAIR_TEMPLATE(NumberPair, lo, hi); +template struct NumberPair { + T lo; + T hi; +}; template cpp::enable_if_t && cpp::is_unsigned_v, NumberPair> diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index d53ca202101537..46d81987e7b32f 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -432,7 +432,6 @@ libc_support_library( hdrs = ["src/__support/number_pair.h"], deps = [ ":__support_cpp_type_traits", - ":__support_named_pair", ], ) @@ -587,11 +586,6 @@ libc_support_library( ], ) -libc_support_library( - name = "__support_named_pair", - hdrs = ["src/__support/named_pair.h"], -) - libc_support_library( name = "__support_bit", hdrs = ["src/__support/bit.h"], @@ -608,7 +602,6 @@ libc_support_library( ":__support_cpp_type_traits", ":__support_macros_attributes", ":__support_macros_config", - ":__support_named_pair", ], ) From 5fe7ae848cc6cb2afc3aab332743ffa2bb635fc3 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 10:49:27 +0000 Subject: [PATCH 17/72] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (#72849) Adds the 
builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @llvm.aarch64.sme.str.zt(i32, ptr) Patch by: Kerry McLaughlin kerry.mclaughlin@arm.com --- clang/include/clang/Basic/arm_sme.td | 8 ++++ .../acle_sme2_ldr_str_zt.c | 41 +++++++++++++++++++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 7 +++- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 7 +++- .../Target/AArch64/AArch64ISelLowering.cpp | 18 ++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp | 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 23 ++++++++--- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll | 27 ++++++++++++ 10 files changed, 134 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index d55deeaa40bbcd..7aae3c832bb1fe 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -314,3 +314,11 @@ let TargetGuard = "sme2" in { def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; } + +// +// Spill and fill of ZT0 +// +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c 
new file mode 100644 index 00000000000000..126a4fc1045853 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,41 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(0, base); +} + +// STR ZT0 + +// CHECK-LABEL: @test_svstr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svstr_zt(0, base); +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index 4c35a238d9f9e2..70987ad395f735 100644 --- 
a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sve -fsyntax-only -verify %s +// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s // REQUIRES: aarch64-registered-target @@ -19,3 +19,8 @@ void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t svbmops_za32_u32_m(4, pred, pred, u32, u32); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } + +void test_ldr_str_zt(const void *const_base, void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(1, const_base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svstr_zt(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 136512db123b30..2f49e9a6b37cc3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -326,9 +326,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { return false; } - template bool ImmToTile(SDValue N, SDValue &Imm) { + template + bool ImmToTile(SDValue N, SDValue &Imm) { if (auto *CI = dyn_cast(N)) { uint64_t C = CI->getZExtValue(); + + if (C > Max) + return false; + Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); return true; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index cb093a1613110e..4379c3fde6f3c5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2753,6 +2753,20 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } +MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsSpill) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB; + unsigned Opc = IsSpill ? AArch64::STR_TX : AArch64::LDR_TX; + auto Rs = IsSpill ? RegState::Kill : RegState::Define; + MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); + MIB.addReg(MI.getOperand(0).getReg(), Rs); + MIB.add(MI.getOperand(1)); // Base + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, @@ -2869,6 +2883,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB); case AArch64::LDR_ZA_PSEUDO: return EmitFill(MI, BB); + case AArch64::LDR_TX_PSEUDO: + return EmitZTSpillFill(MI, BB, /*IsSpill=*/false); + case AArch64::STR_TX_PSEUDO: + return EmitZTSpillFill(MI, BB, /*IsSpill=*/true); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 25d7cb6d212d1f..009f8744b408a9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -623,6 +623,8 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB, bool HasTile) const; + MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB, + bool IsSpill) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock * diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp 
b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index ed64a7b4984c17..24ba9dd95004c6 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { Reserved.set(SubReg); } + if (MF.getSubtarget().hasSME2()) { + for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true); + SubReg.isValid(); ++SubReg) + Reserved.set(*SubReg); + } + markSuperRegs(Reserved, AArch64::FPCR); if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index bb9464a8d2e1cf..fcfa5f82a3809c 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -541,8 +541,8 @@ defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops def ZERO_T : sme2_zero_zt<"zero", 0b0001>; -def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>; -def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>; +defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>; +defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>; def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>; def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 6c9b1f11a4decd..ef9c323e25bc35 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -10,11 +10,12 @@ // //===----------------------------------------------------------------------===// -def imm_to_tile8 : ComplexPattern", []>; -def imm_to_tile16 : ComplexPattern", []>; -def imm_to_tile32 : ComplexPattern", []>; -def imm_to_tile64 : ComplexPattern", []>; -def imm_to_tile128 : ComplexPattern", []>; +def imm_to_tile8 : ComplexPattern", []>; 
+def imm_to_tile16 : ComplexPattern", []>; +def imm_to_tile32 : ComplexPattern", []>; +def imm_to_tile64 : ComplexPattern", []>; +def imm_to_tile128 : ComplexPattern", []>; +def imm_to_zt : ComplexPattern", []>; def tileslice8 : ComplexPattern", []>; def tileslice16 : ComplexPattern", []>; @@ -3137,6 +3138,18 @@ class sme2_spill_fill_vector opc> let mayStore = opc{7}; } + +multiclass sme2_spill_fill_vector opc, SDPatternOperator op> { + def NAME : sme2_spill_fill_vector; + def NAME # _PSEUDO + : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> { + // Translated to actual instruction in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + } + def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base), + (!cast(NAME # _PSEUDO) $tile, $base)>; +} + //===----------------------------------------------------------------------===/// // SME2 move to/from lookup table class sme2_movt_zt_to_scalar opc> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll new file mode 100644 index 00000000000000..30205d86f2fb20 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; LDR + +define void @ldr_zt0(ptr %ptr) { +; CHECK-LABEL: ldr_zt0: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr zt0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %ptr) + ret void; +} + +; STR + +define void @str_zt0(ptr %ptr) { +; CHECK-LABEL: str_zt0: +; CHECK: // %bb.0: +; CHECK-NEXT: str zt0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.str.zt(i32 0, ptr %ptr) + ret void; +} + +declare void @llvm.aarch64.sme.ldr.zt(i32, ptr) +declare void @llvm.aarch64.sme.str.zt(i32, ptr) From de55a2843fae6afd4b0589d81496096a4ff73cbd Mon Sep 17 00:00:00 2001 From: XinWang10 <108658776+XinWang10@users.noreply.github.com> 
Date: Fri, 1 Dec 2023 17:39:25 +0800 Subject: [PATCH 18/72] [X86][MC] Support Enc/Dec for EGPR for promoted BMI instructions (#73899) R16-R31 were added to the GPRs in https://github.com/llvm/llvm-project/pull/70958. This patch supports the encoding/decoding for promoted BMI instructions in EVEX space. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4 --- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 6 +- llvm/lib/Target/X86/X86InstrArithmetic.td | 58 +++++-- llvm/lib/Target/X86/X86InstrMisc.td | 152 ++++++++++-------- llvm/lib/Target/X86/X86InstrShiftRotate.td | 59 ++++--- llvm/test/MC/Disassembler/X86/apx/andn.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/bextr.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/blsi.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/blsmsk.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/blsr.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/bzhi.txt | 18 +++ .../MC/Disassembler/X86/apx/evex-format.txt | 12 ++ llvm/test/MC/Disassembler/X86/apx/mulx.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/pdep.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/pext.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/rorx.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/sarx.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/shlx.txt | 18 +++ llvm/test/MC/Disassembler/X86/apx/shrx.txt | 18 +++ llvm/test/MC/X86/apx/andn-att.s | 20 +++ llvm/test/MC/X86/apx/andn-intel.s | 17 ++ llvm/test/MC/X86/apx/bextr-att.s | 20 +++ llvm/test/MC/X86/apx/bextr-intel.s | 17 ++ llvm/test/MC/X86/apx/blsi-att.s | 20 +++ llvm/test/MC/X86/apx/blsi-intel.s | 17 ++ llvm/test/MC/X86/apx/blsmsk-att.s | 20 +++ llvm/test/MC/X86/apx/blsmsk-intel.s | 17 ++ llvm/test/MC/X86/apx/blsr-att.s | 20 +++ llvm/test/MC/X86/apx/blsr-intel.s | 17 ++ llvm/test/MC/X86/apx/bzhi-att.s | 20 +++ llvm/test/MC/X86/apx/bzhi-intel.s | 17 ++ llvm/test/MC/X86/apx/evex-format-att.s | 12 ++ llvm/test/MC/X86/apx/evex-format-intel.s | 12 ++ llvm/test/MC/X86/apx/mulx-att.s | 20 +++
llvm/test/MC/X86/apx/mulx-intel.s | 17 ++ llvm/test/MC/X86/apx/pdep-att.s | 20 +++ llvm/test/MC/X86/apx/pdep-intel.s | 17 ++ llvm/test/MC/X86/apx/pext-att.s | 20 +++ llvm/test/MC/X86/apx/pext-intel.s | 17 ++ llvm/test/MC/X86/apx/rorx-att.s | 20 +++ llvm/test/MC/X86/apx/rorx-intel.s | 17 ++ llvm/test/MC/X86/apx/sarx-att.s | 20 +++ llvm/test/MC/X86/apx/sarx-intel.s | 17 ++ llvm/test/MC/X86/apx/shlx-att.s | 20 +++ llvm/test/MC/X86/apx/shlx-intel.s | 17 ++ llvm/test/MC/X86/apx/shrx-att.s | 20 +++ llvm/test/MC/X86/apx/shrx-intel.s | 17 ++ llvm/test/TableGen/x86-fold-tables.inc | 26 +++ 47 files changed, 942 insertions(+), 110 deletions(-) create mode 100644 llvm/test/MC/Disassembler/X86/apx/andn.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/bextr.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/blsi.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/blsmsk.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/blsr.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/bzhi.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/mulx.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/pdep.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/pext.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/rorx.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/sarx.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/shlx.txt create mode 100644 llvm/test/MC/Disassembler/X86/apx/shrx.txt create mode 100644 llvm/test/MC/X86/apx/andn-att.s create mode 100644 llvm/test/MC/X86/apx/andn-intel.s create mode 100644 llvm/test/MC/X86/apx/bextr-att.s create mode 100644 llvm/test/MC/X86/apx/bextr-intel.s create mode 100644 llvm/test/MC/X86/apx/blsi-att.s create mode 100644 llvm/test/MC/X86/apx/blsi-intel.s create mode 100644 llvm/test/MC/X86/apx/blsmsk-att.s create mode 100644 llvm/test/MC/X86/apx/blsmsk-intel.s create mode 100644 llvm/test/MC/X86/apx/blsr-att.s create mode 100644 llvm/test/MC/X86/apx/blsr-intel.s create mode 100644 
llvm/test/MC/X86/apx/bzhi-att.s create mode 100644 llvm/test/MC/X86/apx/bzhi-intel.s create mode 100644 llvm/test/MC/X86/apx/mulx-att.s create mode 100644 llvm/test/MC/X86/apx/mulx-intel.s create mode 100644 llvm/test/MC/X86/apx/pdep-att.s create mode 100644 llvm/test/MC/X86/apx/pdep-intel.s create mode 100644 llvm/test/MC/X86/apx/pext-att.s create mode 100644 llvm/test/MC/X86/apx/pext-intel.s create mode 100644 llvm/test/MC/X86/apx/rorx-att.s create mode 100644 llvm/test/MC/X86/apx/rorx-intel.s create mode 100644 llvm/test/MC/X86/apx/sarx-att.s create mode 100644 llvm/test/MC/X86/apx/sarx-intel.s create mode 100644 llvm/test/MC/X86/apx/shlx-att.s create mode 100644 llvm/test/MC/X86/apx/shlx-intel.s create mode 100644 llvm/test/MC/X86/apx/shrx-att.s create mode 100644 llvm/test/MC/X86/apx/shrx-intel.s diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 1f130c22298ed4..b6ebbcf56aef73 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1115,10 +1115,10 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, case X86II::MRMSrcMem4VOp3: { // Instruction format for 4VOp3: // src1(ModR/M), MemAddr, src3(VEX_4V) - Prefix.setR(MI, CurOp++); + Prefix.setRR2(MI, CurOp++); Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg); Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg); - Prefix.set4V(MI, CurOp + X86::AddrNumOperands); + Prefix.set4VV2(MI, CurOp + X86::AddrNumOperands); break; } case X86II::MRMSrcMemOp4: { @@ -1189,7 +1189,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // src1(ModR/M), src2(ModR/M), src3(VEX_4V) Prefix.setRR2(MI, CurOp++); Prefix.setBB2(MI, CurOp++); - Prefix.set4V(MI, CurOp++); + Prefix.set4VV2(MI, CurOp++); break; } case X86II::MRMSrcRegOp4: { diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 
48188da291ded0..56cbc13eaaec8d 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -1289,21 +1289,34 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2), // multiclass bmi_andn { +let Predicates = [HasBMI, NoEGPR] in { def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - Sched<[sched]>; + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, + VEX_4V, Sched<[sched]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, + VEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; +} +let Predicates = [HasBMI, HasEGPR, In64BitMode] in { + def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, + EVEX_4V, Sched<[sched]>; + def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; +} } // Complexity is reduced to give and with immediate a chance to match first. 
-let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in { - defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V; - defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, REX_W; +let Defs = [EFLAGS], AddedComplexity = -6 in { + defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS; + defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, REX_W; } let Predicates = [HasBMI], AddedComplexity = -6 in { @@ -1323,6 +1336,7 @@ let Predicates = [HasBMI], AddedComplexity = -6 in { multiclass bmi_mulx { let hasSideEffects = 0 in { +let Predicates = [HasBMI2, NoEGPR] in { def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), []>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>; @@ -1346,15 +1360,27 @@ let hasSideEffects = 0 in { def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src), []>, Sched<[sched.Folded]>; } +let Predicates = [HasBMI2, HasEGPR, In64BitMode] in + def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), + !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), + []>, T8XD, EVEX_4V, Sched<[WriteIMulH, sched]>; +let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in + def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), + !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), + []>, T8XD, EVEX_4V, + Sched<[WriteIMulHLd, sched.Folded, + // Memory operand. 
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Implicit read of EDX/RDX + sched.ReadAfterFold]>; } - -let Predicates = [HasBMI2] in { - let Uses = [EDX] in - defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>; - let Uses = [RDX] in - defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W; } +let Uses = [EDX] in + defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>; +let Uses = [RDX] in + defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W; + //===----------------------------------------------------------------------===// // ADCX and ADOX Instructions // diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 32aa82fc93ca30..764d4bd6da2a1d 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1214,19 +1214,19 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { multiclass bmi_bls { + X86FoldableSchedWrite sched, string Suffix = ""> { let hasSideEffects = 0 in { - def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[sched]>; + def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), + !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, + T8PS, VEX_4V, Sched<[sched]>; let mayLoad = 1 in - def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[sched.Folded]>; + def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), + !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, + T8PS, VEX_4V, Sched<[sched.Folded]>; } } -let Predicates = [HasBMI], Defs = [EFLAGS] in { +let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>; defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W; defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, 
WriteBLS>; @@ -1235,6 +1235,15 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W; } +let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { + defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; + defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; + defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; + defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; + defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; + defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; +} + //===----------------------------------------------------------------------===// // Pattern fragments to auto generate BMI instructions. //===----------------------------------------------------------------------===// @@ -1292,56 +1301,50 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } -multiclass bmi_bextr opc, string mnemonic, RegisterClass RC, - X86MemOperand x86memop, SDNode OpNode, - PatFrag ld_frag, X86FoldableSchedWrite Sched> { - def rr : I, - T8PS, VEX, Sched<[Sched]>; - def rm : I, T8PS, VEX, - Sched<[Sched.Folded, - // x86memop:$src1 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC:$src2 - Sched.ReadAfterFold]>; +multiclass bmi4VOp3_base opc, string mnemonic, RegisterClass RC, + X86MemOperand x86memop, SDPatternOperator OpNode, + PatFrag ld_frag, X86FoldableSchedWrite Sched, + string Suffix = ""> { + def rr#Suffix : I, + T8PS, VEX, Sched<[Sched]>; +let mayLoad = 1 in + def rm#Suffix : I, T8PS, VEX, + Sched<[Sched.Folded, + // x86memop:$src1 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // RC:$src2 + Sched.ReadAfterFold]>; } -let Predicates = [HasBMI], Defs = [EFLAGS] in { - defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", 
GR32, i32mem, - X86bextr, loadi32, WriteBEXTR>; - defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR>, REX_W; -} - -multiclass bmi_bzhi opc, string mnemonic, RegisterClass RC, - X86MemOperand x86memop, SDNode Int, - PatFrag ld_frag, X86FoldableSchedWrite Sched> { - def rr : I, - T8PS, VEX, Sched<[Sched]>; - def rm : I, T8PS, VEX, - Sched<[Sched.Folded, - // x86memop:$src1 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC:$src2 - Sched.ReadAfterFold]>; -} - -let Predicates = [HasBMI2], Defs = [EFLAGS] in { - defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, WriteBZHI>; - defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI>, REX_W; +let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { + defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, + X86bextr, loadi32, WriteBEXTR>; + defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, + X86bextr, loadi64, WriteBEXTR>, REX_W; +} +let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in { + defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, + X86bzhi, loadi32, WriteBZHI>; + defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, + X86bzhi, loadi64, WriteBZHI>, REX_W; +} +let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { + defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, + X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX; + defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, + X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W; +} +let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in { + defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, + X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX; + defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, + X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W; } def CountTrailingOnes : SDNodeXForm { - def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, 
$src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>, - VEX_4V, Sched<[WriteALU]>; - def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>, - VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; -} - -let Predicates = [HasBMI2] in { + X86MemOperand x86memop, SDPatternOperator OpNode, + PatFrag ld_frag, string Suffix = ""> { + def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>, + VEX_4V, Sched<[WriteALU]>; + def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>, + VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; +} + +let Predicates = [HasBMI2, NoEGPR] in { defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem, X86pdep, loadi32>, T8XD; defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem, @@ -1406,6 +1409,17 @@ let Predicates = [HasBMI2] in { X86pext, loadi64>, T8XS, REX_W; } +let Predicates = [HasBMI2, HasEGPR] in { + defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem, + X86pdep, loadi32, "_EVEX">, T8XD, EVEX; + defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem, + X86pdep, loadi64, "_EVEX">, T8XD, REX_W, EVEX; + defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem, + X86pext, loadi32, "_EVEX">, T8XS, EVEX; + defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem, + X86pext, loadi64, "_EVEX">, T8XS, REX_W, EVEX; +} + //===----------------------------------------------------------------------===// // Lightweight Profiling Instructions diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index e416e4495e2277..48bf23f8cbf7b2 100644 --- 
a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -824,38 +824,40 @@ def ROT64L2R_imm8 : SDNodeXForm { +multiclass bmi_rotate { let hasSideEffects = 0 in { - def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TAXD, VEX, Sched<[WriteShift]>; + def ri#Suffix : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, + TAXD, VEX, Sched<[WriteShift]>; let mayLoad = 1 in - def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst), - (ins x86memop:$src1, u8imm:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TAXD, VEX, Sched<[WriteShiftLd]>; + def mi#Suffix : Ii8<0xF0, MRMSrcMem, (outs RC:$dst), + (ins x86memop:$src1, u8imm:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, + TAXD, VEX, Sched<[WriteShiftLd]>; } } -multiclass bmi_shift { +multiclass bmi_shift { let hasSideEffects = 0 in { - def rr : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - VEX, Sched<[WriteShift]>; + def rr#Suffix : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, + VEX, Sched<[WriteShift]>; let mayLoad = 1 in - def rm : I<0xF7, MRMSrcMem4VOp3, - (outs RC:$dst), (ins x86memop:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - VEX, Sched<[WriteShift.Folded, - // x86memop:$src1 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC:$src2 - WriteShift.ReadAfterFold]>; + def rm#Suffix : I<0xF7, MRMSrcMem4VOp3, + (outs RC:$dst), (ins x86memop:$src1, RC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, + VEX, Sched<[WriteShift.Folded, + // x86memop:$src1 + ReadDefault, ReadDefault, 
ReadDefault, ReadDefault, + ReadDefault, + // RC:$src2 + WriteShift.ReadAfterFold]>; } } -let Predicates = [HasBMI2] in { +let Predicates = [HasBMI2, NoEGPR] in { defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>; defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, REX_W; defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS; @@ -864,7 +866,20 @@ let Predicates = [HasBMI2] in { defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, REX_W; defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD; defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, REX_W; +} +let Predicates = [HasBMI2, HasEGPR] in { + defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX; + defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX; + defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8XS, EVEX; + defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem, "_EVEX">, T8XS, REX_W, EVEX; + defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem, "_EVEX">, T8XD, EVEX; + defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem, "_EVEX">, T8XD, REX_W, EVEX; + defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem, "_EVEX">, T8PD, EVEX; + defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8PD, REX_W, EVEX; +} + +let Predicates = [HasBMI2] in { // Prefer RORX which is non-destructive and doesn't update EFLAGS. 
let AddedComplexity = 10 in { def : Pat<(rotr GR32:$src, (i8 imm:$shamt)), diff --git a/llvm/test/MC/Disassembler/X86/apx/andn.txt b/llvm/test/MC/Disassembler/X86/apx/andn.txt new file mode 100644 index 00000000000000..8b943d2a0ac44c --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/andn.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: andnl %r18d, %r22d, %r26d +# INTEL: andn r26d, r22d, r18d +0x62,0x6a,0x4c,0x00,0xf2,0xd2 + +# ATT: andnq %r19, %r23, %r27 +# INTEL: andn r27, r23, r19 +0x62,0x6a,0xc4,0x00,0xf2,0xdb + +# ATT: andnl 291(%r28,%r29,4), %r18d, %r22d +# INTEL: andn r22d, r18d, dword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0x68,0x00,0xf2,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: andnq 291(%r28,%r29,4), %r19, %r23 +# INTEL: andn r23, r19, qword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0xe0,0x00,0xf2,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/bextr.txt b/llvm/test/MC/Disassembler/X86/apx/bextr.txt new file mode 100644 index 00000000000000..abd92864b315e3 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/bextr.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: bextrl %r18d, %r22d, %r26d +# INTEL: bextr r26d, r22d, r18d +0x62,0x6a,0x6c,0x00,0xf7,0xd6 + +# ATT: bextrl %r18d, 291(%r28,%r29,4), %r22d +# INTEL: bextr r22d, dword ptr [r28 + 4*r29 + 291], r18d +0x62,0x8a,0x68,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: bextrq %r19, %r23, %r27 +# INTEL: bextr r27, r23, r19 +0x62,0x6a,0xe4,0x00,0xf7,0xdf + +# ATT: bextrq %r19, 291(%r28,%r29,4), %r23 +# INTEL: bextr r23, qword ptr [r28 + 4*r29 + 291], r19 
+0x62,0x8a,0xe0,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/blsi.txt b/llvm/test/MC/Disassembler/X86/apx/blsi.txt new file mode 100644 index 00000000000000..254ec90caea515 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/blsi.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: blsil %r18d, %r22d +# INTEL: blsi r22d, r18d +0x62,0xfa,0x4c,0x00,0xf3,0xda + +# ATT: blsiq %r19, %r23 +# INTEL: blsi r23, r19 +0x62,0xfa,0xc4,0x00,0xf3,0xdb + +# ATT: blsil 291(%r28,%r29,4), %r18d +# INTEL: blsi r18d, dword ptr [r28 + 4*r29 + 291] +0x62,0x9a,0x68,0x00,0xf3,0x9c,0xac,0x23,0x01,0x00,0x00 + +# ATT: blsiq 291(%r28,%r29,4), %r19 +# INTEL: blsi r19, qword ptr [r28 + 4*r29 + 291] +0x62,0x9a,0xe0,0x00,0xf3,0x9c,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/blsmsk.txt b/llvm/test/MC/Disassembler/X86/apx/blsmsk.txt new file mode 100644 index 00000000000000..5e47d3d3d625eb --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/blsmsk.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: blsmskl %r18d, %r22d +# INTEL: blsmsk r22d, r18d +0x62,0xfa,0x4c,0x00,0xf3,0xd2 + +# ATT: blsmskq %r19, %r23 +# INTEL: blsmsk r23, r19 +0x62,0xfa,0xc4,0x00,0xf3,0xd3 + +# ATT: blsmskl 291(%r28,%r29,4), %r18d +# INTEL: blsmsk r18d, dword ptr [r28 + 4*r29 + 291] +0x62,0x9a,0x68,0x00,0xf3,0x94,0xac,0x23,0x01,0x00,0x00 + +# ATT: blsmskq 291(%r28,%r29,4), %r19 +# INTEL: blsmsk r19, qword ptr [r28 + 4*r29 + 291] +0x62,0x9a,0xe0,0x00,0xf3,0x94,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/blsr.txt 
b/llvm/test/MC/Disassembler/X86/apx/blsr.txt new file mode 100644 index 00000000000000..37df4306da26ed --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/blsr.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: blsrl %r18d, %r22d +# INTEL: blsr r22d, r18d +0x62,0xfa,0x4c,0x00,0xf3,0xca + +# ATT: blsrq %r19, %r23 +# INTEL: blsr r23, r19 +0x62,0xfa,0xc4,0x00,0xf3,0xcb + +# ATT: blsrl 291(%r28,%r29,4), %r18d +# INTEL: blsr r18d, dword ptr [r28 + 4*r29 + 291] +0x62,0x9a,0x68,0x00,0xf3,0x8c,0xac,0x23,0x01,0x00,0x00 + +# ATT: blsrq 291(%r28,%r29,4), %r19 +# INTEL: blsr r19, qword ptr [r28 + 4*r29 + 291] +0x62,0x9a,0xe0,0x00,0xf3,0x8c,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/bzhi.txt b/llvm/test/MC/Disassembler/X86/apx/bzhi.txt new file mode 100644 index 00000000000000..44f496e3cc0840 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/bzhi.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: bzhil %r18d, %r22d, %r26d +# INTEL: bzhi r26d, r22d, r18d +0x62,0x6a,0x6c,0x00,0xf5,0xd6 + +# ATT: bzhil %r18d, 291(%r28,%r29,4), %r22d +# INTEL: bzhi r22d, dword ptr [r28 + 4*r29 + 291], r18d +0x62,0x8a,0x68,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: bzhiq %r19, %r23, %r27 +# INTEL: bzhi r27, r23, r19 +0x62,0x6a,0xe4,0x00,0xf5,0xdf + +# ATT: bzhiq %r19, 291(%r28,%r29,4), %r23 +# INTEL: bzhi r23, qword ptr [r28 + 4*r29 + 291], r19 +0x62,0x8a,0xe0,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/evex-format.txt b/llvm/test/MC/Disassembler/X86/apx/evex-format.txt index ee2c2c5bdf909c..389b22cb4a223d 100644 --- 
a/llvm/test/MC/Disassembler/X86/apx/evex-format.txt +++ b/llvm/test/MC/Disassembler/X86/apx/evex-format.txt @@ -62,8 +62,20 @@ # INTEL: vpslldq zmm0, zmmword ptr [r16 + r17], 0 0x62,0xf9,0x79,0x48,0x73,0x3c,0x08,0x00 +## MRMSrcMem4VOp3 + +# ATT: bzhiq %r19, 291(%r28,%r29,4), %r23 +# INTEL: bzhi r23, qword ptr [r28 + 4*r29 + 291], r19 +0x62,0x8a,0xe0,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00 + ## MRMDestReg # ATT: vextractps $1, %xmm16, %r16d # INTEL: vextractps r16d, xmm16, 1 0x62,0xeb,0x7d,0x08,0x17,0xc0,0x01 + +## MRMSrcReg4VOp3 + +# ATT: bzhiq %r19, %r23, %r27 +# INTEL: bzhi r27, r23, r19 +0x62,0x6a,0xe4,0x00,0xf5,0xdf diff --git a/llvm/test/MC/Disassembler/X86/apx/mulx.txt b/llvm/test/MC/Disassembler/X86/apx/mulx.txt new file mode 100644 index 00000000000000..5d9b53b99a71b6 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/mulx.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: mulxl %r18d, %r22d, %r26d +# INTEL: mulx r26d, r22d, r18d +0x62,0x6a,0x4f,0x00,0xf6,0xd2 + +# ATT: mulxq %r19, %r23, %r27 +# INTEL: mulx r27, r23, r19 +0x62,0x6a,0xc7,0x00,0xf6,0xdb + +# ATT: mulxl 291(%r28,%r29,4), %r18d, %r22d +# INTEL: mulx r22d, r18d, dword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0x6b,0x00,0xf6,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: mulxq 291(%r28,%r29,4), %r19, %r23 +# INTEL: mulx r23, r19, qword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0xe3,0x00,0xf6,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/pdep.txt b/llvm/test/MC/Disassembler/X86/apx/pdep.txt new file mode 100644 index 00000000000000..87268fe5e27dd8 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/pdep.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel 
--output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: pdepl %r18d, %r22d, %r26d +# INTEL: pdep r26d, r22d, r18d +0x62,0x6a,0x4f,0x00,0xf5,0xd2 + +# ATT: pdepq %r19, %r23, %r27 +# INTEL: pdep r27, r23, r19 +0x62,0x6a,0xc7,0x00,0xf5,0xdb + +# ATT: pdepl 291(%r28,%r29,4), %r18d, %r22d +# INTEL: pdep r22d, r18d, dword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0x6b,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: pdepq 291(%r28,%r29,4), %r19, %r23 +# INTEL: pdep r23, r19, qword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0xe3,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/pext.txt b/llvm/test/MC/Disassembler/X86/apx/pext.txt new file mode 100644 index 00000000000000..6c5860aa812812 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/pext.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: pextl %r18d, %r22d, %r26d +# INTEL: pext r26d, r22d, r18d +0x62,0x6a,0x4e,0x00,0xf5,0xd2 + +# ATT: pextq %r19, %r23, %r27 +# INTEL: pext r27, r23, r19 +0x62,0x6a,0xc6,0x00,0xf5,0xdb + +# ATT: pextl 291(%r28,%r29,4), %r18d, %r22d +# INTEL: pext r22d, r18d, dword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0x6a,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: pextq 291(%r28,%r29,4), %r19, %r23 +# INTEL: pext r23, r19, qword ptr [r28 + 4*r29 + 291] +0x62,0x8a,0xe2,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/rorx.txt b/llvm/test/MC/Disassembler/X86/apx/rorx.txt new file mode 100644 index 00000000000000..9860deaea86bdd --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/rorx.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: 
rorxl $123, %r18d, %r22d +# INTEL: rorx r22d, r18d, 123 +0x62,0xeb,0x7f,0x08,0xf0,0xf2,0x7b + +# ATT: rorxq $123, %r19, %r23 +# INTEL: rorx r23, r19, 123 +0x62,0xeb,0xff,0x08,0xf0,0xfb,0x7b + +# ATT: rorxl $123, 291(%r28,%r29,4), %r18d +# INTEL: rorx r18d, dword ptr [r28 + 4*r29 + 291], 123 +0x62,0x8b,0x7b,0x08,0xf0,0x94,0xac,0x23,0x01,0x00,0x00,0x7b + +# ATT: rorxq $123, 291(%r28,%r29,4), %r19 +# INTEL: rorx r19, qword ptr [r28 + 4*r29 + 291], 123 +0x62,0x8b,0xfb,0x08,0xf0,0x9c,0xac,0x23,0x01,0x00,0x00,0x7b diff --git a/llvm/test/MC/Disassembler/X86/apx/sarx.txt b/llvm/test/MC/Disassembler/X86/apx/sarx.txt new file mode 100644 index 00000000000000..20018f4d4b1283 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/sarx.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: sarxl %r18d, %r22d, %r26d +# INTEL: sarx r26d, r22d, r18d +0x62,0x6a,0x6e,0x00,0xf7,0xd6 + +# ATT: sarxl %r18d, 291(%r28,%r29,4), %r22d +# INTEL: sarx r22d, dword ptr [r28 + 4*r29 + 291], r18d +0x62,0x8a,0x6a,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: sarxq %r19, %r23, %r27 +# INTEL: sarx r27, r23, r19 +0x62,0x6a,0xe6,0x00,0xf7,0xdf + +# ATT: sarxq %r19, 291(%r28,%r29,4), %r23 +# INTEL: sarx r23, qword ptr [r28 + 4*r29 + 291], r19 +0x62,0x8a,0xe2,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/shlx.txt b/llvm/test/MC/Disassembler/X86/apx/shlx.txt new file mode 100644 index 00000000000000..f6d6250bd06318 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/shlx.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: shlxl %r18d, %r22d, %r26d +# INTEL: shlx r26d, r22d, r18d 
+0x62,0x6a,0x6d,0x00,0xf7,0xd6 + +# ATT: shlxl %r18d, 291(%r28,%r29,4), %r22d +# INTEL: shlx r22d, dword ptr [r28 + 4*r29 + 291], r18d +0x62,0x8a,0x69,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: shlxq %r19, %r23, %r27 +# INTEL: shlx r27, r23, r19 +0x62,0x6a,0xe5,0x00,0xf7,0xdf + +# ATT: shlxq %r19, 291(%r28,%r29,4), %r23 +# INTEL: shlx r23, qword ptr [r28 + 4*r29 + 291], r19 +0x62,0x8a,0xe1,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/apx/shrx.txt b/llvm/test/MC/Disassembler/X86/apx/shrx.txt new file mode 100644 index 00000000000000..09750e05c127e6 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/shrx.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: shrxl %r18d, %r22d, %r26d +# INTEL: shrx r26d, r22d, r18d +0x62,0x6a,0x6f,0x00,0xf7,0xd6 + +# ATT: shrxl %r18d, 291(%r28,%r29,4), %r22d +# INTEL: shrx r22d, dword ptr [r28 + 4*r29 + 291], r18d +0x62,0x8a,0x6b,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00 + +# ATT: shrxq %r19, %r23, %r27 +# INTEL: shrx r27, r23, r19 +0x62,0x6a,0xe7,0x00,0xf7,0xdf + +# ATT: shrxq %r19, 291(%r28,%r29,4), %r23 +# INTEL: shrx r23, qword ptr [r28 + 4*r29 + 291], r19 +0x62,0x8a,0xe3,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00 diff --git a/llvm/test/MC/X86/apx/andn-att.s b/llvm/test/MC/X86/apx/andn-att.s new file mode 100644 index 00000000000000..d68cee8bcf1f72 --- /dev/null +++ b/llvm/test/MC/X86/apx/andn-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: andnl %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x4c,0x00,0xf2,0xd2] + andnl %r18d, %r22d, %r26d + +# CHECK: andnq %r19, %r23, %r27 +# CHECK: encoding: 
[0x62,0x6a,0xc4,0x00,0xf2,0xdb] + andnq %r19, %r23, %r27 + +# CHECK: andnl 291(%r28,%r29,4), %r18d, %r22d +# CHECK: encoding: [0x62,0x8a,0x68,0x00,0xf2,0xb4,0xac,0x23,0x01,0x00,0x00] + andnl 291(%r28,%r29,4), %r18d, %r22d + +# CHECK: andnq 291(%r28,%r29,4), %r19, %r23 +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf2,0xbc,0xac,0x23,0x01,0x00,0x00] + andnq 291(%r28,%r29,4), %r19, %r23 diff --git a/llvm/test/MC/X86/apx/andn-intel.s b/llvm/test/MC/X86/apx/andn-intel.s new file mode 100644 index 00000000000000..583e6e763b1eca --- /dev/null +++ b/llvm/test/MC/X86/apx/andn-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: andn r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x4c,0x00,0xf2,0xd2] + andn r26d, r22d, r18d + +# CHECK: andn r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xc4,0x00,0xf2,0xdb] + andn r27, r23, r19 + +# CHECK: andn r22d, r18d, dword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0x68,0x00,0xf2,0xb4,0xac,0x23,0x01,0x00,0x00] + andn r22d, r18d, dword ptr [r28 + 4*r29 + 291] + +# CHECK: andn r23, r19, qword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf2,0xbc,0xac,0x23,0x01,0x00,0x00] + andn r23, r19, qword ptr [r28 + 4*r29 + 291] diff --git a/llvm/test/MC/X86/apx/bextr-att.s b/llvm/test/MC/X86/apx/bextr-att.s new file mode 100644 index 00000000000000..6095ffa389a34c --- /dev/null +++ b/llvm/test/MC/X86/apx/bextr-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: bextrl %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x6c,0x00,0xf7,0xd6] + bextrl %r18d, %r22d, %r26d + +# CHECK: bextrl %r18d, 291(%r28,%r29,4), %r22d +# CHECK: encoding: [0x62,0x8a,0x68,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + bextrl %r18d, 291(%r28,%r29,4), %r22d + +# CHECK: 
bextrq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xe4,0x00,0xf7,0xdf] + bextrq %r19, %r23, %r27 + +# CHECK: bextrq %r19, 291(%r28,%r29,4), %r23 +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + bextrq %r19, 291(%r28,%r29,4), %r23 diff --git a/llvm/test/MC/X86/apx/bextr-intel.s b/llvm/test/MC/X86/apx/bextr-intel.s new file mode 100644 index 00000000000000..af70c00c1d631d --- /dev/null +++ b/llvm/test/MC/X86/apx/bextr-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: bextr r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x6c,0x00,0xf7,0xd6] + bextr r26d, r22d, r18d + +# CHECK: bextr r22d, dword ptr [r28 + 4*r29 + 291], r18d +# CHECK: encoding: [0x62,0x8a,0x68,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + bextr r22d, dword ptr [r28 + 4*r29 + 291], r18d + +# CHECK: bextr r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xe4,0x00,0xf7,0xdf] + bextr r27, r23, r19 + +# CHECK: bextr r23, qword ptr [r28 + 4*r29 + 291], r19 +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + bextr r23, qword ptr [r28 + 4*r29 + 291], r19 diff --git a/llvm/test/MC/X86/apx/blsi-att.s b/llvm/test/MC/X86/apx/blsi-att.s new file mode 100644 index 00000000000000..65b2fd2b4d09b6 --- /dev/null +++ b/llvm/test/MC/X86/apx/blsi-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: blsil %r18d, %r22d +# CHECK: encoding: [0x62,0xfa,0x4c,0x00,0xf3,0xda] + blsil %r18d, %r22d + +# CHECK: blsiq %r19, %r23 +# CHECK: encoding: [0x62,0xfa,0xc4,0x00,0xf3,0xdb] + blsiq %r19, %r23 + +# CHECK: blsil 291(%r28,%r29,4), %r18d +# CHECK: encoding: [0x62,0x9a,0x68,0x00,0xf3,0x9c,0xac,0x23,0x01,0x00,0x00] + blsil 291(%r28,%r29,4), %r18d + +# CHECK: blsiq 291(%r28,%r29,4), %r19 +# CHECK: 
encoding: [0x62,0x9a,0xe0,0x00,0xf3,0x9c,0xac,0x23,0x01,0x00,0x00] + blsiq 291(%r28,%r29,4), %r19 diff --git a/llvm/test/MC/X86/apx/blsi-intel.s b/llvm/test/MC/X86/apx/blsi-intel.s new file mode 100644 index 00000000000000..edf5711cc74b57 --- /dev/null +++ b/llvm/test/MC/X86/apx/blsi-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: blsi r22d, r18d +# CHECK: encoding: [0x62,0xfa,0x4c,0x00,0xf3,0xda] + blsi r22d, r18d + +# CHECK: blsi r23, r19 +# CHECK: encoding: [0x62,0xfa,0xc4,0x00,0xf3,0xdb] + blsi r23, r19 + +# CHECK: blsi r18d, dword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x9a,0x68,0x00,0xf3,0x9c,0xac,0x23,0x01,0x00,0x00] + blsi r18d, dword ptr [r28 + 4*r29 + 291] + +# CHECK: blsi r19, qword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x9a,0xe0,0x00,0xf3,0x9c,0xac,0x23,0x01,0x00,0x00] + blsi r19, qword ptr [r28 + 4*r29 + 291] diff --git a/llvm/test/MC/X86/apx/blsmsk-att.s b/llvm/test/MC/X86/apx/blsmsk-att.s new file mode 100644 index 00000000000000..710fcabddcc3ab --- /dev/null +++ b/llvm/test/MC/X86/apx/blsmsk-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: blsmskl %r18d, %r22d +# CHECK: encoding: [0x62,0xfa,0x4c,0x00,0xf3,0xd2] + blsmskl %r18d, %r22d + +# CHECK: blsmskq %r19, %r23 +# CHECK: encoding: [0x62,0xfa,0xc4,0x00,0xf3,0xd3] + blsmskq %r19, %r23 + +# CHECK: blsmskl 291(%r28,%r29,4), %r18d +# CHECK: encoding: [0x62,0x9a,0x68,0x00,0xf3,0x94,0xac,0x23,0x01,0x00,0x00] + blsmskl 291(%r28,%r29,4), %r18d + +# CHECK: blsmskq 291(%r28,%r29,4), %r19 +# CHECK: encoding: [0x62,0x9a,0xe0,0x00,0xf3,0x94,0xac,0x23,0x01,0x00,0x00] + blsmskq 291(%r28,%r29,4), %r19 diff --git a/llvm/test/MC/X86/apx/blsmsk-intel.s b/llvm/test/MC/X86/apx/blsmsk-intel.s new file mode 100644 
index 00000000000000..bb8197d3d41026 --- /dev/null +++ b/llvm/test/MC/X86/apx/blsmsk-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: blsmsk r22d, r18d +# CHECK: encoding: [0x62,0xfa,0x4c,0x00,0xf3,0xd2] + blsmsk r22d, r18d + +# CHECK: blsmsk r23, r19 +# CHECK: encoding: [0x62,0xfa,0xc4,0x00,0xf3,0xd3] + blsmsk r23, r19 + +# CHECK: blsmsk r18d, dword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x9a,0x68,0x00,0xf3,0x94,0xac,0x23,0x01,0x00,0x00] + blsmsk r18d, dword ptr [r28 + 4*r29 + 291] + +# CHECK: blsmsk r19, qword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x9a,0xe0,0x00,0xf3,0x94,0xac,0x23,0x01,0x00,0x00] + blsmsk r19, qword ptr [r28 + 4*r29 + 291] diff --git a/llvm/test/MC/X86/apx/blsr-att.s b/llvm/test/MC/X86/apx/blsr-att.s new file mode 100644 index 00000000000000..c9ca56149cf1a8 --- /dev/null +++ b/llvm/test/MC/X86/apx/blsr-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: blsrl %r18d, %r22d +# CHECK: encoding: [0x62,0xfa,0x4c,0x00,0xf3,0xca] + blsrl %r18d, %r22d + +# CHECK: blsrq %r19, %r23 +# CHECK: encoding: [0x62,0xfa,0xc4,0x00,0xf3,0xcb] + blsrq %r19, %r23 + +# CHECK: blsrl 291(%r28,%r29,4), %r18d +# CHECK: encoding: [0x62,0x9a,0x68,0x00,0xf3,0x8c,0xac,0x23,0x01,0x00,0x00] + blsrl 291(%r28,%r29,4), %r18d + +# CHECK: blsrq 291(%r28,%r29,4), %r19 +# CHECK: encoding: [0x62,0x9a,0xe0,0x00,0xf3,0x8c,0xac,0x23,0x01,0x00,0x00] + blsrq 291(%r28,%r29,4), %r19 diff --git a/llvm/test/MC/X86/apx/blsr-intel.s b/llvm/test/MC/X86/apx/blsr-intel.s new file mode 100644 index 00000000000000..acbfb81964614e --- /dev/null +++ b/llvm/test/MC/X86/apx/blsr-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | 
FileCheck %s + +# CHECK: blsr r22d, r18d +# CHECK: encoding: [0x62,0xfa,0x4c,0x00,0xf3,0xca] + blsr r22d, r18d + +# CHECK: blsr r23, r19 +# CHECK: encoding: [0x62,0xfa,0xc4,0x00,0xf3,0xcb] + blsr r23, r19 + +# CHECK: blsr r18d, dword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x9a,0x68,0x00,0xf3,0x8c,0xac,0x23,0x01,0x00,0x00] + blsr r18d, dword ptr [r28 + 4*r29 + 291] + +# CHECK: blsr r19, qword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x9a,0xe0,0x00,0xf3,0x8c,0xac,0x23,0x01,0x00,0x00] + blsr r19, qword ptr [r28 + 4*r29 + 291] diff --git a/llvm/test/MC/X86/apx/bzhi-att.s b/llvm/test/MC/X86/apx/bzhi-att.s new file mode 100644 index 00000000000000..635cfa14e6b4f5 --- /dev/null +++ b/llvm/test/MC/X86/apx/bzhi-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: bzhil %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x6c,0x00,0xf5,0xd6] + bzhil %r18d, %r22d, %r26d + +# CHECK: bzhil %r18d, 291(%r28,%r29,4), %r22d +# CHECK: encoding: [0x62,0x8a,0x68,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00] + bzhil %r18d, 291(%r28,%r29,4), %r22d + +# CHECK: bzhiq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xe4,0x00,0xf5,0xdf] + bzhiq %r19, %r23, %r27 + +# CHECK: bzhiq %r19, 291(%r28,%r29,4), %r23 +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + bzhiq %r19, 291(%r28,%r29,4), %r23 diff --git a/llvm/test/MC/X86/apx/bzhi-intel.s b/llvm/test/MC/X86/apx/bzhi-intel.s new file mode 100644 index 00000000000000..f7ab72dd717ee7 --- /dev/null +++ b/llvm/test/MC/X86/apx/bzhi-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: bzhi r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x6c,0x00,0xf5,0xd6] + bzhi r26d, r22d, r18d + +# CHECK: bzhi r22d, dword ptr [r28 + 4*r29 + 291], 
r18d +# CHECK: encoding: [0x62,0x8a,0x68,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00] + bzhi r22d, dword ptr [r28 + 4*r29 + 291], r18d + +# CHECK: bzhi r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xe4,0x00,0xf5,0xdf] + bzhi r27, r23, r19 + +# CHECK: bzhi r23, qword ptr [r28 + 4*r29 + 291], r19 +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + bzhi r23, qword ptr [r28 + 4*r29 + 291], r19 diff --git a/llvm/test/MC/X86/apx/evex-format-att.s b/llvm/test/MC/X86/apx/evex-format-att.s index aedd09e7e698df..0b2e860d6ba090 100644 --- a/llvm/test/MC/X86/apx/evex-format-att.s +++ b/llvm/test/MC/X86/apx/evex-format-att.s @@ -60,8 +60,20 @@ # CHECK: encoding: [0x62,0xf9,0x79,0x48,0x73,0x3c,0x08,0x00] vpslldq $0, (%r16,%r17), %zmm0 +## MRMSrcMem4VOp3 + +# CHECK: bzhiq %r19, 291(%r28,%r29,4), %r23 +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + bzhiq %r19, 291(%r28,%r29,4), %r23 + ## MRMDestReg # CHECK: vextractps $1, %xmm16, %r16d # CHECK: encoding: [0x62,0xeb,0x7d,0x08,0x17,0xc0,0x01] vextractps $1, %xmm16, %r16d + +## MRMSrcReg4VOp3 + +# CHECK: bzhiq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xe4,0x00,0xf5,0xdf] + bzhiq %r19, %r23, %r27 diff --git a/llvm/test/MC/X86/apx/evex-format-intel.s b/llvm/test/MC/X86/apx/evex-format-intel.s index aa11a879f4b4c6..ececb7137b1101 100644 --- a/llvm/test/MC/X86/apx/evex-format-intel.s +++ b/llvm/test/MC/X86/apx/evex-format-intel.s @@ -60,8 +60,20 @@ # CHECK: encoding: [0x62,0xf9,0x79,0x48,0x73,0x3c,0x08,0x00] vpslldq zmm0, zmmword ptr [r16 + r17], 0 +## MRMSrcMem4VOp3 + +# CHECK: bzhi r23, qword ptr [r28 + 4*r29 + 291], r19 +# CHECK: encoding: [0x62,0x8a,0xe0,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + bzhi r23, qword ptr [r28 + 4*r29 + 291], r19 + ## MRMDestReg # CHECK: vextractps r16d, xmm16, 1 # CHECK: encoding: [0x62,0xeb,0x7d,0x08,0x17,0xc0,0x01] vextractps r16d, xmm16, 1 + +## MRMSrcReg4VOp3 + +# CHECK: bzhi r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xe4,0x00,0xf5,0xdf] 
+ bzhi r27, r23, r19 diff --git a/llvm/test/MC/X86/apx/mulx-att.s b/llvm/test/MC/X86/apx/mulx-att.s new file mode 100644 index 00000000000000..976a79f469cd6f --- /dev/null +++ b/llvm/test/MC/X86/apx/mulx-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: mulxl %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x4f,0x00,0xf6,0xd2] + mulxl %r18d, %r22d, %r26d + +# CHECK: mulxq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xc7,0x00,0xf6,0xdb] + mulxq %r19, %r23, %r27 + +# CHECK: mulxl 291(%r28,%r29,4), %r18d, %r22d +# CHECK: encoding: [0x62,0x8a,0x6b,0x00,0xf6,0xb4,0xac,0x23,0x01,0x00,0x00] + mulxl 291(%r28,%r29,4), %r18d, %r22d + +# CHECK: mulxq 291(%r28,%r29,4), %r19, %r23 +# CHECK: encoding: [0x62,0x8a,0xe3,0x00,0xf6,0xbc,0xac,0x23,0x01,0x00,0x00] + mulxq 291(%r28,%r29,4), %r19, %r23 diff --git a/llvm/test/MC/X86/apx/mulx-intel.s b/llvm/test/MC/X86/apx/mulx-intel.s new file mode 100644 index 00000000000000..3db587502915df --- /dev/null +++ b/llvm/test/MC/X86/apx/mulx-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: mulx r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x4f,0x00,0xf6,0xd2] + mulx r26d, r22d, r18d + +# CHECK: mulx r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xc7,0x00,0xf6,0xdb] + mulx r27, r23, r19 + +# CHECK: mulx r22d, r18d, dword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0x6b,0x00,0xf6,0xb4,0xac,0x23,0x01,0x00,0x00] + mulx r22d, r18d, dword ptr [r28 + 4*r29 + 291] + +# CHECK: mulx r23, r19, qword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0xe3,0x00,0xf6,0xbc,0xac,0x23,0x01,0x00,0x00] + mulx r23, r19, qword ptr [r28 + 4*r29 + 291] diff --git a/llvm/test/MC/X86/apx/pdep-att.s b/llvm/test/MC/X86/apx/pdep-att.s new file mode 100644 index 
00000000000000..c319b17e47f6fc --- /dev/null +++ b/llvm/test/MC/X86/apx/pdep-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: pdepl %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x4f,0x00,0xf5,0xd2] + pdepl %r18d, %r22d, %r26d + +# CHECK: pdepq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xc7,0x00,0xf5,0xdb] + pdepq %r19, %r23, %r27 + +# CHECK: pdepl 291(%r28,%r29,4), %r18d, %r22d +# CHECK: encoding: [0x62,0x8a,0x6b,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00] + pdepl 291(%r28,%r29,4), %r18d, %r22d + +# CHECK: pdepq 291(%r28,%r29,4), %r19, %r23 +# CHECK: encoding: [0x62,0x8a,0xe3,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + pdepq 291(%r28,%r29,4), %r19, %r23 diff --git a/llvm/test/MC/X86/apx/pdep-intel.s b/llvm/test/MC/X86/apx/pdep-intel.s new file mode 100644 index 00000000000000..0f9e828c021c3a --- /dev/null +++ b/llvm/test/MC/X86/apx/pdep-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: pdep r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x4f,0x00,0xf5,0xd2] + pdep r26d, r22d, r18d + +# CHECK: pdep r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xc7,0x00,0xf5,0xdb] + pdep r27, r23, r19 + +# CHECK: pdep r22d, r18d, dword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0x6b,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00] + pdep r22d, r18d, dword ptr [r28 + 4*r29 + 291] + +# CHECK: pdep r23, r19, qword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0xe3,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + pdep r23, r19, qword ptr [r28 + 4*r29 + 291] diff --git a/llvm/test/MC/X86/apx/pext-att.s b/llvm/test/MC/X86/apx/pext-att.s new file mode 100644 index 00000000000000..c07fa1ac2082af --- /dev/null +++ b/llvm/test/MC/X86/apx/pext-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 
--show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: pextl %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x4e,0x00,0xf5,0xd2] + pextl %r18d, %r22d, %r26d + +# CHECK: pextq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xc6,0x00,0xf5,0xdb] + pextq %r19, %r23, %r27 + +# CHECK: pextl 291(%r28,%r29,4), %r18d, %r22d +# CHECK: encoding: [0x62,0x8a,0x6a,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00] + pextl 291(%r28,%r29,4), %r18d, %r22d + +# CHECK: pextq 291(%r28,%r29,4), %r19, %r23 +# CHECK: encoding: [0x62,0x8a,0xe2,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + pextq 291(%r28,%r29,4), %r19, %r23 diff --git a/llvm/test/MC/X86/apx/pext-intel.s b/llvm/test/MC/X86/apx/pext-intel.s new file mode 100644 index 00000000000000..9a7e7d93094a42 --- /dev/null +++ b/llvm/test/MC/X86/apx/pext-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: pext r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x4e,0x00,0xf5,0xd2] + pext r26d, r22d, r18d + +# CHECK: pext r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xc6,0x00,0xf5,0xdb] + pext r27, r23, r19 + +# CHECK: pext r22d, r18d, dword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0x6a,0x00,0xf5,0xb4,0xac,0x23,0x01,0x00,0x00] + pext r22d, r18d, dword ptr [r28 + 4*r29 + 291] + +# CHECK: pext r23, r19, qword ptr [r28 + 4*r29 + 291] +# CHECK: encoding: [0x62,0x8a,0xe2,0x00,0xf5,0xbc,0xac,0x23,0x01,0x00,0x00] + pext r23, r19, qword ptr [r28 + 4*r29 + 291] diff --git a/llvm/test/MC/X86/apx/rorx-att.s b/llvm/test/MC/X86/apx/rorx-att.s new file mode 100644 index 00000000000000..fb613d95c7cb43 --- /dev/null +++ b/llvm/test/MC/X86/apx/rorx-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# 
ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: rorxl $123, %r18d, %r22d +# CHECK: encoding: [0x62,0xeb,0x7f,0x08,0xf0,0xf2,0x7b] + rorxl $123, %r18d, %r22d + +# CHECK: rorxq $123, %r19, %r23 +# CHECK: encoding: [0x62,0xeb,0xff,0x08,0xf0,0xfb,0x7b] + rorxq $123, %r19, %r23 + +# CHECK: rorxl $123, 291(%r28,%r29,4), %r18d +# CHECK: encoding: [0x62,0x8b,0x7b,0x08,0xf0,0x94,0xac,0x23,0x01,0x00,0x00,0x7b] + rorxl $123, 291(%r28,%r29,4), %r18d + +# CHECK: rorxq $123, 291(%r28,%r29,4), %r19 +# CHECK: encoding: [0x62,0x8b,0xfb,0x08,0xf0,0x9c,0xac,0x23,0x01,0x00,0x00,0x7b] + rorxq $123, 291(%r28,%r29,4), %r19 diff --git a/llvm/test/MC/X86/apx/rorx-intel.s b/llvm/test/MC/X86/apx/rorx-intel.s new file mode 100644 index 00000000000000..d3e63559cba579 --- /dev/null +++ b/llvm/test/MC/X86/apx/rorx-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: rorx r22d, r18d, 123 +# CHECK: encoding: [0x62,0xeb,0x7f,0x08,0xf0,0xf2,0x7b] + rorx r22d, r18d, 123 + +# CHECK: rorx r23, r19, 123 +# CHECK: encoding: [0x62,0xeb,0xff,0x08,0xf0,0xfb,0x7b] + rorx r23, r19, 123 + +# CHECK: rorx r18d, dword ptr [r28 + 4*r29 + 291], 123 +# CHECK: encoding: [0x62,0x8b,0x7b,0x08,0xf0,0x94,0xac,0x23,0x01,0x00,0x00,0x7b] + rorx r18d, dword ptr [r28 + 4*r29 + 291], 123 + +# CHECK: rorx r19, qword ptr [r28 + 4*r29 + 291], 123 +# CHECK: encoding: [0x62,0x8b,0xfb,0x08,0xf0,0x9c,0xac,0x23,0x01,0x00,0x00,0x7b] + rorx r19, qword ptr [r28 + 4*r29 + 291], 123 diff --git a/llvm/test/MC/X86/apx/sarx-att.s b/llvm/test/MC/X86/apx/sarx-att.s new file mode 100644 index 00000000000000..a174903d976cbf --- /dev/null +++ b/llvm/test/MC/X86/apx/sarx-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: sarxl %r18d, %r22d, %r26d +# CHECK: encoding: 
[0x62,0x6a,0x6e,0x00,0xf7,0xd6] + sarxl %r18d, %r22d, %r26d + +# CHECK: sarxl %r18d, 291(%r28,%r29,4), %r22d +# CHECK: encoding: [0x62,0x8a,0x6a,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + sarxl %r18d, 291(%r28,%r29,4), %r22d + +# CHECK: sarxq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xe6,0x00,0xf7,0xdf] + sarxq %r19, %r23, %r27 + +# CHECK: sarxq %r19, 291(%r28,%r29,4), %r23 +# CHECK: encoding: [0x62,0x8a,0xe2,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + sarxq %r19, 291(%r28,%r29,4), %r23 diff --git a/llvm/test/MC/X86/apx/sarx-intel.s b/llvm/test/MC/X86/apx/sarx-intel.s new file mode 100644 index 00000000000000..962b6ec313b987 --- /dev/null +++ b/llvm/test/MC/X86/apx/sarx-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: sarx r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x6e,0x00,0xf7,0xd6] + sarx r26d, r22d, r18d + +# CHECK: sarx r22d, dword ptr [r28 + 4*r29 + 291], r18d +# CHECK: encoding: [0x62,0x8a,0x6a,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + sarx r22d, dword ptr [r28 + 4*r29 + 291], r18d + +# CHECK: sarx r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xe6,0x00,0xf7,0xdf] + sarx r27, r23, r19 + +# CHECK: sarx r23, qword ptr [r28 + 4*r29 + 291], r19 +# CHECK: encoding: [0x62,0x8a,0xe2,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + sarx r23, qword ptr [r28 + 4*r29 + 291], r19 diff --git a/llvm/test/MC/X86/apx/shlx-att.s b/llvm/test/MC/X86/apx/shlx-att.s new file mode 100644 index 00000000000000..4e28119f083056 --- /dev/null +++ b/llvm/test/MC/X86/apx/shlx-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: shlxl %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x6d,0x00,0xf7,0xd6] + shlxl %r18d, %r22d, %r26d + +# CHECK: shlxl %r18d, 291(%r28,%r29,4), %r22d +# CHECK: encoding: 
[0x62,0x8a,0x69,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + shlxl %r18d, 291(%r28,%r29,4), %r22d + +# CHECK: shlxq %r19, %r23, %r27 +# CHECK: encoding: [0x62,0x6a,0xe5,0x00,0xf7,0xdf] + shlxq %r19, %r23, %r27 + +# CHECK: shlxq %r19, 291(%r28,%r29,4), %r23 +# CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + shlxq %r19, 291(%r28,%r29,4), %r23 diff --git a/llvm/test/MC/X86/apx/shlx-intel.s b/llvm/test/MC/X86/apx/shlx-intel.s new file mode 100644 index 00000000000000..9f16918a712dc5 --- /dev/null +++ b/llvm/test/MC/X86/apx/shlx-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: shlx r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x6d,0x00,0xf7,0xd6] + shlx r26d, r22d, r18d + +# CHECK: shlx r22d, dword ptr [r28 + 4*r29 + 291], r18d +# CHECK: encoding: [0x62,0x8a,0x69,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + shlx r22d, dword ptr [r28 + 4*r29 + 291], r18d + +# CHECK: shlx r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xe5,0x00,0xf7,0xdf] + shlx r27, r23, r19 + +# CHECK: shlx r23, qword ptr [r28 + 4*r29 + 291], r19 +# CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + shlx r23, qword ptr [r28 + 4*r29 + 291], r19 diff --git a/llvm/test/MC/X86/apx/shrx-att.s b/llvm/test/MC/X86/apx/shrx-att.s new file mode 100644 index 00000000000000..d9bb5f84af73d4 --- /dev/null +++ b/llvm/test/MC/X86/apx/shrx-att.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: +# CHECK: shrxl %r18d, %r22d, %r26d +# CHECK: encoding: [0x62,0x6a,0x6f,0x00,0xf7,0xd6] + shrxl %r18d, %r22d, %r26d + +# CHECK: shrxl %r18d, 291(%r28,%r29,4), %r22d +# CHECK: encoding: [0x62,0x8a,0x6b,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + shrxl %r18d, 291(%r28,%r29,4), %r22d + +# CHECK: shrxq %r19, %r23, %r27 +# 
CHECK: encoding: [0x62,0x6a,0xe7,0x00,0xf7,0xdf] + shrxq %r19, %r23, %r27 + +# CHECK: shrxq %r19, 291(%r28,%r29,4), %r23 +# CHECK: encoding: [0x62,0x8a,0xe3,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + shrxq %r19, 291(%r28,%r29,4), %r23 diff --git a/llvm/test/MC/X86/apx/shrx-intel.s b/llvm/test/MC/X86/apx/shrx-intel.s new file mode 100644 index 00000000000000..385c530a1108be --- /dev/null +++ b/llvm/test/MC/X86/apx/shrx-intel.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +# CHECK: shrx r26d, r22d, r18d +# CHECK: encoding: [0x62,0x6a,0x6f,0x00,0xf7,0xd6] + shrx r26d, r22d, r18d + +# CHECK: shrx r22d, dword ptr [r28 + 4*r29 + 291], r18d +# CHECK: encoding: [0x62,0x8a,0x6b,0x00,0xf7,0xb4,0xac,0x23,0x01,0x00,0x00] + shrx r22d, dword ptr [r28 + 4*r29 + 291], r18d + +# CHECK: shrx r27, r23, r19 +# CHECK: encoding: [0x62,0x6a,0xe7,0x00,0xf7,0xdf] + shrx r27, r23, r19 + +# CHECK: shrx r23, qword ptr [r28 + 4*r29 + 291], r19 +# CHECK: encoding: [0x62,0x8a,0xe3,0x00,0xf7,0xbc,0xac,0x23,0x01,0x00,0x00] + shrx r23, qword ptr [r28 + 4*r29 + 291], r19 diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index dcf650434c8169..b2609f01e86a21 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -411,7 +411,9 @@ static const X86FoldTableEntry Table1[] = { {X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16}, {X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16}, {X86::BEXTR32rr, X86::BEXTR32rm, 0}, + {X86::BEXTR32rr_EVEX, X86::BEXTR32rm_EVEX, 0}, {X86::BEXTR64rr, X86::BEXTR64rm, 0}, + {X86::BEXTR64rr_EVEX, X86::BEXTR64rm_EVEX, 0}, {X86::BEXTRI32ri, X86::BEXTRI32mi, 0}, {X86::BEXTRI64ri, X86::BEXTRI64mi, 0}, {X86::BLCFILL32rr, X86::BLCFILL32rm, 0}, @@ -427,13 +429,19 @@ static const X86FoldTableEntry Table1[] = { {X86::BLSFILL32rr, X86::BLSFILL32rm, 0}, {X86::BLSFILL64rr, X86::BLSFILL64rm, 0}, {X86::BLSI32rr, 
X86::BLSI32rm, 0}, + {X86::BLSI32rr_EVEX, X86::BLSI32rm_EVEX, 0}, {X86::BLSI64rr, X86::BLSI64rm, 0}, + {X86::BLSI64rr_EVEX, X86::BLSI64rm_EVEX, 0}, {X86::BLSIC32rr, X86::BLSIC32rm, 0}, {X86::BLSIC64rr, X86::BLSIC64rm, 0}, {X86::BLSMSK32rr, X86::BLSMSK32rm, 0}, + {X86::BLSMSK32rr_EVEX, X86::BLSMSK32rm_EVEX, 0}, {X86::BLSMSK64rr, X86::BLSMSK64rm, 0}, + {X86::BLSMSK64rr_EVEX, X86::BLSMSK64rm_EVEX, 0}, {X86::BLSR32rr, X86::BLSR32rm, 0}, + {X86::BLSR32rr_EVEX, X86::BLSR32rm_EVEX, 0}, {X86::BLSR64rr, X86::BLSR64rm, 0}, + {X86::BLSR64rr_EVEX, X86::BLSR64rm_EVEX, 0}, {X86::BSF16rr, X86::BSF16rm, 0}, {X86::BSF32rr, X86::BSF32rm, 0}, {X86::BSF64rr, X86::BSF64rm, 0}, @@ -441,7 +449,9 @@ static const X86FoldTableEntry Table1[] = { {X86::BSR32rr, X86::BSR32rm, 0}, {X86::BSR64rr, X86::BSR64rm, 0}, {X86::BZHI32rr, X86::BZHI32rm, 0}, + {X86::BZHI32rr_EVEX, X86::BZHI32rm_EVEX, 0}, {X86::BZHI64rr, X86::BZHI64rm, 0}, + {X86::BZHI64rr_EVEX, X86::BZHI64rm_EVEX, 0}, {X86::CMP16rr, X86::CMP16rm, 0}, {X86::CMP32rr, X86::CMP32rm, 0}, {X86::CMP64rr, X86::CMP64rm, 0}, @@ -582,7 +592,9 @@ static const X86FoldTableEntry Table1[] = { {X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16}, {X86::RCPSSr, X86::RCPSSm, 0}, {X86::RORX32ri, X86::RORX32mi, 0}, + {X86::RORX32ri_EVEX, X86::RORX32mi_EVEX, 0}, {X86::RORX64ri, X86::RORX64mi, 0}, + {X86::RORX64ri_EVEX, X86::RORX64mi_EVEX, 0}, {X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16}, {X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16}, {X86::ROUNDSDr, X86::ROUNDSDm, 0}, @@ -590,11 +602,17 @@ static const X86FoldTableEntry Table1[] = { {X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16}, {X86::RSQRTSSr, X86::RSQRTSSm, 0}, {X86::SARX32rr, X86::SARX32rm, 0}, + {X86::SARX32rr_EVEX, X86::SARX32rm_EVEX, 0}, {X86::SARX64rr, X86::SARX64rm, 0}, + {X86::SARX64rr_EVEX, X86::SARX64rm_EVEX, 0}, {X86::SHLX32rr, X86::SHLX32rm, 0}, + {X86::SHLX32rr_EVEX, X86::SHLX32rm_EVEX, 0}, {X86::SHLX64rr, X86::SHLX64rm, 0}, + {X86::SHLX64rr_EVEX, X86::SHLX64rm_EVEX, 0}, {X86::SHRX32rr, X86::SHRX32rm, 0}, + 
{X86::SHRX32rr_EVEX, X86::SHRX32rm_EVEX, 0}, {X86::SHRX64rr, X86::SHRX64rm, 0}, + {X86::SHRX64rr_EVEX, X86::SHRX64rm_EVEX, 0}, {X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16}, {X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16}, {X86::SQRTSDr, X86::SQRTSDm, 0}, @@ -1332,7 +1350,9 @@ static const X86FoldTableEntry Table2[] = { {X86::AND64rr, X86::AND64rm, 0}, {X86::AND8rr, X86::AND8rm, 0}, {X86::ANDN32rr, X86::ANDN32rm, 0}, + {X86::ANDN32rr_EVEX, X86::ANDN32rm_EVEX, 0}, {X86::ANDN64rr, X86::ANDN64rm, 0}, + {X86::ANDN64rr_EVEX, X86::ANDN64rm_EVEX, 0}, {X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16}, {X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16}, {X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16}, @@ -1479,7 +1499,9 @@ static const X86FoldTableEntry Table2[] = { {X86::MULSSrr, X86::MULSSrm, 0}, {X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE}, {X86::MULX32rr, X86::MULX32rm, 0}, + {X86::MULX32rr_EVEX, X86::MULX32rm_EVEX, 0}, {X86::MULX64rr, X86::MULX64rm, 0}, + {X86::MULX64rr_EVEX, X86::MULX64rm_EVEX, 0}, {X86::OR16rr, X86::OR16rm, 0}, {X86::OR32rr, X86::OR32rm, 0}, {X86::OR64rr, X86::OR64rm, 0}, @@ -1516,9 +1538,13 @@ static const X86FoldTableEntry Table2[] = { {X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16}, {X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16}, {X86::PDEP32rr, X86::PDEP32rm, 0}, + {X86::PDEP32rr_EVEX, X86::PDEP32rm_EVEX, 0}, {X86::PDEP64rr, X86::PDEP64rm, 0}, + {X86::PDEP64rr_EVEX, X86::PDEP64rm_EVEX, 0}, {X86::PEXT32rr, X86::PEXT32rm, 0}, + {X86::PEXT32rr_EVEX, X86::PEXT32rm_EVEX, 0}, {X86::PEXT64rr, X86::PEXT64rm, 0}, + {X86::PEXT64rr_EVEX, X86::PEXT64rm_EVEX, 0}, {X86::PFACCrr, X86::PFACCrm, 0}, {X86::PFADDrr, X86::PFADDrm, 0}, {X86::PFCMPEQrr, X86::PFCMPEQrm, 0}, From f42ce1621f5f4129fb37c4a1af958e1d47344107 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Fri, 1 Dec 2023 10:08:00 +0000 Subject: [PATCH 19/72] [mlir][sve][nfc] Update a test to use transform-interpreter (#73771) This is a follow-up of #70040 in which the test updated here was missed. 
Includes a few additional NFC changes in preparation for extending this test. --- .../Dialect/Linalg/CPU/ArmSVE/matmul.mlir | 70 +++++++++++-------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir index 2024da2a585d99..d771d32d548bbe 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir @@ -1,8 +1,14 @@ -// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule \ -// RUN: -one-shot-bufferize -func-bufferize -cse -canonicalize -convert-vector-to-scf -arm-sve-legalize-vector-storage \ -// RUN: -convert-vector-to-llvm="enable-arm-sve" -test-lower-to-llvm | \ -// RUN: %mcr_aarch64_cmd -e=matmul_f32 -entry-point-result=void --march=aarch64 --mattr="+sve" -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \ -// RUN: FileCheck %s +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: -transform-interpreter -test-transform-dialect-erase-schedule \ +// DEFINE: -one-shot-bufferize -func-bufferize -cse -canonicalize -convert-vector-to-scf -arm-sve-legalize-vector-storage \ +// DEFINE: -convert-vector-to-llvm="enable-arm-sve" -test-lower-to-llvm -o %t +// DEFINE: %{entry_point} = matmul_f32 +// DEFINE: %{run} = %mcr_aarch64_cmd %t -e %{entry_point} -entry-point-result=void --march=aarch64 --mattr="+sve"\ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils + +// RUN: %{compile} + +// RUN: %{run} | FileCheck %s func.func @matmul_f32() { // Matrix dimensions @@ -40,29 +46,37 @@ func.func @matmul_f32() { return } -transform.sequence failures(propagate) { -^bb1(%module_op: !transform.any_op): - // Step 1: Tile - %matmul = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op - %func_op = get_parent_op %matmul : (!transform.any_op) -> !transform.op<"func.func"> - 
%module_with_tiled_loops, %loops:3 = transform.structured.tile_using_for %matmul [2, [4], 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) - - // Step 2: Vectorize - %tiled_matmul = transform.structured.match ops{["linalg.matmul"]} in %module_with_tiled_loops : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %tiled_matmul vector_sizes [2, [4], 1] : !transform.any_op - - // Step 3: Lower vector.multi_reduction to vector.contract (+ some helpful patterns) - transform.apply_patterns to %func_op { - transform.apply_patterns.vector.reduction_to_contract - transform.apply_patterns.vector.transfer_permutation_patterns - transform.apply_patterns.vector.lower_masked_transfers - } : !transform.op<"func.func"> - - // Step 4: Lower vector.contract to vector.fma - transform.apply_patterns to %func_op { - transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct" - transform.apply_patterns.vector.lower_outerproduct - } : !transform.op<"func.func"> +module attributes {transform.with_named_sequence} { +transform.named_sequence @__transform_main(%module: !transform.any_op {transform.readonly}) { + %matmul = transform.structured.match ops{["linalg.matmul"]} in %module + : (!transform.any_op) -> !transform.any_op + + // Step 1: Tile + %module_with_tiled_loops, %loops:3 = transform.structured.tile_using_for %matmul [2, [4], 1] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + + // Step 2: Vectorize + %tiled_matmul = transform.structured.match ops{["linalg.matmul"]} in %module_with_tiled_loops + : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %tiled_matmul vector_sizes [2, [4], 1] : !transform.any_op + + // Step 3: Lower vector.multi_reduction to vector.contract (+ some helpful patterns) + %func = transform.structured.match ops{["func.func"]} in %module + : (!transform.any_op) -> 
!transform.op<"func.func"> + transform.apply_patterns to %func { + transform.apply_patterns.vector.reduction_to_contract + transform.apply_patterns.vector.transfer_permutation_patterns + transform.apply_patterns.vector.lower_masked_transfers + } : !transform.op<"func.func"> + + // Step 4: Lower vector.contract to vector.fma + transform.apply_patterns to %func { + transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct" + transform.apply_patterns.vector.lower_outerproduct + } : !transform.op<"func.func"> + + transform.yield + } } func.func private @printMemrefF32(%ptr : tensor<*xf32>) From 1ee41b415398cde51c055a7b1a4d419350e7038f Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 1 Dec 2023 11:29:05 +0100 Subject: [PATCH 20/72] [libc++][NFC] Update the remaining old license headers --- libcxx/test/libcxx/numerics/bit.ops.pass.cpp | 8 ++++---- .../memory/ptr.align/assume_aligned.power2.verify.cpp | 7 +++---- .../sequences/forwardlist/forwardlist.spec/equal.pass.cpp | 7 +++---- .../forwardlist/forwardlist.spec/member_swap.pass.cpp | 7 +++---- .../forwardlist/forwardlist.spec/non_member_swap.pass.cpp | 7 +++---- .../forwardlist/forwardlist.spec/relational.pass.cpp | 7 +++---- .../forwardlist/forwardlist.spec/swap_noexcept.pass.cpp | 7 +++---- .../support.dynamic/destroying_delete_t.pass.cpp | 7 +++---- .../destroying_delete_t_declaration.pass.cpp | 7 +++---- .../language.support/support.dynamic/nothrow_t.pass.cpp | 7 +++---- .../language.support/support.dynamic/nothrow_t.verify.cpp | 7 +++---- .../test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp | 8 ++++---- .../test/std/numerics/bit/bit.pow.two/bit_ceil.verify.cpp | 8 ++++---- .../test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp | 8 ++++---- .../test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp | 8 ++++---- .../std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp | 8 ++++---- .../std/numerics/bit/bitops.count/countl_one.pass.cpp | 8 ++++---- 
.../std/numerics/bit/bitops.count/countl_zero.pass.cpp | 8 ++++---- .../std/numerics/bit/bitops.count/countr_one.pass.cpp | 8 ++++---- .../std/numerics/bit/bitops.count/countr_zero.pass.cpp | 8 ++++---- .../test/std/numerics/bit/bitops.count/popcount.pass.cpp | 8 ++++---- libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp | 8 ++++---- libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp | 8 ++++---- .../std/thread/thread.mutex/thread.lock/types.verify.cpp | 7 +++---- .../func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp | 7 +++---- .../func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp | 7 +++---- .../func.wrap.func/func.wrap.func.con/deduct_ptr.pass.cpp | 7 +++---- .../util.smartptr.shared.spec/swap.pass.cpp | 7 +++---- .../util.smartptr.weak.spec/swap.pass.cpp | 7 +++---- .../meta.trans.other/common_reference.compile.pass.cpp | 7 +++---- 30 files changed, 103 insertions(+), 120 deletions(-) diff --git a/libcxx/test/libcxx/numerics/bit.ops.pass.cpp b/libcxx/test/libcxx/numerics/bit.ops.pass.cpp index 2a509db1d79a02..d3ca8b2f8030bd 100644 --- a/libcxx/test/libcxx/numerics/bit.ops.pass.cpp +++ b/libcxx/test/libcxx/numerics/bit.ops.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // Test the __XXXX routines in the header. // These are not supposed to be exhaustive tests, just sanity checks. 
diff --git a/libcxx/test/libcxx/utilities/memory/ptr.align/assume_aligned.power2.verify.cpp b/libcxx/test/libcxx/utilities/memory/ptr.align/assume_aligned.power2.verify.cpp index 92c9e927a546e5..b206fe31ea1966 100644 --- a/libcxx/test/libcxx/utilities/memory/ptr.align/assume_aligned.power2.verify.cpp +++ b/libcxx/test/libcxx/utilities/memory/ptr.align/assume_aligned.power2.verify.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/equal.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/equal.pass.cpp index a727487ed0d9c5..b21035f7dd7468 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/equal.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/equal.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp index 6b16d66fedb2ec..e7eea2e87bfec3 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp index e46a55cf81e4f1..54d26d0a649179 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/relational.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/relational.pass.cpp index 29a180a9661242..9ca19486a54b9c 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/relational.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/relational.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp index b4568837a2e015..76f8b6213908b0 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp index 7f52e2d8d50800..95d2c41d7bfebc 100644 --- a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp index b98af1bfe1e45f..1270853a1ccabf 100644 --- a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/language.support/support.dynamic/nothrow_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/nothrow_t.pass.cpp index 3a6231329f0964..bfc41cb141aac2 100644 --- a/libcxx/test/std/language.support/support.dynamic/nothrow_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/nothrow_t.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/language.support/support.dynamic/nothrow_t.verify.cpp b/libcxx/test/std/language.support/support.dynamic/nothrow_t.verify.cpp index 50dd63a6350f00..f2b345e809695b 100644 --- a/libcxx/test/std/language.support/support.dynamic/nothrow_t.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/nothrow_t.verify.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp index 4e794f129f3660..5e37db95ab090e 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.verify.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.verify.cpp index 9424b0b24f8aad..d37de690a48dba 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.verify.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.verify.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp index 06ee38cf8261b2..38a46fcc122274 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp index cfb9a163b4fa7e..baf2032a4a1f00 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp index 7b23627a3d02ed..81dca301e21fb3 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp index bbce57b9caea14..92268cf563b471 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp index f103450eb834f7..9d5d361662e8c8 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp index 8e8ef1d535a54a..63b60640ac0480 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp index d0fdf921a09db0..1df1d883a12e1f 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp index b8759c440432ee..588c5e0cf7af27 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp index a1be03453abe3d..50e498b5761e54 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp index 89fef32c36d4ff..00c9e617d2edf3 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp @@ -1,11 +1,11 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + // UNSUPPORTED: c++03, c++11, c++14, c++17 // template diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/types.verify.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/types.verify.cpp index 623e5c3d22e75b..1688470e1ac4d1 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/types.verify.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/types.verify.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp index 00d183168d4b02..ef43ab9b64b5b5 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp index 520e5f055a71f6..8a42d3be3571c0 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_F.verify.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_ptr.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_ptr.pass.cpp index cc61a75c84f988..ed4e0e96de3b46 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_ptr.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/deduct_ptr.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp index 47ae5dd8f7297d..94986eaa9e3ff3 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp index 98429fd740638d..d6fcd882cd6675 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_reference.compile.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_reference.compile.pass.cpp index 9332865eaa2450..04a1451863c90a 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_reference.compile.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_reference.compile.pass.cpp @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// From f7d91faa790630eca506a29faa560d6783edcbc0 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 1 Dec 2023 10:39:01 +0000 Subject: [PATCH 21/72] [mlir][ArmSME] Add option to only enable streaming mode/ZA if required (#73931) This adds a `only-if-required-by-ops` flag to the `enable-arm-streaming` pass. This flag defaults to `false` (which preserves the original behaviour), however, if set to `true` the pass will only add the selected ZA/streaming mode to functions that contain ops that implement `ArmSMETileOpInterface`. This simplifies enabling these modes, as we can now first try lowering ops to ArmSME, then only if we succeed, add the relevant function attributes. --- .../mlir/Dialect/ArmSME/Transforms/Passes.h | 2 +- .../mlir/Dialect/ArmSME/Transforms/Passes.td | 6 ++++- .../ArmSME/Transforms/EnableArmStreaming.cpp | 25 ++++++++++++++++--- .../Dialect/ArmSME/enable-arm-streaming.mlir | 16 ++++++++++++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h index 11a7385fe311dd..21a97e9cbc794c 100644 --- a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h @@ -27,7 +27,7 @@ namespace arm_sme { /// Pass to enable Armv9 Streaming SVE mode. std::unique_ptr createEnableArmStreamingPass( const ArmStreamingMode = ArmStreamingMode::Streaming, - const ArmZaMode = ArmZaMode::Disabled); + const ArmZaMode = ArmZaMode::Disabled, bool onlyIfRequiredByOps = false); /// Pass that allocates tile IDs to ArmSME operations. 
std::unique_ptr createTileAllocationPass(); diff --git a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td index 3253b47e62abdd..7b9c74e0b8f60e 100644 --- a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td @@ -73,7 +73,11 @@ def EnableArmStreaming "new-za", "The function has ZA state. The ZA state is " "created on entry and destroyed on exit.") - )}]> + )}]>, + Option<"onlyIfRequiredByOps", "only-if-required-by-ops", "bool", + /*default=*/"false", + "Only apply the selected streaming/ZA modes if the function " + " contains ops that require them."> ]; let dependentDialects = ["func::FuncDialect"]; } diff --git a/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp b/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp index c3a1a1c9a3fb49..79a6caffb6ee0b 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp @@ -33,6 +33,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/ArmSME/IR/ArmSME.h" #include "mlir/Dialect/ArmSME/Transforms/Passes.h" #include "mlir/Dialect/ArmSME/Transforms/PassesEnums.cpp.inc" @@ -56,12 +57,28 @@ constexpr StringLiteral struct EnableArmStreamingPass : public arm_sme::impl::EnableArmStreamingBase { - EnableArmStreamingPass(ArmStreamingMode streamingMode, ArmZaMode zaMode) { + EnableArmStreamingPass(ArmStreamingMode streamingMode, ArmZaMode zaMode, + bool onlyIfRequiredByOps) { this->streamingMode = streamingMode; this->zaMode = zaMode; + this->onlyIfRequiredByOps = onlyIfRequiredByOps; } void runOnOperation() override { auto op = getOperation(); + + if (onlyIfRequiredByOps) { + bool foundTileOp = false; + op.walk([&](Operation *op) { + if (llvm::isa(op)) { + foundTileOp = true; + return WalkResult::interrupt(); + } + return WalkResult::advance(); + }); + if 
(!foundTileOp) + return; + } + if (op->getAttr(kEnableArmStreamingIgnoreAttr) || streamingMode == ArmStreamingMode::Disabled) return; @@ -81,6 +98,8 @@ struct EnableArmStreamingPass } // namespace std::unique_ptr mlir::arm_sme::createEnableArmStreamingPass( - const ArmStreamingMode streamingMode, const ArmZaMode zaMode) { - return std::make_unique(streamingMode, zaMode); + const ArmStreamingMode streamingMode, const ArmZaMode zaMode, + bool onlyIfRequiredByOps) { + return std::make_unique(streamingMode, zaMode, + onlyIfRequiredByOps); } diff --git a/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir b/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir index 70119b08c3e91a..b1188acbc0b2d7 100644 --- a/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir +++ b/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s -enable-arm-streaming -verify-diagnostics | FileCheck %s // RUN: mlir-opt %s -enable-arm-streaming=streaming-mode=streaming-locally -verify-diagnostics | FileCheck %s -check-prefix=CHECK-LOCALLY // RUN: mlir-opt %s -enable-arm-streaming=za-mode=new-za -verify-diagnostics | FileCheck %s -check-prefix=CHECK-ENABLE-ZA +// RUN: mlir-opt %s -enable-arm-streaming=only-if-required-by-ops -verify-diagnostics | FileCheck %s -check-prefix=IF-REQUIRED // CHECK-LABEL: @arm_streaming // CHECK-SAME: attributes {arm_streaming} @@ -17,3 +18,18 @@ func.func @arm_streaming() { return } // CHECK-ENABLE-ZA-LABEL: @not_arm_streaming // CHECK-ENABLE-ZA-SAME: attributes {enable_arm_streaming_ignore} func.func @not_arm_streaming() attributes {enable_arm_streaming_ignore} { return } + +// CHECK-LABEL: @requires_arm_streaming +// CHECK-SAME: attributes {arm_streaming} +// IF-REQUIRED: @requires_arm_streaming +// IF-REQUIRED-SAME: attributes {arm_streaming} +func.func @requires_arm_streaming() { + %tile = arm_sme.get_tile : vector<[4]x[4]xi32> + return +} + +// CHECK-LABEL: @does_not_require_arm_streaming +// CHECK-SAME: attributes {arm_streaming} +// 
IF-REQUIRED: @does_not_require_arm_streaming +// IF-REQUIRED-NOT: arm_streaming +func.func @does_not_require_arm_streaming() { return } From da1aff2b2a3192f5e32fa350de19aac0b89fed18 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 1 Dec 2023 10:40:24 +0000 Subject: [PATCH 22/72] [llvm][PowerPC] Correct handling of spill slots for SPE when EXPENSIVE_CHECKS is enabled (#73940) This was modifying a container as it iterated it, which tripped a check in libstdc++'s debug checks. Instead, just assign to the item via the reference we already have. This fixes the following expensive checks failures on my machine: LLVM :: CodeGen/PowerPC/fp-strict.ll LLVM :: CodeGen/PowerPC/pr55463.ll LLVM :: CodeGen/PowerPC/register-pressure.ll LLVM :: CodeGen/PowerPC/spe.ll Which are some of the tests noted by #68594. --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 24 +++++++------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index eb3bf3b2690b22..245e78641ed654 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2334,24 +2334,16 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots( // In case of SPE we only have SuperRegs and CRs // in our CalleSaveInfo vector. - unsigned Idx = 0; for (auto &CalleeSaveReg : CSI) { - const MCPhysReg &Reg = CalleeSaveReg.getReg(); - const MCPhysReg &Lower = RegInfo->getSubReg(Reg, 1); - const MCPhysReg &Higher = RegInfo->getSubReg(Reg, 2); - - // Check only for SuperRegs. - if (Lower) { - if (MRI.isPhysRegModified(Higher)) { - Idx++; - continue; - } else { + MCPhysReg Reg = CalleeSaveReg.getReg(); + MCPhysReg Lower = RegInfo->getSubReg(Reg, 1); + MCPhysReg Higher = RegInfo->getSubReg(Reg, 2); + + if ( // Check only for SuperRegs. 
+ Lower && // Replace Reg if only lower-32 bits modified - CSI.erase(CSI.begin() + Idx); - CSI.insert(CSI.begin() + Idx, CalleeSavedInfo(Lower)); - } - } - Idx++; + !MRI.isPhysRegModified(Higher)) + CalleeSaveReg = CalleeSavedInfo(Lower); } } From 289fe74ddbb4c8aa7128f60db6b20c119922b542 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Fri, 1 Dec 2023 13:35:23 +0300 Subject: [PATCH 23/72] [clang][NFC] Fill in historical data on when C++ DRs 100-199 were fixed --- clang/test/CXX/drs/dr1xx.cpp | 20 ++++++++++---------- clang/www/cxx_dr_status.html | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/clang/test/CXX/drs/dr1xx.cpp b/clang/test/CXX/drs/dr1xx.cpp index 60e80a4c0e1c4f..50236eb7c9499d 100644 --- a/clang/test/CXX/drs/dr1xx.cpp +++ b/clang/test/CXX/drs/dr1xx.cpp @@ -72,7 +72,7 @@ namespace dr107 { // dr107: yes extern "C" S operator+(S, S) { return S(); } } -namespace dr108 { // dr108: yes +namespace dr108 { // dr108: 2.9 template struct A { struct B { typedef int X; }; B::X x; @@ -143,7 +143,7 @@ namespace dr114 { // dr114: yes } b; // expected-error {{abstract}} } -namespace dr115 { // dr115: yes +namespace dr115 { // dr115: 3.0 template int f(T); // expected-note +{{}} template int g(T); // expected-note +{{}} template int g(T, int); // expected-note +{{}} @@ -480,7 +480,7 @@ namespace dr140 { // dr140: yes void g(int n) { n = 2; } } -namespace dr141 { // dr141: yes +namespace dr141 { // dr141: 3.1 template void f(); template struct S { int n; }; // expected-note {{'::dr141::S::n' declared here}} struct A : S { @@ -518,7 +518,7 @@ namespace dr141 { // dr141: yes void i() { C().i(); } // ok!! 
} -namespace dr142 { // dr142: yes +namespace dr142 { // dr142: 2.8 class B { // expected-note +{{here}} public: int mi; // expected-note +{{here}} @@ -602,7 +602,7 @@ namespace dr148 { // dr148: yes // dr149: na -namespace dr151 { // dr151: yes +namespace dr151 { // dr151: 3.1 struct X {}; typedef int X::*p; #if __cplusplus < 201103L @@ -655,7 +655,7 @@ namespace dr159 { // dr159: 3.5 // dr160: na -namespace dr161 { // dr161: yes +namespace dr161 { // dr161: 3.1 class A { protected: struct B { int n; } b; // expected-note 2{{here}} @@ -724,7 +724,7 @@ namespace dr165 { // dr165: no void N::g() {} } -namespace dr166 { // dr166: yes +namespace dr166 { // dr166: 2.9 namespace A { class X; } template int f(T t) { return t.n; } @@ -827,7 +827,7 @@ namespace dr173 { // dr173: yes // dr174: sup 1012 -namespace dr175 { // dr175: yes +namespace dr175 { // dr175: 2.8 struct A {}; // expected-note {{here}} struct B : private A {}; // expected-note {{constrained by private inheritance}} struct C : B { @@ -836,7 +836,7 @@ namespace dr175 { // dr175: yes }; } -namespace dr176 { // dr176: yes +namespace dr176 { // dr176: 3.1 template class Y; template<> class Y { void f() { @@ -904,7 +904,7 @@ namespace dr179 { // dr179: yes int n = &f - &f; // expected-error {{arithmetic on pointers to the function type 'void ()'}} } -namespace dr180 { // dr180: yes +namespace dr180 { // dr180: 2.8 template struct X : T, T::some_base { X() : T::some_type_that_might_be_T(), T::some_base() {} friend class T::some_class; diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 7cf657a47d6409..141b2aa515ad9a 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -685,7 +685,7 @@

C++ defect report implementation status

108 TC1 Are classes nested in templates dependent? - Yes + Clang 2.9 109 @@ -727,7 +727,7 @@

C++ defect report implementation status

115 CD1 Address of template-id - Yes + Clang 3.0 116 @@ -883,13 +883,13 @@

C++ defect report implementation status

141 CD1 Non-member function templates in member access expressions - Yes + Clang 3.1 142 TC1 Injection-related errors in access example - Yes + Clang 2.8 143 @@ -943,7 +943,7 @@

C++ defect report implementation status

151 TC1 Terminology of zero-initialization - Yes + Clang 3.1 152 @@ -1003,7 +1003,7 @@

C++ defect report implementation status

161 TC1 Access to protected nested type - Yes + Clang 3.1 162 @@ -1033,7 +1033,7 @@

C++ defect report implementation status

166 TC1 Friend declarations of template-ids - Yes + Clang 2.9 167 @@ -1087,13 +1087,13 @@

C++ defect report implementation status

175 CD1 Class name injection and base name access - Yes + Clang 2.8 176 TC1 Name injection and templates - Yes + Clang 3.1 177 @@ -1117,7 +1117,7 @@

C++ defect report implementation status

180 CD1 typename and elaborated types - Yes + Clang 2.8 181 From 5a1020bb0083ebfcf5d8879ba99c21bf214fcb56 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 1 Dec 2023 11:40:52 +0100 Subject: [PATCH 24/72] [InstSimplify] Add test for disjoint or miscompile (NFC) The absorption case is already handled correctly, but the idempotence case is not. --- llvm/test/Transforms/InstCombine/select.ll | 43 ++++++++++++++++----- llvm/test/Transforms/InstSimplify/select.ll | 38 ++++++++++++++++++ 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index b3764cfb97d407..f1ccd4747bd1ce 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -1919,9 +1919,9 @@ define i32 @select_dominating_cond_inverted_multiple_duplicating_preds(i1 %cond, ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_FALSE:%.*]], label [[IF_TRUE:%.*]] ; CHECK: if.true: ; CHECK-NEXT: switch i32 [[COND2:%.*]], label [[SWITCH_CASE_1:%.*]] [ -; CHECK-NEXT: i32 1, label [[MERGE:%.*]] -; CHECK-NEXT: i32 2, label [[MERGE]] -; CHECK-NEXT: i32 3, label [[MERGE]] +; CHECK-NEXT: i32 1, label [[MERGE:%.*]] +; CHECK-NEXT: i32 2, label [[MERGE]] +; CHECK-NEXT: i32 3, label [[MERGE]] ; CHECK-NEXT: ] ; CHECK: switch.case.1: ; CHECK-NEXT: br label [[MERGE]] @@ -2172,13 +2172,13 @@ define i32 @test_invoke_neg(i32 %x, i32 %y) nounwind uwtable ssp personality ptr ; CHECK-LABEL: @test_invoke_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND:%.*]] = invoke i1 @foo() -; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] ; CHECK: invoke.cont: ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i32 [[X:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[SEL]] ; CHECK: lpad: ; CHECK-NEXT: [[LP:%.*]] = landingpad { i1, i32 } -; CHECK-NEXT: filter [0 x i1] zeroinitializer 
; CHECK-NEXT: unreachable ; entry: @@ -2205,14 +2205,14 @@ define i32 @test_invoke_2_neg(i1 %cond, i32 %x, i32 %y) nounwind uwtable ssp per ; CHECK-NEXT: br label [[MERGE:%.*]] ; CHECK: if.false: ; CHECK-NEXT: [[RESULT:%.*]] = invoke i32 @bar() -; CHECK-NEXT: to label [[MERGE]] unwind label [[LPAD:%.*]] +; CHECK-NEXT: to label [[MERGE]] unwind label [[LPAD:%.*]] ; CHECK: merge: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[IF_TRUE]] ], [ [[RESULT]], [[IF_FALSE]] ] ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i32 1, i32 [[PHI]] ; CHECK-NEXT: ret i32 [[SEL]] ; CHECK: lpad: ; CHECK-NEXT: [[LP:%.*]] = landingpad { i1, i32 } -; CHECK-NEXT: filter [0 x i1] zeroinitializer +; CHECK-NEXT: filter [0 x i1] zeroinitializer ; CHECK-NEXT: unreachable ; entry: @@ -2242,8 +2242,8 @@ define i32 @select_phi_same_condition_switch(i1 %cond, i32 %x, i32 %y) { ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if.true: ; CHECK-NEXT: switch i32 [[X:%.*]], label [[EXIT:%.*]] [ -; CHECK-NEXT: i32 1, label [[MERGE:%.*]] -; CHECK-NEXT: i32 2, label [[MERGE]] +; CHECK-NEXT: i32 1, label [[MERGE:%.*]] +; CHECK-NEXT: i32 2, label [[MERGE]] ; CHECK-NEXT: ] ; CHECK: exit: ; CHECK-NEXT: ret i32 0 @@ -2903,6 +2903,31 @@ define ptr @select_replacement_gep_inbounds(ptr %base, i64 %offset) { ret ptr %sel } +define i8 @replace_false_op_eq_shl_or_disjoint(i8 %x) { +; CHECK-LABEL: @replace_false_op_eq_shl_or_disjoint( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 3 +; CHECK-NEXT: [[OR:%.*]] = or i8 [[SHL]], [[X]] +; CHECK-NEXT: ret i8 [[OR]] +; + %eq0 = icmp eq i8 %x, -1 + %shl = shl i8 %x, 3 + %or = or disjoint i8 %x, %shl + %sel = select i1 %eq0, i8 -1, i8 %or + ret i8 %sel +} + +; FIXME: This is a miscompile. 
+define i8 @select_or_disjoint_eq(i8 %x, i8 %y) { +; CHECK-LABEL: @select_or_disjoint_eq( +; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[OR]] +; + %cmp = icmp eq i8 %x, %y + %or = or disjoint i8 %x, %y + %sel = select i1 %cmp, i8 %x, i8 %or + ret i8 %sel +} + define <2 x i1> @partial_true_undef_condval(<2 x i1> %x) { ; CHECK-LABEL: @partial_true_undef_condval( ; CHECK-NEXT: ret <2 x i1> diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 16901b88893387..473d8b8b036808 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -1429,6 +1429,21 @@ define i8 @replace_false_op_eq_shl_or(i8 %x) { ret i8 %sel } +define i8 @replace_false_op_eq_shl_or_disjoint(i8 %x) { +; CHECK-LABEL: @replace_false_op_eq_shl_or_disjoint( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i8 [[X:%.*]], -1 +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X]], 3 +; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X]], [[SHL]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i8 -1, i8 [[OR]] +; CHECK-NEXT: ret i8 [[SEL]] +; + %eq0 = icmp eq i8 %x, -1 + %shl = shl i8 %x, 3 + %or = or disjoint i8 %x, %shl + %sel = select i1 %eq0, i8 -1, i8 %or + ret i8 %sel +} + ; negative test - wrong cmp predicate define i8 @replace_false_op_sgt_neg_and(i8 %x) { @@ -1698,3 +1713,26 @@ define i8 @select_xor_cmp_unmatched_operands(i8 %0, i8 %1, i8 %c) { %5 = select i1 %3, i8 0, i8 %4 ret i8 %5 } + +define i8 @select_or_eq(i8 %x, i8 %y) { +; CHECK-LABEL: @select_or_eq( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[OR]] +; + %cmp = icmp eq i8 %x, %y + %or = or i8 %x, %y + %sel = select i1 %cmp, i8 %x, i8 %or + ret i8 %sel +} + +; FIXME: This is a miscompile. 
+define i8 @select_or_disjoint_eq(i8 %x, i8 %y) { +; CHECK-LABEL: @select_or_disjoint_eq( +; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[OR]] +; + %cmp = icmp eq i8 %x, %y + %or = or disjoint i8 %x, %y + %sel = select i1 %cmp, i8 %x, i8 %or + ret i8 %sel +} From cd31cf5989aaf6a187aaf3af4f94207c55a70d0f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 1 Dec 2023 11:43:17 +0100 Subject: [PATCH 25/72] [InstSimplify] Fix or disjoint miscompile with op replacement Make sure %x does not get folded to "or disjoint %x, %x" without dropping the flag, as this would be a derefinement. --- llvm/lib/Analysis/InstructionSimplify.cpp | 11 ++++++++++- llvm/test/Transforms/InstCombine/select.ll | 3 +-- llvm/test/Transforms/InstSimplify/select.ll | 7 ++++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 9f3b3f25ec3f15..cef9f6ec179ba5 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4331,8 +4331,17 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // x & x -> x, x | x -> x if ((Opcode == Instruction::And || Opcode == Instruction::Or) && - NewOps[0] == NewOps[1]) + NewOps[0] == NewOps[1]) { + // or disjoint x, x results in poison. + if (auto *PDI = dyn_cast(BO)) { + if (PDI->isDisjoint()) { + if (!DropFlags) + return nullptr; + DropFlags->push_back(BO); + } + } return NewOps[0]; + } // x - x -> 0, x ^ x -> 0. 
This is non-refining, because x is non-poison // by assumption and this case never wraps, so nowrap flags can be diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index f1ccd4747bd1ce..6f24758effac2f 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2916,10 +2916,9 @@ define i8 @replace_false_op_eq_shl_or_disjoint(i8 %x) { ret i8 %sel } -; FIXME: This is a miscompile. define i8 @select_or_disjoint_eq(i8 %x, i8 %y) { ; CHECK-LABEL: @select_or_disjoint_eq( -; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret i8 [[OR]] ; %cmp = icmp eq i8 %x, %y diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 473d8b8b036808..b9c79f02245ccf 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -1725,11 +1725,12 @@ define i8 @select_or_eq(i8 %x, i8 %y) { ret i8 %sel } -; FIXME: This is a miscompile. 
define i8 @select_or_disjoint_eq(i8 %x, i8 %y) { ; CHECK-LABEL: @select_or_disjoint_eq( -; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[OR]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X]], [[Y]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[OR]] +; CHECK-NEXT: ret i8 [[SEL]] ; %cmp = icmp eq i8 %x, %y %or = or disjoint i8 %x, %y From 89b0044ca9a6fb233f8d6dd16db6bd4acc3d3f61 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 1 Dec 2023 12:14:57 +0100 Subject: [PATCH 26/72] [InstSimplify] Add test for implied cond with equal ops and constant (NFC) --- llvm/test/Transforms/InstSimplify/implies.ll | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/implies.ll b/llvm/test/Transforms/InstSimplify/implies.ll index 41f1995428473a..75044f4d9a356f 100644 --- a/llvm/test/Transforms/InstSimplify/implies.ll +++ b/llvm/test/Transforms/InstSimplify/implies.ll @@ -499,4 +499,30 @@ define i1 @lshr_value(i32 %length.i, i32 %i, i32 %v) { ret i1 %res } +define i1 @same_ops_with_constant(i8 %x) { +; CHECK-LABEL: @same_ops_with_constant( +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i8 [[X:%.*]], 5 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i8 [[X]], 5 +; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[RES]] +; + %cmp1 = icmp sgt i8 %x, 5 + %cmp2 = icmp ugt i8 %x, 5 + %res = icmp ule i1 %cmp1, %cmp2 + ret i1 %res +} + +define i1 @same_ops_with_constant_wrong_sign(i8 %x) { +; CHECK-LABEL: @same_ops_with_constant_wrong_sign( +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i8 [[X:%.*]], -5 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i8 [[X]], -5 +; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[RES]] +; + %cmp1 = icmp sgt i8 %x, -5 + %cmp2 = icmp ugt i8 %x, -5 + %res = icmp ule i1 %cmp1, %cmp2 + ret i1 %res +} + declare void @llvm.assume(i1) From 
460faa0c87f0a9496cdaf6c856aff1886e29afe3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 1 Dec 2023 12:15:58 +0100 Subject: [PATCH 27/72] [InstSimplify] Check common operand with constant earlier If both icmps have the same operands and the RHS is constant, we would currently go into the isImpliedCondMatchingOperands() code path, instead of the isImpliedCondCommonOperandWithConstants() path. Both are correct, but the latter can produce more accurate results if the implication is dependent on the sign. --- llvm/lib/Analysis/ValueTracking.cpp | 10 +++++----- llvm/test/Transforms/InstSimplify/implies.ll | 5 +---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 9cfe7315a7a4dc..d8a72c9f7b989d 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -8352,17 +8352,17 @@ static std::optional isImpliedCondICmps(const ICmpInst *LHS, CmpInst::Predicate LPred = LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); - // Can we infer anything when the two compares have matching operands? - bool AreSwappedOps; - if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps)) - return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps); - // Can we infer anything when the 0-operands match and the 1-operands are // constants (not necessarily matching)? const APInt *LC, *RC; if (L0 == R0 && match(L1, m_APInt(LC)) && match(R1, m_APInt(RC))) return isImpliedCondCommonOperandWithConstants(LPred, *LC, RPred, *RC); + // Can we infer anything when the two compares have matching operands? 
+ bool AreSwappedOps; + if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps)) + return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps); + // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 Date: Fri, 1 Dec 2023 11:29:19 +0000 Subject: [PATCH 28/72] TargetInstrInfo: make getOperandLatency return optional (NFC) (#73769) getOperandLatency has the following behavior: it returns -1 as a special value, negative numbers other than -1 on some target-specific overrides, or a valid non-negative latency. This behavior can be surprising, as some callers do arithmetic on these negative values. Change the interface of getOperandLatency to return a std::optional to prevent surprises in callers. While at it, change the interface of getInstrLatency to return unsigned instead of int. This change was inspired by a refactoring in TargetSchedModel::computeOperandLatency. --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 18 +-- llvm/include/llvm/MC/MCInstrItineraries.h | 40 +++--- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 9 +- llvm/lib/CodeGen/TargetInstrInfo.cpp | 23 ++-- llvm/lib/CodeGen/TargetSchedule.cpp | 32 +++-- llvm/lib/MC/MCDisassembler/Disassembler.cpp | 11 +- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 117 +++++++++--------- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 76 ++++++------ llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 14 +-- llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 9 +- llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 27 ++-- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 23 ++-- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 16 +-- 13 files changed, 209 insertions(+), 206 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 58355a32315b23..282fecc3ea81c0 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1706,9 +1706,9 @@ class TargetInstrInfo : public MCInstrInfo { return Opcode <= TargetOpcode::COPY; } - virtual int 
getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const; + virtual std::optional + getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, + unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; /// Compute and return the use operand latency of a given pair of def and use. /// In most cases, the static scheduling itinerary was enough to determine the @@ -1718,10 +1718,10 @@ class TargetInstrInfo : public MCInstrInfo { /// This is a raw interface to the itinerary that may be directly overridden /// by a target. Use computeOperandLatency to get the best estimate of /// latency. - virtual int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const; + virtual std::optional + getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, unsigned UseIdx) const; /// Compute the instruction latency of a given instruction. /// If the instruction has higher cost when predicated, it's returned via @@ -1732,8 +1732,8 @@ class TargetInstrInfo : public MCInstrInfo { virtual unsigned getPredicationCost(const MachineInstr &MI) const; - virtual int getInstrLatency(const InstrItineraryData *ItinData, - SDNode *Node) const; + virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const; /// Return the default expected latency for a def based on its opcode. 
unsigned defaultDefLatency(const MCSchedModel &SchedModel, diff --git a/llvm/include/llvm/MC/MCInstrItineraries.h b/llvm/include/llvm/MC/MCInstrItineraries.h index 652922feddc338..b17c41ce3aa4a1 100644 --- a/llvm/include/llvm/MC/MCInstrItineraries.h +++ b/llvm/include/llvm/MC/MCInstrItineraries.h @@ -17,6 +17,7 @@ #include "llvm/MC/MCSchedule.h" #include +#include namespace llvm { @@ -162,18 +163,19 @@ class InstrItineraryData { return Latency; } - /// Return the cycle for the given class and operand. Return -1 if no - /// cycle is specified for the operand. - int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const { + /// Return the cycle for the given class and operand. Return std::nullopt if + /// the information is not available for the operand. + std::optional getOperandCycle(unsigned ItinClassIndx, + unsigned OperandIdx) const { if (isEmpty()) - return -1; + return std::nullopt; unsigned FirstIdx = Itineraries[ItinClassIndx].FirstOperandCycle; unsigned LastIdx = Itineraries[ItinClassIndx].LastOperandCycle; if ((FirstIdx + OperandIdx) >= LastIdx) - return -1; + return std::nullopt; - return (int)OperandCycles[FirstIdx + OperandIdx]; + return OperandCycles[FirstIdx + OperandIdx]; } /// Return true if there is a pipeline forwarding between instructions @@ -201,25 +203,27 @@ class InstrItineraryData { /// Compute and return the use operand latency of a given itinerary /// class and operand index if the value is produced by an instruction of the - /// specified itinerary class and def operand index. - int getOperandLatency(unsigned DefClass, unsigned DefIdx, - unsigned UseClass, unsigned UseIdx) const { + /// specified itinerary class and def operand index. Return std::nullopt if + /// the information is not available for the operand. 
+ std::optional getOperandLatency(unsigned DefClass, unsigned DefIdx, + unsigned UseClass, + unsigned UseIdx) const { if (isEmpty()) - return -1; + return std::nullopt; - int DefCycle = getOperandCycle(DefClass, DefIdx); - if (DefCycle == -1) - return -1; + std::optional DefCycle = getOperandCycle(DefClass, DefIdx); + std::optional UseCycle = getOperandCycle(UseClass, UseIdx); + if (!DefCycle || !UseCycle) + return std::nullopt; - int UseCycle = getOperandCycle(UseClass, UseIdx); - if (UseCycle == -1) - return -1; + if (UseCycle > *DefCycle + 1) + return std::nullopt; - UseCycle = DefCycle - UseCycle + 1; + UseCycle = *DefCycle - *UseCycle + 1; if (UseCycle > 0 && hasPipelineForwarding(DefClass, DefIdx, UseClass, UseIdx)) // FIXME: This assumes one cycle benefit for every pipeline forwarding. - --UseCycle; + UseCycle = *UseCycle - 1; return UseCycle; } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 0579c1664d5c9a..4d6d350c46f5af 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -659,7 +659,8 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, if (Use->isMachineOpcode()) // Adjust the use operand index by num of defs. OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); - int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); + std::optional Latency = + TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { unsigned Reg = cast(Use->getOperand(1))->getReg(); @@ -667,10 +668,10 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? 
- Latency = Latency - 1; + Latency = *Latency - 1; } - if (Latency >= 0) - dep.setLatency(Latency); + if (Latency) + dep.setLatency(*Latency); } void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const { diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index ac056fea8c3794..fbb7c81fa1f86f 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1379,15 +1379,15 @@ bool TargetInstrInfo::getMemOperandWithOffset( // SelectionDAG latency interface. //===----------------------------------------------------------------------===// -int +std::optional TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const { if (!ItinData || ItinData->isEmpty()) - return -1; + return std::nullopt; if (!DefNode->isMachineOpcode()) - return -1; + return std::nullopt; unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); if (!UseNode->isMachineOpcode()) @@ -1396,8 +1396,8 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); } -int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - SDNode *N) const { +unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { if (!ItinData || ItinData->isEmpty()) return 1; @@ -1461,8 +1461,9 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, return false; unsigned DefClass = DefMI.getDesc().getSchedClass(); - int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); - return (DefCycle != -1 && DefCycle <= 1); + std::optional DefCycle = + ItinData->getOperandCycle(DefClass, DefIdx); + return DefCycle <= 1; } bool TargetInstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const { @@ -1580,11 +1581,9 @@ unsigned TargetInstrInfo::getCallFrameSizeAt(MachineInstr &MI) const { /// Both DefMI and 
UseMI must be valid. By default, call directly to the /// itinerary. This may be overriden by the target. -int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, - unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const { +std::optional TargetInstrInfo::getOperandLatency( + const InstrItineraryData *ItinData, const MachineInstr &DefMI, + unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { unsigned DefClass = DefMI.getDesc().getSchedClass(); unsigned UseClass = UseMI.getDesc().getSchedClass(); return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp index 3cedb38de2ad8d..a25d4ff78f4d96 100644 --- a/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/llvm/lib/CodeGen/TargetSchedule.cpp @@ -168,16 +168,20 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { return UseIdx; } -// Top-level API for clients that know the operand indices. +// Top-level API for clients that know the operand indices. This doesn't need to +// return std::optional, as it always returns a valid latency. 
unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const { + const unsigned InstrLatency = computeInstrLatency(DefMI); + const unsigned DefaultDefLatency = TII->defaultDefLatency(SchedModel, *DefMI); + if (!hasInstrSchedModel() && !hasInstrItineraries()) - return TII->defaultDefLatency(SchedModel, *DefMI); + return InstrLatency; if (hasInstrItineraries()) { - int OperLatency = 0; + std::optional OperLatency; if (UseMI) { OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx, *UseMI, UseOperIdx); @@ -186,21 +190,13 @@ unsigned TargetSchedModel::computeOperandLatency( unsigned DefClass = DefMI->getDesc().getSchedClass(); OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); } - if (OperLatency >= 0) - return OperLatency; - - // No operand latency was found. - unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - // Rather than directly querying InstrItins stage latency, we call a TII - // hook to allow subtargets to specialize latency. This hook is only - // applicable to the InstrItins model. InstrSchedModel should model all - // special cases without TII hooks. - InstrLatency = - std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI)); - return InstrLatency; + + // Expected latency is the max of InstrLatency and DefaultDefLatency, if we + // didn't find an operand latency. + return OperLatency ? *OperLatency + : std::max(InstrLatency, DefaultDefLatency); } + // hasInstrSchedModel() const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); @@ -237,7 +233,7 @@ unsigned TargetSchedModel::computeOperandLatency( // FIXME: Automatically giving all implicit defs defaultDefLatency is // undesirable. We should only do it for defs that are known to the MC // desc like flags. 
Truly implicit defs should get 1 cycle latency. - return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI); + return DefMI->isTransient() ? 0 : DefaultDefLatency; } unsigned diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp index 067b951fbfccb3..5e5a163c290244 100644 --- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp @@ -180,12 +180,13 @@ static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) { const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode()); unsigned SCClass = Desc.getSchedClass(); - int Latency = 0; - for (unsigned OpIdx = 0, OpIdxEnd = Inst.getNumOperands(); OpIdx != OpIdxEnd; - ++OpIdx) - Latency = std::max(Latency, IID.getOperandCycle(SCClass, OpIdx)); + unsigned Latency = 0; - return Latency; + for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx) + if (std::optional OperCycle = IID.getOperandCycle(SCClass, Idx)) + Latency = std::max(Latency, *OperCycle); + + return (int)Latency; } /// Gets latency information for \p Inst, based on \p DC information. diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index c09879fd9c2beb..94f34b12769660 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3872,17 +3872,16 @@ unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, llvm_unreachable("Didn't find the number of microops"); } -int +std::optional ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &DefMCID, - unsigned DefClass, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const { int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; if (RegNo <= 0) // Def is the address writeback. 
return ItinData->getOperandCycle(DefClass, DefIdx); - int DefCycle; + unsigned DefCycle; if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 DefCycle = RegNo / 2 + 1; @@ -3913,17 +3912,16 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return DefCycle; } -int +std::optional ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &DefMCID, - unsigned DefClass, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const { int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; if (RegNo <= 0) // Def is the address writeback. return ItinData->getOperandCycle(DefClass, DefIdx); - int DefCycle; + unsigned DefCycle; if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // 4 registers would be issued: 1, 2, 1. // 5 registers would be issued: 1, 2, 2. @@ -3948,16 +3946,15 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, return DefCycle; } -int +std::optional ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &UseMCID, - unsigned UseClass, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const { int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; if (RegNo <= 0) return ItinData->getOperandCycle(UseClass, UseIdx); - int UseCycle; + unsigned UseCycle; if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 UseCycle = RegNo / 2 + 1; @@ -3988,16 +3985,15 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, return UseCycle; } -int +std::optional ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &UseMCID, - unsigned UseClass, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const { int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; if (RegNo <= 0) return ItinData->getOperandCycle(UseClass, UseIdx); - int 
UseCycle; + unsigned UseCycle; if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { UseCycle = RegNo / 2; if (UseCycle < 2) @@ -4017,12 +4013,10 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, return UseCycle; } -int -ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MCInstrDesc &DefMCID, - unsigned DefIdx, unsigned DefAlign, - const MCInstrDesc &UseMCID, - unsigned UseIdx, unsigned UseAlign) const { +std::optional ARMBaseInstrInfo::getOperandLatency( + const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, + unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID, + unsigned UseIdx, unsigned UseAlign) const { unsigned DefClass = DefMCID.getSchedClass(); unsigned UseClass = UseMCID.getSchedClass(); @@ -4032,7 +4026,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // This may be a def / use of a variable_ops instruction, the operand // latency might be determinable dynamically. Let the target try to // figure it out. - int DefCycle = -1; + std::optional DefCycle; bool LdmBypass = false; switch (DefMCID.getOpcode()) { default: @@ -4070,11 +4064,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; } - if (DefCycle == -1) + if (!DefCycle) // We can't seem to determine the result latency of the def, assume it's 2. DefCycle = 2; - int UseCycle = -1; + std::optional UseCycle; switch (UseMCID.getOpcode()) { default: UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); @@ -4108,21 +4102,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; } - if (UseCycle == -1) + if (!UseCycle) // Assume it's read in the first stage. UseCycle = 1; - UseCycle = DefCycle - UseCycle + 1; + if (UseCycle > *DefCycle + 1) + return std::nullopt; + + UseCycle = *DefCycle - *UseCycle + 1; if (UseCycle > 0) { if (LdmBypass) { // It's a variable_ops instruction so we can't use DefIdx here. Just use // first def operand. 
if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, UseClass, UseIdx)) - --UseCycle; + UseCycle = *UseCycle - 1; } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, UseClass, UseIdx)) { - --UseCycle; + UseCycle = *UseCycle - 1; } } @@ -4362,14 +4359,12 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, return Adjust; } -int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, - unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const { +std::optional ARMBaseInstrInfo::getOperandLatency( + const InstrItineraryData *ItinData, const MachineInstr &DefMI, + unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { // No operand latency. The caller may fall back to getInstrLatency. if (!ItinData || ItinData->isEmpty()) - return -1; + return std::nullopt; const MachineOperand &DefMO = DefMI.getOperand(DefIdx); Register Reg = DefMO.getReg(); @@ -4390,7 +4385,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, ResolvedUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj); if (!ResolvedUseMI) - return -1; + return std::nullopt; } return getOperandLatencyImpl( @@ -4398,7 +4393,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj); } -int ARMBaseInstrInfo::getOperandLatencyImpl( +std::optional ARMBaseInstrInfo::getOperandLatencyImpl( const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, @@ -4430,7 +4425,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl( } if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit()) - return -1; + return std::nullopt; unsigned DefAlign = DefMI.hasOneMemOperand() ? 
(*DefMI.memoperands_begin())->getAlign().value() @@ -4440,25 +4435,25 @@ int ARMBaseInstrInfo::getOperandLatencyImpl( : 0; // Get the itinerary's latency if possible, and handle variable_ops. - int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID, - UseIdx, UseAlign); + std::optional Latency = getOperandLatency( + ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); // Unable to find operand latency. The caller may resort to getInstrLatency. - if (Latency < 0) - return Latency; + if (!Latency) + return std::nullopt; // Adjust for IT block position. int Adj = DefAdj + UseAdj; // Adjust for dynamic def-side opcode variants not captured by the itinerary. Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); - if (Adj >= 0 || (int)Latency > -Adj) { - return Latency + Adj; + if (Adj >= 0 || (int)*Latency > -Adj) { + return *Latency + Adj; } // Return the itinerary latency, which may be zero but not less than zero. return Latency; } -int +std::optional ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const { @@ -4474,10 +4469,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return DefMCID.mayLoad() ? 3 : 1; if (!UseNode->isMachineOpcode()) { - int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); + std::optional Latency = + ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); int Threshold = 1 + Adj; - return Latency <= Threshold ? 1 : Latency - Adj; + return !Latency || Latency <= Threshold ? 1 : *Latency - Adj; } const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); @@ -4489,8 +4485,10 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned UseAlign = !UseMN->memoperands_empty() ? 
(*UseMN->memoperands_begin())->getAlign().value() : 0; - int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, - UseMCID, UseIdx, UseAlign); + std::optional Latency = getOperandLatency( + ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); + if (!Latency) + return std::nullopt; if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isLikeA9() || @@ -4506,7 +4504,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (ShImm == 0 || (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) - --Latency; + Latency = *Latency - 1; break; } case ARM::t2LDRs: @@ -4517,7 +4515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned ShAmt = cast(DefNode->getOperand(2))->getZExtValue(); if (ShAmt == 0 || ShAmt == 2) - --Latency; + Latency = *Latency - 1; break; } } @@ -4534,9 +4532,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (ShImm == 0 || ((ShImm == 1 || ShImm == 2 || ShImm == 3) && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) - Latency -= 2; + Latency = *Latency - 2; else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) - --Latency; + Latency = *Latency - 1; break; } case ARM::t2LDRs: @@ -4544,7 +4542,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDRHs: case ARM::t2LDRSHs: // Thumb2 mode: lsl 0-3 only. - Latency -= 2; + Latency = *Latency - 2; break; } } @@ -4710,7 +4708,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4LNq32Pseudo_UPD: // If the address is not 64-bit aligned, the latencies of these // instructions increases by one. 
- ++Latency; + Latency = *Latency + 1; break; } @@ -4787,8 +4785,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return Latency; } -int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - SDNode *Node) const { +unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const { if (!Node->isMachineOpcode()) return 1; @@ -4836,8 +4834,9 @@ bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; if (DDomain == ARMII::DomainGeneral) { unsigned DefClass = DefMI.getDesc().getSchedClass(); - int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); - return (DefCycle != -1 && DefCycle <= 2); + std::optional DefCycle = + ItinData->getOperandCycle(DefClass, DefIdx); + return DefCycle <= 2; } return false; } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 5efcc1a0d9fc07..6aebf3b64e8d43 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -316,13 +316,15 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const override; - int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const override; - int getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const override; + std::optional getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, + unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const override; + std::optional getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, + unsigned UseIdx) const override; /// VFP/NEON execution domains. 
std::pair @@ -421,34 +423,34 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { unsigned getInstBundleLength(const MachineInstr &MI) const; - int getVLDMDefCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &DefMCID, - unsigned DefClass, - unsigned DefIdx, unsigned DefAlign) const; - int getLDMDefCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &DefMCID, - unsigned DefClass, - unsigned DefIdx, unsigned DefAlign) const; - int getVSTMUseCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &UseMCID, - unsigned UseClass, - unsigned UseIdx, unsigned UseAlign) const; - int getSTMUseCycle(const InstrItineraryData *ItinData, - const MCInstrDesc &UseMCID, - unsigned UseClass, - unsigned UseIdx, unsigned UseAlign) const; - int getOperandLatency(const InstrItineraryData *ItinData, - const MCInstrDesc &DefMCID, - unsigned DefIdx, unsigned DefAlign, - const MCInstrDesc &UseMCID, - unsigned UseIdx, unsigned UseAlign) const; - - int getOperandLatencyImpl(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, unsigned DefIdx, - const MCInstrDesc &DefMCID, unsigned DefAdj, - const MachineOperand &DefMO, unsigned Reg, - const MachineInstr &UseMI, unsigned UseIdx, - const MCInstrDesc &UseMCID, unsigned UseAdj) const; + std::optional getVLDMDefCycle(const InstrItineraryData *ItinData, + const MCInstrDesc &DefMCID, + unsigned DefClass, unsigned DefIdx, + unsigned DefAlign) const; + std::optional getLDMDefCycle(const InstrItineraryData *ItinData, + const MCInstrDesc &DefMCID, + unsigned DefClass, unsigned DefIdx, + unsigned DefAlign) const; + std::optional getVSTMUseCycle(const InstrItineraryData *ItinData, + const MCInstrDesc &UseMCID, + unsigned UseClass, unsigned UseIdx, + unsigned UseAlign) const; + std::optional getSTMUseCycle(const InstrItineraryData *ItinData, + const MCInstrDesc &UseMCID, + unsigned UseClass, unsigned UseIdx, + unsigned UseAlign) const; + std::optional getOperandLatency(const InstrItineraryData *ItinData, + const 
MCInstrDesc &DefMCID, + unsigned DefIdx, unsigned DefAlign, + const MCInstrDesc &UseMCID, + unsigned UseIdx, + unsigned UseAlign) const; + + std::optional getOperandLatencyImpl( + const InstrItineraryData *ItinData, const MachineInstr &DefMI, + unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, + const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, + unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const; unsigned getPredicationCost(const MachineInstr &MI) const override; @@ -456,8 +458,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { const MachineInstr &MI, unsigned *PredCost = nullptr) const override; - int getInstrLatency(const InstrItineraryData *ItinData, - SDNode *Node) const override; + unsigned getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const override; bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 6f0210763bc5f3..1689b8f1e132d5 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -4295,11 +4295,9 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency( /// /// This is a raw interface to the itinerary that may be directly overriden by /// a target. Use computeOperandLatency to get the best estimate of latency. -int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, - unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const { +std::optional HexagonInstrInfo::getOperandLatency( + const InstrItineraryData *ItinData, const MachineInstr &DefMI, + unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); // Get DefIdx and UseIdx for super registers. 
@@ -4328,9 +4326,9 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } - int Latency = TargetInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, - UseMI, UseIdx); - if (!Latency) + std::optional Latency = TargetInstrInfo::getOperandLatency( + ItinData, DefMI, DefIdx, UseMI, UseIdx); + if (Latency == 0) // We should never have 0 cycle latency between two instructions unless // they can be packetized together. However, this decision can't be made // here. diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 0bc0877f6e7067..645b57f4664df2 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -309,10 +309,11 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// /// This is a raw interface to the itinerary that may be directly overriden by /// a target. Use computeOperandLatency to get the best estimate of latency. - int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const override; + std::optional getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, + unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const override; /// Decompose the machine operand's target flags into two values - the direct /// target flag value and any of bit flags that are applied. diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 1c9c258df9475f..e1ad15bbc7c17a 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -467,7 +467,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, // default. 
if ((DstInst->isRegSequence() || DstInst->isCopy())) { Register DReg = DstInst->getOperand(0).getReg(); - int DLatency = -1; + std::optional DLatency; for (const auto &DDep : Dst->Succs) { MachineInstr *DDst = DDep.getSUnit()->getInstr(); int UseIdx = -1; @@ -482,21 +482,21 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, if (UseIdx == -1) continue; - int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, - *DDst, UseIdx)); + std::optional Latency = + InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx); + // Set DLatency for the first time. - DLatency = (DLatency == -1) ? Latency : DLatency; + if (!DLatency) + DLatency = Latency; // For multiple uses, if the Latency is different across uses, reset // DLatency. if (DLatency != Latency) { - DLatency = -1; + DLatency = std::nullopt; break; } } - - DLatency = std::max(DLatency, 0); - Dep.setLatency((unsigned)DLatency); + Dep.setLatency(DLatency ? *DLatency : 0); } // Try to schedule uses near definitions to generate .cur. @@ -581,15 +581,16 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) { const MachineOperand &MO = DstI->getOperand(OpNum); if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) { - int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcI, - DefIdx, *DstI, OpNum)); + std::optional Latency = InstrInfo.getOperandLatency( + &InstrItins, *SrcI, DefIdx, *DstI, OpNum); // For some instructions (ex: COPY), we might end up with < 0 latency // as they don't have any Itinerary class associated with them. 
- Latency = std::max(Latency, 0); + if (!Latency) + Latency = 0; bool IsArtificial = I.isArtificial(); - Latency = updateLatency(*SrcI, *DstI, IsArtificial, Latency); - I.setLatency(Latency); + Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency); + I.setLatency(*Latency); } } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 6784049348b163..49d003db8ffc9a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -155,22 +155,21 @@ unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) continue; - int Cycle = ItinData->getOperandCycle(DefClass, i); - if (Cycle < 0) + std::optional Cycle = ItinData->getOperandCycle(DefClass, i); + if (!Cycle) continue; - Latency = std::max(Latency, (unsigned) Cycle); + Latency = std::max(Latency, *Cycle); } return Latency; } -int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const { - int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, - UseMI, UseIdx); +std::optional PPCInstrInfo::getOperandLatency( + const InstrItineraryData *ItinData, const MachineInstr &DefMI, + unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { + std::optional Latency = PPCGenInstrInfo::getOperandLatency( + ItinData, DefMI, DefIdx, UseMI, UseIdx); if (!DefMI.getParent()) return Latency; @@ -190,7 +189,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (UseMI.isBranch() && IsRegCR) { - if (Latency < 0) + if (!Latency) Latency = getInstrLatency(ItinData, DefMI); // On some cores, there is an additional delay between writing to a condition @@ -210,8 +209,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case PPC::DIR_PWR7: case PPC::DIR_PWR8: // FIXME: Is this needed for POWER9? 
- Latency += 2; - break; + Latency = *Latency + 2; + break; } } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 31e9859a41739a..a8dc7d6d0e37a2 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -294,13 +294,15 @@ class PPCInstrInfo : public PPCGenInstrInfo { const MachineInstr &MI, unsigned *PredCost = nullptr) const override; - int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI, unsigned DefIdx, - const MachineInstr &UseMI, - unsigned UseIdx) const override; - int getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const override { + std::optional getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, + unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const override; + std::optional getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, + unsigned UseIdx) const override { return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx, UseNode, UseIdx); } From 5a32014d82334c4c66c8cc7ae3ed2a489c07db07 Mon Sep 17 00:00:00 2001 From: "Oleksandr \"Alex\" Zinenko" Date: Fri, 1 Dec 2023 12:53:35 +0100 Subject: [PATCH 29/72] [mlir] update linalg transform ops docs --- .../Dialect/Linalg/TransformOps/LinalgTransformOps.td | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index fb660c64612663..002926ff965fd1 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -1910,16 +1910,20 @@ def TileUsingForallOp : #### Example using `num_threads` ``` - %0 = pdl_match @match_matmul in %arg1 + %0 = 
transform.structured.match ops{["linalg.matmul"]} in %arg1 + : (!transform.any_op) -> !transform.any_op %3:2 = transform.structured.tile_using_forall %0 num_threads [10, 20] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op) ``` #### Example using `tile_sizes` ``` - %0 = pdl_match @match_matmul in %arg1 - %sz = pdl_match @match_size_op in %arg1 + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %sz = transform.structured.match ... %3:2 = transform.structured.tile_using_forall %0 tile_sizes [0, %sz, 20] + : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) ``` }]; From 69020827cf611170d0bc80879114a2427aa39960 Mon Sep 17 00:00:00 2001 From: Shivam Gupta Date: Fri, 1 Dec 2023 17:27:19 +0530 Subject: [PATCH 30/72] [NFC] Remove a space in CMake.rst The rendered document is not correctly indentated because of this space. --- llvm/docs/CMake.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 4b86eb9c01d26b..7dd3fd26022e5c 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -389,7 +389,7 @@ enabled sub-projects. Nearly all of these variable names begin with will limit code coverage summaries to just the listed directories. If unset, coverage reports will include all sources identified by the tooling. - **LLVM_INDIVIDUAL_TEST_COVERAGE**: BOOL +**LLVM_INDIVIDUAL_TEST_COVERAGE**:BOOL Enable individual test case coverage. When set to ON, code coverage data for each test case will be generated and stored in a separate directory under the config.test_exec_root path. This feature allows code coverage analysis of each From 8727982bdfb84ce4adbd138c146a6b7ecaf98fdb Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Fri, 1 Dec 2023 12:00:18 +0000 Subject: [PATCH 31/72] [Driver] Add exclusive-group feature to multilib.yaml. 
(#69447) This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same mutually exclusive group, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including , for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an exclusive group matches the given flags, the last one wins. The syntax for specifying this in multilib.yaml is to define a Groups section in which you specify your group names, and for each one, declare it to have Type: Exclusive. (This reserves space in the syntax for maybe adding other group types later, such as a group of mutually _dependent_ things that you must have all or none of.) Then each Variant record that's a member of a group has a Group: property giving that group's name. 
--- clang/include/clang/Driver/Multilib.h | 16 ++- clang/lib/Driver/Multilib.cpp | 108 ++++++++++++++++-- .../baremetal-multilib-exclusive-group.yaml | 79 +++++++++++++ .../baremetal-multilib-group-error.yaml | 27 +++++ 4 files changed, 218 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml create mode 100644 clang/test/Driver/baremetal-multilib-group-error.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894..6a9533e6dd831f 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db..7681c1a3ce6756 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -9,6 +9,7 @@ #include "clang/Driver/Multilib.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/Version.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -29,9 +30,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +98,37 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), - [&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) - if (!FlagSet.contains(F)) - return false; - return true; - }); + + // Decide which multilibs we're going to select at all. + llvm::DenseSet ExclusiveGroupsSelected; + for (const Multilib &M : llvm::reverse(Multilibs)) { + // If this multilib doesn't match all our flags, don't select it. 
+ if (!llvm::all_of(M.flags(), [&FlagSet](const std::string &F) { + return FlagSet.contains(F); + })) + continue; + + const std::string &group = M.exclusiveGroup(); + if (!group.empty()) { + // If this multilib has the same ExclusiveGroup as one we've already + // selected, skip it. We're iterating in reverse order, so the group + // member we've selected already is preferred. + // + // Otherwise, add the group name to the set of groups we've already + // selected a member of. + auto [It, Inserted] = ExclusiveGroupsSelected.insert(group); + if (!Inserted) + continue; + } + + // Select this multilib. + Selected.push_back(M); + } + + // We iterated in reverse order, so now put Selected back the right way + // round. + std::reverse(Selected.begin(), Selected.end()); + return !Selected.empty(); } @@ -138,10 +164,39 @@ static const VersionTuple MultilibVersionCurrent(1, 0); struct MultilibSerialization { std::string Dir; std::vector Flags; + std::string Group; +}; + +enum class MultilibGroupType { + /* + * The only group type currently supported is 'Exclusive', which indicates a + * group of multilibs of which at most one may be selected. + */ + Exclusive, + + /* + * Future possibility: a second group type indicating a set of library + * directories that are mutually _dependent_ rather than mutually exclusive: + * if you include one you must include them all. + * + * It might also be useful to allow groups to be members of other groups, so + * that a mutually exclusive group could contain a mutually dependent set of + * library directories, or vice versa. + * + * These additional features would need changes in the implementation, but + * the YAML schema is set up so they can be added without requiring changes + * in existing users' multilib.yaml files. 
+ */ +}; + +struct MultilibGroupSerialization { + std::string Name; + MultilibGroupType Type; }; struct MultilibSetSerialization { llvm::VersionTuple MultilibVersion; + std::vector Groups; std::vector Multilibs; std::vector FlagMatchers; }; @@ -152,6 +207,7 @@ template <> struct llvm::yaml::MappingTraits { static void mapping(llvm::yaml::IO &io, MultilibSerialization &V) { io.mapRequired("Dir", V.Dir); io.mapRequired("Flags", V.Flags); + io.mapOptional("Group", V.Group); } static std::string validate(IO &io, MultilibSerialization &V) { if (StringRef(V.Dir).starts_with("/")) @@ -160,6 +216,19 @@ template <> struct llvm::yaml::MappingTraits { } }; +template <> struct llvm::yaml::ScalarEnumerationTraits { + static void enumeration(IO &io, MultilibGroupType &Val) { + io.enumCase(Val, "Exclusive", MultilibGroupType::Exclusive); + } +}; + +template <> struct llvm::yaml::MappingTraits { + static void mapping(llvm::yaml::IO &io, MultilibGroupSerialization &V) { + io.mapRequired("Name", V.Name); + io.mapRequired("Type", V.Type); + } +}; + template <> struct llvm::yaml::MappingTraits { static void mapping(llvm::yaml::IO &io, MultilibSet::FlagMatcher &M) { io.mapRequired("Match", M.Match); @@ -180,6 +249,7 @@ template <> struct llvm::yaml::MappingTraits { static void mapping(llvm::yaml::IO &io, MultilibSetSerialization &M) { io.mapRequired("MultilibVersion", M.MultilibVersion); io.mapRequired("Variants", M.Multilibs); + io.mapOptional("Groups", M.Groups); io.mapOptional("Mappings", M.FlagMatchers); } static std::string validate(IO &io, MultilibSetSerialization &M) { @@ -191,11 +261,25 @@ template <> struct llvm::yaml::MappingTraits { if (M.MultilibVersion.getMinor() > MultilibVersionCurrent.getMinor()) return "multilib version " + M.MultilibVersion.getAsString() + " is unsupported"; + for (const MultilibSerialization &Lib : M.Multilibs) { + if (!Lib.Group.empty()) { + bool Found = false; + for (const MultilibGroupSerialization &Group : M.Groups) + if (Group.Name == Lib.Group) 
{ + Found = true; + break; + } + if (!Found) + return "multilib \"" + Lib.Dir + + "\" specifies undefined group name \"" + Lib.Group + "\""; + } + } return std::string{}; } }; LLVM_YAML_IS_SEQUENCE_VECTOR(MultilibSerialization) +LLVM_YAML_IS_SEQUENCE_VECTOR(MultilibGroupSerialization) LLVM_YAML_IS_SEQUENCE_VECTOR(MultilibSet::FlagMatcher) llvm::ErrorOr @@ -214,7 +298,11 @@ MultilibSet::parseYaml(llvm::MemoryBufferRef Input, std::string Dir; if (M.Dir != ".") Dir = "/" + M.Dir; - Multilibs.emplace_back(Dir, Dir, Dir, M.Flags); + // We transfer M.Group straight into the ExclusiveGroup parameter for the + // Multilib constructor. If we later support more than one type of group, + // we'll have to look up the group name in MS.Groups, check its type, and + // decide what to do here. + Multilibs.emplace_back(Dir, Dir, Dir, M.Flags, M.Group); } return MultilibSet(std::move(Multilibs), std::move(MS.FlagMatchers)); diff --git a/clang/test/Driver/baremetal-multilib-exclusive-group.yaml b/clang/test/Driver/baremetal-multilib-exclusive-group.yaml new file mode 100644 index 00000000000000..a98549efea4f0a --- /dev/null +++ b/clang/test/Driver/baremetal-multilib-exclusive-group.yaml @@ -0,0 +1,79 @@ +# UNSUPPORTED: system-windows + +# RUN: rm -rf %t + +# RUN: mkdir -p %t/baremetal_multilib/bin +# RUN: ln -s %clang %t/baremetal_multilib/bin/clang + +# RUN: mkdir -p %t/baremetal_multilib/lib/clang-runtimes +# RUN: ln -s %s %t/baremetal_multilib/lib/clang-runtimes/multilib.yaml + +# RUN: %t/baremetal_multilib/bin/clang -no-canonical-prefixes -x c++ %s -### -o %t.out --target=thumbv7em-none-unknown-eabi --sysroot= 2>%t.err + +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=POS +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=NEG + +# Expected results: +# +# Due to the Mappings section, all six of these library directories should +# match the command-line flag --target=thumbv7em-none-unknown-eabi. 
+# +# The two "non_exclusive" directories, which don't have an ExclusiveGroup at +# all, should both be selected. So should the two "own_group", each of which +# specifies a different value of ExclusiveGroup. But the three "exclusive", +# which have the _same_ ExclusiveGroup value, should not: the third one wins. +# So we expect five of these seven directories to show up in the clang-cc1 +# command line, but not testdir1_exclusive or testdir2_exclusive. + +# POS-DAG: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir1_non_exclusive/include/c++/v1" +# POS-DAG: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_non_exclusive/include/c++/v1" +# POS-DAG: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir3_exclusive/include/c++/v1" +# POS-DAG: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir1_own_group/include/c++/v1" +# POS-DAG: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_own_group/include/c++/v1" + +# NEG-NOT: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir1_exclusive/include/c++/v1" +# NEG-NOT: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_exclusive/include/c++/v1" + +--- +MultilibVersion: 1.0 + +Groups: +- Name: actually_exclude_something + Type: Exclusive + +- Name: foo + Type: Exclusive + +- Name: bar + Type: Exclusive + +Variants: +- Dir: testdir1_non_exclusive + Flags: [--target=thumbv7m-none-unknown-eabi] + +- Dir: testdir2_non_exclusive + Flags: [--target=thumbv7em-none-unknown-eabi] + +- Dir: testdir1_exclusive + Flags: [--target=thumbv7m-none-unknown-eabi] + Group: actually_exclude_something + +- Dir: testdir2_exclusive + Flags: [--target=thumbv7em-none-unknown-eabi] + Group: actually_exclude_something + +- Dir: testdir3_exclusive + Flags: [--target=thumbv7em-none-unknown-eabi] + Group: actually_exclude_something + +- Dir: testdir1_own_group + Flags: [--target=thumbv7m-none-unknown-eabi] + Group: foo + +- Dir: testdir2_own_group 
+ Flags: [--target=thumbv7em-none-unknown-eabi] + Group: bar + +Mappings: +- Match: --target=thumbv7em-none-unknown-eabi + Flags: [--target=thumbv7m-none-unknown-eabi] diff --git a/clang/test/Driver/baremetal-multilib-group-error.yaml b/clang/test/Driver/baremetal-multilib-group-error.yaml new file mode 100644 index 00000000000000..1e8f83fa50d244 --- /dev/null +++ b/clang/test/Driver/baremetal-multilib-group-error.yaml @@ -0,0 +1,27 @@ +# UNSUPPORTED: system-windows + +# RUN: rm -rf %t + +# RUN: mkdir -p %t/baremetal_multilib/bin +# RUN: ln -s %clang %t/baremetal_multilib/bin/clang + +# RUN: mkdir -p %t/baremetal_multilib/lib/clang-runtimes +# RUN: ln -s %s %t/baremetal_multilib/lib/clang-runtimes/multilib.yaml + +# RUN: %t/baremetal_multilib/bin/clang -no-canonical-prefixes -x c++ %s -### -o %t.out --target=thumbv7em-none-unknown-eabi --sysroot= 2>%t.err +# RUN: FileCheck %s < %t.err + +--- +MultilibVersion: 1.0 + +Groups: +- Name: group1 + Type: Nonsense + +Variants: +- Dir: testdir1 + Flags: [--target=thumbv7m-none-unknown-eabi] + Group: nonexistent_group_name + +# CHECK: error: unknown enumerated scalar +# CHECK: error: multilib "testdir1" specifies undefined group name "nonexistent_group_name" From a2e8207178432f0af30e8c9e3b905a3fd770d500 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 30 Nov 2023 16:50:57 +0000 Subject: [PATCH 32/72] [NFC][LLVMContext] Clean up DenseMapInfo classes used for APInt & APFloat. DenseMapAPIntKeyInfo looks like a redundant definition because it mirrors the default used by DenseMap when not specified. Replacing DenseMapAPFloatKeyInfo with a specialisation of DenseMapInfo<APFloat> allows DenseMap<T> to be more easily used when T is an aggregate type containing an APFloat.
--- llvm/lib/IR/LLVMContextImpl.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index b55107beba556c..6a20291344989d 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -73,9 +73,7 @@ class StringRef; class TypedPointerType; class ValueHandleBase; -using DenseMapAPIntKeyInfo = DenseMapInfo; - -struct DenseMapAPFloatKeyInfo { +template <> struct DenseMapInfo { static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); } static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus(), 2); @@ -1489,11 +1487,9 @@ class LLVMContextImpl { DenseMap> IntZeroConstants; DenseMap> IntOneConstants; - DenseMap, DenseMapAPIntKeyInfo> - IntConstants; + DenseMap> IntConstants; - DenseMap, DenseMapAPFloatKeyInfo> - FPConstants; + DenseMap> FPConstants; FoldingSet AttrsSet; FoldingSet AttrsLists; From 85184b4aefbd01afd6e7be57bc6c1c404b3c13ce Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 1 Dec 2023 13:33:11 +0100 Subject: [PATCH 33/72] [OpenMP] Fix libomptarget build issue (#74067) Libomptarget cannot be built because of the recent refactoring introduced in patch 148dec9fa43b: [OpenMP][NFC] Separate Envar (environment variable) handling (#73994) That patch moved the handling of environment variables out of the libomptarget library. That's why we no longer need the "llvm::omp::target" namespace when handling environment variables. --- openmp/libomptarget/src/interface.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index a2f713459e1d0c..ee1bd4932442b1 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -454,7 +454,6 @@ EXTERN void __tgt_target_nowait_query(void **AsyncHandle) { // for the device operations (work/spin wait on them) or block until they are // completed (use device side blocking mechanism).
This allows the runtime to // adapt itself when there are a lot of long-running target regions in-flight. - using namespace llvm::omp::target; static thread_local utils::ExponentialBackoff QueryCounter( Int64Envar("OMPTARGET_QUERY_COUNT_MAX", 10), Int64Envar("OMPTARGET_QUERY_COUNT_THRESHOLD", 5), From 808b7d220309e279cf9c3d5762cb4c9120c0955f Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Fri, 1 Dec 2023 13:55:31 +0100 Subject: [PATCH 34/72] [libc][NFC] rename LONG_DOUBLE_IS_DOUBLE into LIBC_LONG_DOUBLE_IS_FLOAT64 (#73948) --- libc/src/__support/FPUtil/generic/sqrt.h | 2 +- libc/src/__support/FPUtil/x86_64/sqrt.h | 2 +- libc/src/__support/float_to_string.h | 4 +- libc/src/__support/macros/properties/float.h | 8 ++-- libc/src/__support/str_to_float.h | 4 +- .../test/src/__support/FPUtil/fpbits_test.cpp | 2 +- libc/test/src/__support/str_to_float_test.cpp | 2 +- libc/test/src/stdio/sprintf_test.cpp | 44 +++++++++---------- libc/test/src/stdio/sscanf_test.cpp | 2 +- libc/test/src/stdlib/strtold_test.cpp | 4 +- 10 files changed, 36 insertions(+), 38 deletions(-) diff --git a/libc/src/__support/FPUtil/generic/sqrt.h b/libc/src/__support/FPUtil/generic/sqrt.h index 63e8329b066074..9c7e6a2f361c67 100644 --- a/libc/src/__support/FPUtil/generic/sqrt.h +++ b/libc/src/__support/FPUtil/generic/sqrt.h @@ -43,7 +43,7 @@ LIBC_INLINE void normalize(int &exponent, mantissa <<= shift; } -#ifdef LONG_DOUBLE_IS_DOUBLE +#ifdef LIBC_LONG_DOUBLE_IS_FLOAT64 template <> LIBC_INLINE void normalize(int &exponent, uint64_t &mantissa) { normalize(exponent, mantissa); diff --git a/libc/src/__support/FPUtil/x86_64/sqrt.h b/libc/src/__support/FPUtil/x86_64/sqrt.h index 7edba5528d6a91..cf3eb9b2f494cd 100644 --- a/libc/src/__support/FPUtil/x86_64/sqrt.h +++ b/libc/src/__support/FPUtil/x86_64/sqrt.h @@ -33,7 +33,7 @@ template <> LIBC_INLINE double sqrt(double x) { return result; } -#ifdef LONG_DOUBLE_IS_DOUBLE +#ifdef LIBC_LONG_DOUBLE_IS_FLOAT64 template <> LIBC_INLINE long double sqrt(long 
double x) { long double result; __asm__ __volatile__("sqrtsd %x1, %x0" : "=x"(result) : "x"(x)); diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index eb06cd9c08af28..1bb4e5c5b9246a 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -602,7 +602,7 @@ class FloatToString { } }; -#ifndef LONG_DOUBLE_IS_DOUBLE +#ifndef LIBC_LONG_DOUBLE_IS_FLOAT64 // --------------------------- LONG DOUBLE FUNCTIONS --------------------------- template <> @@ -754,7 +754,7 @@ FloatToString::get_negative_block(int block_index) { } } -#endif // LONG_DOUBLE_IS_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_FLOAT64 } // namespace LIBC_NAMESPACE diff --git a/libc/src/__support/macros/properties/float.h b/libc/src/__support/macros/properties/float.h index 4bafc3777a4714..f1679fe5111369 100644 --- a/libc/src/__support/macros/properties/float.h +++ b/libc/src/__support/macros/properties/float.h @@ -19,11 +19,9 @@ #include // LDBL_MANT_DIG // 'long double' properties. 
-#if (LDBL_MANT_DIG == DBL_MANT_DIG) -// TODO: Replace with LIBC_LONG_DOUBLE_IS_DOUBLE -#define LONG_DOUBLE_IS_DOUBLE -#endif -#if (LDBL_MANT_DIG == 64) +#if (LDBL_MANT_DIG == 53) +#define LIBC_LONG_DOUBLE_IS_FLOAT64 +#elif (LDBL_MANT_DIG == 64) // TODO: Replace with LIBC_LONG_DOUBLE_IS_X86_BIN80 #define SPECIAL_X86_LONG_DOUBLE #elif (LDBL_MANT_DIG == 113) diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index 81ab36dbf9471f..a9232573041426 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -221,7 +221,7 @@ eisel_lemire(ExpandedFloat init_num, return output; } -#if !defined(LONG_DOUBLE_IS_DOUBLE) +#if !defined(LIBC_LONG_DOUBLE_IS_FLOAT64) template <> LIBC_INLINE cpp::optional> eisel_lemire(ExpandedFloat init_num, @@ -516,7 +516,7 @@ template <> class ClingerConsts { static constexpr double MAX_EXACT_INT = 9007199254740991.0; }; -#if defined(LONG_DOUBLE_IS_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) template <> class ClingerConsts { public: static constexpr long double POWERS_OF_TEN_ARRAY[] = { diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp index 027db8807ab226..52635cc2af0940 100644 --- a/libc/test/src/__support/FPUtil/fpbits_test.cpp +++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp @@ -213,7 +213,7 @@ TEST(LlvmLibcFPBitsTest, X86LongDoubleType) { } #else TEST(LlvmLibcFPBitsTest, LongDoubleType) { -#if defined(LONG_DOUBLE_IS_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) return; // The tests for the "double" type cover for this case. 
#else using LongDoubleBits = FPBits; diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp index ae729418ebe363..c2643d9a764ee6 100644 --- a/libc/test/src/__support/str_to_float_test.cpp +++ b/libc/test/src/__support/str_to_float_test.cpp @@ -279,7 +279,7 @@ TEST(LlvmLibcStrToFloatTest, SimpleDecimalConversionExtraTypes) { EXPECT_EQ(double_result.error, 0); } -#if defined(LONG_DOUBLE_IS_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64AsLongDouble) { eisel_lemire_test(123, 0, 0x1EC00000000000, 1029); } diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp index e41579a20656b7..e2265f5efbc465 100644 --- a/libc/test/src/stdio/sprintf_test.cpp +++ b/libc/test/src/stdio/sprintf_test.cpp @@ -644,7 +644,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); -#elif defined(LONG_DOUBLE_IS_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.999999999999999999999999999ap-4"); @@ -653,7 +653,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); -#elif defined(LONG_DOUBLE_IS_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.e71b63f3ba7b580af1a52d2a7379p+3321"); @@ -662,7 +662,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); -#elif 
defined(LONG_DOUBLE_IS_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.0d152311513c28ce202627c06ec2p-3322"); @@ -768,7 +768,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); -#elif defined(LONG_DOUBLE_IS_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); @@ -777,7 +777,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0xf.fffffffffffffffp16380L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0x1.0p+16384"); -#elif defined(LONG_DOUBLE_IS_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x2.0p+16383"); @@ -1024,14 +1024,14 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { // Some float128 systems (specifically the ones used for aarch64 buildbots) // don't respect signs for long double NaNs. -#if defined(SPECIAL_X86_LONG_DOUBLE) || defined(LONG_DOUBLE_IS_DOUBLE) +#if defined(SPECIAL_X86_LONG_DOUBLE) || defined(LIBC_LONG_DOUBLE_IS_FLOAT64) written = LIBC_NAMESPACE::sprintf(buff, "%LF", -ld_nan); ASSERT_STREQ_LEN(written, buff, "-NAN"); #endif // Length Modifier Tests. - // TODO(michaelrj): Add tests for LONG_DOUBLE_IS_DOUBLE and 128 bit long + // TODO(michaelrj): Add tests for LIBC_LONG_DOUBLE_IS_FLOAT64 and 128 bit long // double systems. // TODO(michaelrj): Fix the tests to only depend on the digits the long double // is accurate for. 
@@ -1333,7 +1333,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.999999999999999999999999999ap-4"); @@ -1342,7 +1342,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.e71b63f3ba7b580af1a52d2a7379p+3321"); @@ -1351,7 +1351,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.0d152311513c28ce202627c06ec2p-3322"); @@ -1550,7 +1550,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); @@ -1559,7 +1559,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0xf.fffffffffffffffp16380L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, 
buff, "0x1.0p+16384"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x2.0p+16383"); @@ -1977,7 +1977,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.999999999999999999999999999ap-4"); @@ -1986,7 +1986,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.e71b63f3ba7b580af1a52d2a7379p+3321"); @@ -1995,7 +1995,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.0d152311513c28ce202627c06ec2p-3322"); @@ -2173,7 +2173,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, 
buff, "0x1.ap-4"); @@ -2182,7 +2182,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0xf.fffffffffffffffp16380L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0x1.0p+16384"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x2.0p+16383"); @@ -2616,7 +2616,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.999999999999999999999999999ap-4"); @@ -2625,7 +2625,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.e71b63f3ba7b580af1a52d2a7379p+3321"); @@ -2634,7 +2634,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.0d152311513c28ce202627c06ec2p-3322"); @@ -2822,7 +2822,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) 
ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); @@ -2831,7 +2831,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0xf.fffffffffffffffp16380L); #if defined(SPECIAL_X86_LONG_DOUBLE) ASSERT_STREQ_LEN(written, buff, "0x1.0p+16384"); - #elif defined(LONG_DOUBLE_IS_DOUBLE) + #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); #else // 128 bit long double ASSERT_STREQ_LEN(written, buff, "0x2.0p+16383"); diff --git a/libc/test/src/stdio/sscanf_test.cpp b/libc/test/src/stdio/sscanf_test.cpp index ec53c08bd9d41b..db3c48cdbf7a2d 100644 --- a/libc/test/src/stdio/sscanf_test.cpp +++ b/libc/test/src/stdio/sscanf_test.cpp @@ -322,7 +322,7 @@ TEST(LlvmLibcSScanfTest, FloatConvLengthModifier) { EXPECT_EQ(ret_val, 1); // 1e600 may be larger than the maximum long double (if long double is double). // In that case both of these should be evaluated as inf. 
-#ifdef LONG_DOUBLE_IS_DOUBLE +#ifdef LIBC_LONG_DOUBLE_IS_FLOAT64 EXPECT_FP_EQ(ld_result, d_inf); #else EXPECT_FP_EQ(ld_result, 1.0e600L); diff --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp index 680a93188c76d1..1ddf729689ff6c 100644 --- a/libc/test/src/stdlib/strtold_test.cpp +++ b/libc/test/src/stdlib/strtold_test.cpp @@ -16,7 +16,7 @@ #include #include -#if defined(LONG_DOUBLE_IS_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) #define SELECT_CONST(val, _, __) val #elif defined(SPECIAL_X86_LONG_DOUBLE) #define SELECT_CONST(_, val, __) val @@ -26,7 +26,7 @@ class LlvmLibcStrToLDTest : public LIBC_NAMESPACE::testing::Test { public: -#if defined(LONG_DOUBLE_IS_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) void run_test(const char *inputString, const ptrdiff_t expectedStrLen, const uint64_t expectedRawData, const int expectedErrno = 0) #else From f1d0276e4c42301155e900424ea734aca7ec97a8 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Fri, 1 Dec 2023 13:57:36 +0100 Subject: [PATCH 35/72] [libc][NFC] Rename LIBC_LONG_DOUBLE_IS_IEEE754_BIN128 to LIBC_LONG_DOUBLE_IS_FLOAT128 (#74052) To make it consistent with https://github.com/llvm/llvm-project/pull/73948 and https://github.com/llvm/llvm-project/pull/73950 --- libc/src/__support/macros/properties/float.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libc/src/__support/macros/properties/float.h b/libc/src/__support/macros/properties/float.h index f1679fe5111369..bd63f334660597 100644 --- a/libc/src/__support/macros/properties/float.h +++ b/libc/src/__support/macros/properties/float.h @@ -25,7 +25,7 @@ // TODO: Replace with LIBC_LONG_DOUBLE_IS_X86_BIN80 #define SPECIAL_X86_LONG_DOUBLE #elif (LDBL_MANT_DIG == 113) -#define LIBC_LONG_DOUBLE_IS_IEEE754_BIN128 +#define LIBC_LONG_DOUBLE_IS_FLOAT128 #endif // float16 support. 
@@ -69,13 +69,13 @@ using float16 = _Float16; using float128 = _Float128; #elif defined(LIBC_COMPILER_HAS_FLOAT128_EXTENSION) using float128 = __float128; -#elif defined(LIBC_LONG_DOUBLE_IS_IEEE754_BIN128) +#elif defined(LIBC_LONG_DOUBLE_IS_FLOAT128) using float128 = long double; #endif #if defined(LIBC_COMPILER_HAS_C23_FLOAT128) || \ defined(LIBC_COMPILER_HAS_FLOAT128_EXTENSION) || \ - defined(LIBC_LONG_DOUBLE_IS_IEEE754_BIN128) + defined(LIBC_LONG_DOUBLE_IS_FLOAT128) // TODO: Replace with LIBC_HAS_FLOAT128 #define LIBC_COMPILER_HAS_FLOAT128 #endif From 977af4252d1d60a1e9c546f0e4328b1a646ef635 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Fri, 1 Dec 2023 14:23:08 +0100 Subject: [PATCH 36/72] [libc][NFC] Rename SPECIAL_X86_LONG_DOUBLE in LIBC_LONG_DOUBLE_IS_X86_FLOAT80 (#73950) --- libc/src/__support/FPUtil/FPBits.h | 2 +- .../__support/FPUtil/ManipulationFunctions.h | 4 +- libc/src/__support/FPUtil/NormalFloat.h | 4 +- libc/src/__support/FPUtil/generic/sqrt.h | 6 +- .../FPUtil/generic/sqrt_80_bit_long_double.h | 4 +- libc/src/__support/macros/properties/float.h | 3 +- libc/src/__support/str_to_float.h | 4 +- libc/test/src/__support/str_to_float_test.cpp | 2 +- libc/test/src/stdio/sprintf_test.cpp | 57 ++++++++++--------- libc/test/src/stdlib/strtold_test.cpp | 2 +- 10 files changed, 44 insertions(+), 44 deletions(-) diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h index 76a9fc6d772bf9..f5b73440de2158 100644 --- a/libc/src/__support/FPUtil/FPBits.h +++ b/libc/src/__support/FPUtil/FPBits.h @@ -249,7 +249,7 @@ template struct FPBits { } // namespace fputil } // namespace LIBC_NAMESPACE -#ifdef SPECIAL_X86_LONG_DOUBLE +#ifdef LIBC_LONG_DOUBLE_IS_X86_FLOAT80 #include "x86_64/LongDoubleBits.h" #endif diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h index 9286deee2d92c4..9d3fd075be4711 100644 --- a/libc/src/__support/FPUtil/ManipulationFunctions.h +++ 
b/libc/src/__support/FPUtil/ManipulationFunctions.h @@ -186,8 +186,8 @@ LIBC_INLINE T nextafter(T from, U to) { } // namespace fputil } // namespace LIBC_NAMESPACE -#ifdef SPECIAL_X86_LONG_DOUBLE +#ifdef LIBC_LONG_DOUBLE_IS_X86_FLOAT80 #include "x86_64/NextAfterLongDouble.h" -#endif // SPECIAL_X86_LONG_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_X86_FLOAT80 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_MANIPULATIONFUNCTIONS_H diff --git a/libc/src/__support/FPUtil/NormalFloat.h b/libc/src/__support/FPUtil/NormalFloat.h index afbf97cc2b6386..d59de14fb695e8 100644 --- a/libc/src/__support/FPUtil/NormalFloat.h +++ b/libc/src/__support/FPUtil/NormalFloat.h @@ -170,7 +170,7 @@ template struct NormalFloat { } }; -#ifdef SPECIAL_X86_LONG_DOUBLE +#ifdef LIBC_LONG_DOUBLE_IS_X86_FLOAT80 template <> LIBC_INLINE void NormalFloat::init_from_bits(FPBits bits) { @@ -259,7 +259,7 @@ template <> LIBC_INLINE NormalFloat::operator long double() const { result.set_implicit_bit(1); return static_cast(result); } -#endif // SPECIAL_X86_LONG_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_X86_FLOAT80 } // namespace fputil } // namespace LIBC_NAMESPACE diff --git a/libc/src/__support/FPUtil/generic/sqrt.h b/libc/src/__support/FPUtil/generic/sqrt.h index 9c7e6a2f361c67..b93fa7a35f826b 100644 --- a/libc/src/__support/FPUtil/generic/sqrt.h +++ b/libc/src/__support/FPUtil/generic/sqrt.h @@ -28,11 +28,11 @@ template struct SpecialLongDouble { static constexpr bool VALUE = false; }; -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) template <> struct SpecialLongDouble { static constexpr bool VALUE = true; }; -#endif // SPECIAL_X86_LONG_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_X86_FLOAT80 template LIBC_INLINE void normalize(int &exponent, @@ -48,7 +48,7 @@ template <> LIBC_INLINE void normalize(int &exponent, uint64_t &mantissa) { normalize(exponent, mantissa); } -#elif !defined(SPECIAL_X86_LONG_DOUBLE) +#elif !defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) template <> LIBC_INLINE void 
normalize(int &exponent, UInt128 &mantissa) { const uint64_t hi_bits = static_cast(mantissa >> 64); diff --git a/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h b/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h index 713c3389051096..a3bf7e3cabad3e 100644 --- a/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h +++ b/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h @@ -34,7 +34,7 @@ LIBC_INLINE long double sqrt(long double x); // Correctly rounded SQRT for all rounding modes. // Shift-and-add algorithm. -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) LIBC_INLINE long double sqrt(long double x) { using UIntType = typename FPBits::UIntType; constexpr UIntType ONE = UIntType(1) @@ -135,7 +135,7 @@ LIBC_INLINE long double sqrt(long double x) { return out; } } -#endif // SPECIAL_X86_LONG_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_X86_FLOAT80 } // namespace x86 } // namespace fputil diff --git a/libc/src/__support/macros/properties/float.h b/libc/src/__support/macros/properties/float.h index bd63f334660597..bae51cbe8aee8e 100644 --- a/libc/src/__support/macros/properties/float.h +++ b/libc/src/__support/macros/properties/float.h @@ -22,8 +22,7 @@ #if (LDBL_MANT_DIG == 53) #define LIBC_LONG_DOUBLE_IS_FLOAT64 #elif (LDBL_MANT_DIG == 64) -// TODO: Replace with LIBC_LONG_DOUBLE_IS_X86_BIN80 -#define SPECIAL_X86_LONG_DOUBLE +#define LIBC_LONG_DOUBLE_IS_X86_FLOAT80 #elif (LDBL_MANT_DIG == 113) #define LIBC_LONG_DOUBLE_IS_FLOAT128 #endif diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index a9232573041426..a872c25e2f0998 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -89,7 +89,7 @@ template LIBC_INLINE void set_implicit_bit(fputil::FPBits &) { return; } -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) template <> LIBC_INLINE void set_implicit_bit(fputil::FPBits &result) { @@ -529,7 +529,7 @@ 
template <> class ClingerConsts { static constexpr long double MAX_EXACT_INT = ClingerConsts::MAX_EXACT_INT; }; -#elif defined(SPECIAL_X86_LONG_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) template <> class ClingerConsts { public: static constexpr long double POWERS_OF_TEN_ARRAY[] = { diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp index c2643d9a764ee6..f9d12d95a50bee 100644 --- a/libc/test/src/__support/str_to_float_test.cpp +++ b/libc/test/src/__support/str_to_float_test.cpp @@ -283,7 +283,7 @@ TEST(LlvmLibcStrToFloatTest, SimpleDecimalConversionExtraTypes) { TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64AsLongDouble) { eisel_lemire_test(123, 0, 0x1EC00000000000, 1029); } -#elif defined(SPECIAL_X86_LONG_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Simple) { eisel_lemire_test(123, 0, 0xf600000000000000, 16389); eisel_lemire_test(12345678901234568192u, 0, 0xab54a98ceb1f0c00, diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp index e2265f5efbc465..344853beaf9fa7 100644 --- a/libc/test/src/stdio/sprintf_test.cpp +++ b/libc/test/src/stdio/sprintf_test.cpp @@ -642,7 +642,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { // Length Modifier Tests. 
written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); @@ -651,7 +651,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); @@ -660,7 +660,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); @@ -766,7 +766,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { ASSERT_STREQ_LEN(written, buff, "0x0p+0"); written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); @@ -775,7 +775,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0xf.fffffffffffffffp16380L); -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x1.0p+16384"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); @@ -1024,7 +1024,8 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { // Some float128 systems (specifically the ones used for aarch64 buildbots) // don't respect signs for long double NaNs. 
-#if defined(SPECIAL_X86_LONG_DOUBLE) || defined(LIBC_LONG_DOUBLE_IS_FLOAT64) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) || \ + defined(LIBC_LONG_DOUBLE_IS_FLOAT64) written = LIBC_NAMESPACE::sprintf(buff, "%LF", -ld_nan); ASSERT_STREQ_LEN(written, buff, "-NAN"); #endif @@ -1042,7 +1043,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.Lf", -2.5L); ASSERT_STREQ_LEN(written, buff, "-2"); -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) written = LIBC_NAMESPACE::sprintf(buff, "%Lf", 1e100L); ASSERT_STREQ_LEN(written, buff, @@ -1327,11 +1328,11 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { "570449525088342437216896462077260223998756027453411520977536701491759878" "422771447006016890777855573925295187921971811871399320142563330377888532" "179817332113"); -#endif // SPECIAL_X86_LONG_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_X86_FLOAT80 /* written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); @@ -1340,7 +1341,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); @@ -1349,7 +1350,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); @@ -1548,7 +1549,7 @@ 
TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { /* written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); @@ -1557,7 +1558,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatDecimalConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%.1La", - 0xf.fffffffffffffffp16380L); #if defined(SPECIAL_X86_LONG_DOUBLE) + 0xf.fffffffffffffffp16380L); #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x1.0p+16384"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); @@ -1858,7 +1859,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { // Length Modifier Tests. -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) written = LIBC_NAMESPACE::sprintf(buff, "%.9Le", 1000000000500000000.1L); ASSERT_STREQ_LEN(written, buff, "1.000000001e+18"); @@ -1975,7 +1976,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { */ /* written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); @@ -1984,7 +1985,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); @@ -1993,7 +1994,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if 
defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); @@ -2171,7 +2172,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { /* written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); @@ -2180,7 +2181,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatExponentConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%.1La", - 0xf.fffffffffffffffp16380L); #if defined(SPECIAL_X86_LONG_DOUBLE) + 0xf.fffffffffffffffp16380L); #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x1.0p+16384"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); @@ -2499,7 +2500,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { // Length Modifier Tests. 
-#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) written = LIBC_NAMESPACE::sprintf(buff, "%Lg", 0xf.fffffffffffffffp+16380L); ASSERT_STREQ_LEN(written, buff, "1.18973e+4932"); @@ -2507,7 +2508,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { written = LIBC_NAMESPACE::sprintf(buff, "%Lg", 0xa.aaaaaaaaaaaaaabp-7L); ASSERT_STREQ_LEN(written, buff, "0.0833333"); -#endif // SPECIAL_X86_LONG_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_X86_FLOAT80 // TODO: Uncomment the below tests after long double support is added /* @@ -2614,7 +2615,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { */ /* written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.1L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.ccccccccccccccdp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.999999999999ap-4"); @@ -2623,7 +2624,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e1000L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xf.38db1f9dd3dac05p+3318"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); @@ -2632,7 +2633,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%La", 1.0e-1000L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x8.68a9188a89e1467p-3325"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x0p+0"); @@ -2806,21 +2807,21 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { written = LIBC_NAMESPACE::sprintf(buff, "%.10g", 0x1.0p-1074); ASSERT_STREQ_LEN(written, buff, "4.940656458e-324"); -#if defined(SPECIAL_X86_LONG_DOUBLE) +#if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) written = LIBC_NAMESPACE::sprintf(buff, "%.60Lg", 0xa.aaaaaaaaaaaaaabp-7L); 
ASSERT_STREQ_LEN( written, buff, "0.0833333333333333333355920878593448009041821933351457118988037"); -#endif // SPECIAL_X86_LONG_DOUBLE +#endif // LIBC_LONG_DOUBLE_IS_X86_FLOAT80 // Long double precision tests. // These are currently commented out because they require long double support // that isn't ready yet. /* written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L); - #if defined(SPECIAL_X86_LONG_DOUBLE) + #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0xc.dp-7"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "0x1.ap-4"); @@ -2829,7 +2830,7 @@ TEST_F(LlvmLibcSPrintfTest, FloatAutoConv) { #endif written = LIBC_NAMESPACE::sprintf(buff, "%.1La", - 0xf.fffffffffffffffp16380L); #if defined(SPECIAL_X86_LONG_DOUBLE) + 0xf.fffffffffffffffp16380L); #if defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) ASSERT_STREQ_LEN(written, buff, "0x1.0p+16384"); #elif defined(LIBC_LONG_DOUBLE_IS_FLOAT64) ASSERT_STREQ_LEN(written, buff, "inf"); diff --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp index 1ddf729689ff6c..37db385c959bf5 100644 --- a/libc/test/src/stdlib/strtold_test.cpp +++ b/libc/test/src/stdlib/strtold_test.cpp @@ -18,7 +18,7 @@ #if defined(LIBC_LONG_DOUBLE_IS_FLOAT64) #define SELECT_CONST(val, _, __) val -#elif defined(SPECIAL_X86_LONG_DOUBLE) +#elif defined(LIBC_LONG_DOUBLE_IS_X86_FLOAT80) #define SELECT_CONST(_, val, __) val #else #define SELECT_CONST(_, __, val) val From da86d4a8c956f0fcee21444eb6de9f05d39d6574 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 1 Dec 2023 14:25:16 +0100 Subject: [PATCH 37/72] [ValueTracking] Reduce duplication in haveNoCommonBitsSet() (NFC) Extract a function and call it with both operand orders, so that we don't have to explicitly commute every single pattern. 
--- llvm/lib/Analysis/ValueTracking.cpp | 50 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index d8a72c9f7b989d..8c29c242215d66 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -186,47 +186,30 @@ KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } -bool llvm::haveNoCommonBitsSet(const WithCache &LHSCache, - const WithCache &RHSCache, - const SimplifyQuery &SQ) { - const Value *LHS = LHSCache.getValue(); - const Value *RHS = RHSCache.getValue(); - - assert(LHS->getType() == RHS->getType() && - "LHS and RHS should have the same type"); - assert(LHS->getType()->isIntOrIntVectorTy() && - "LHS and RHS should be integers"); +static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, + const Value *RHS) { // Look for an inverted mask: (X & ~M) op (Y & M). { Value *M; if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && match(RHS, m_c_And(m_Specific(M), m_Value()))) return true; - if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) && - match(LHS, m_c_And(m_Specific(M), m_Value()))) - return true; } // X op (Y & ~X) - if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) || - match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value()))) + if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value()))) return true; // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern // for constant Y. 
Value *Y; - if (match(RHS, - m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) || - match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y)))) + if (match(RHS, m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y)))) return true; // Peek through extends to find a 'not' of the other side: // (ext Y) op ext(~Y) - // (ext ~Y) op ext(Y) - if ((match(LHS, m_ZExtOrSExt(m_Value(Y))) && - match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y))))) || - (match(RHS, m_ZExtOrSExt(m_Value(Y))) && - match(LHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))))) + if (match(LHS, m_ZExtOrSExt(m_Value(Y))) && + match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y))))) return true; // Look for: (A & B) op ~(A | B) @@ -235,11 +218,26 @@ bool llvm::haveNoCommonBitsSet(const WithCache &LHSCache, if (match(LHS, m_And(m_Value(A), m_Value(B))) && match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return true; - if (match(RHS, m_And(m_Value(A), m_Value(B))) && - match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) - return true; } + return false; +} + +bool llvm::haveNoCommonBitsSet(const WithCache &LHSCache, + const WithCache &RHSCache, + const SimplifyQuery &SQ) { + const Value *LHS = LHSCache.getValue(); + const Value *RHS = RHSCache.getValue(); + + assert(LHS->getType() == RHS->getType() && + "LHS and RHS should have the same type"); + assert(LHS->getType()->isIntOrIntVectorTy() && + "LHS and RHS should be integers"); + + if (haveNoCommonBitsSetSpecialCases(LHS, RHS) || + haveNoCommonBitsSetSpecialCases(RHS, LHS)) + return true; + return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ), RHSCache.getKnownBits(SQ)); } From 6e3b2cb46ef5b9d9d28ed337491ee7da7b296616 Mon Sep 17 00:00:00 2001 From: Eleanor Bonnici Date: Fri, 1 Dec 2023 13:54:04 +0000 Subject: [PATCH 38/72] [llvm][MC][ARM][Assembly] Emit relocations for ADRs and big-endian targets (#73834) Follow-up on https://github.com/llvm/llvm-project/pull/72873/ When ADR/LDR instructions reference a label in a different 
section, the offset is not known until link time, however, the assembler assumes it can resolve them in some cases. The previous patch addressed the issue for most LDR instructions, focusing on little-endian targets. This patch addresses the remaining work for ADRs and big-endian targets. --- .../Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 15 ++++++----- .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 6 +++++ llvm/test/MC/ARM/pcrel-adr16-relocs.s | 26 +++++++++++++++++++ llvm/test/MC/ARM/pcrel-adr32-relocs.s | 24 +++++++++++++++++ llvm/test/MC/ARM/pcrel-arm-ldr-imm8-relocs.s | 6 +++++ llvm/test/MC/ARM/pcrel-global.s | 10 ++----- llvm/test/MC/ARM/pcrel-ldr-relocs.s | 8 ++++-- llvm/test/MC/ARM/pcrel-thumb-ldr2-relocs.s | 3 +++ llvm/test/MC/ARM/thumb1-relax-adr.s | 1 - 9 files changed, 82 insertions(+), 17 deletions(-) create mode 100644 llvm/test/MC/ARM/pcrel-adr16-relocs.s create mode 100644 llvm/test/MC/ARM/pcrel-adr32-relocs.s diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index ca3b77e4a35653..41b3c6005231e8 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -88,10 +88,12 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_arm_ldst_abs_12", 0, 32, 0}, {"fixup_thumb_adr_pcrel_10", 0, 8, - IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, - {"fixup_arm_adr_pcrel_12", 0, 32, IsPCRelConstant}, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_t2_adr_pcrel_12", 0, 32, - IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_uncondbranch", 0, 24, 
MCFixupKindInfo::FKF_IsPCRel}, {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, @@ -133,10 +135,11 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { // ARMFixupKinds.h. // // Name Offset (bits) Size (bits) Flags - {"fixup_arm_ldst_pcrel_12", 0, 32, IsPCRelConstant}, + {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_t2_ldst_pcrel_12", 0, 32, - IsPCRelConstant | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, - {"fixup_arm_pcrel_10_unscaled", 0, 32, IsPCRelConstant}, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_arm_pcrel_10", 0, 32, IsPCRelConstant}, {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 985097fc328105..44695a86c4e36c 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -164,6 +164,12 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return ELF::R_ARM_LDRS_PC_G0; case ARM::fixup_t2_ldst_pcrel_12: return ELF::R_ARM_THM_PC12; + case ARM::fixup_arm_adr_pcrel_12: + return ELF::R_ARM_ALU_PC_G0; + case ARM::fixup_thumb_adr_pcrel_10: + return ELF::R_ARM_THM_PC8; + case ARM::fixup_t2_adr_pcrel_12: + return ELF::R_ARM_THM_ALU_PREL_11_0; case ARM::fixup_bf_target: return ELF::R_ARM_THM_BF16; case ARM::fixup_bfc_target: diff --git a/llvm/test/MC/ARM/pcrel-adr16-relocs.s b/llvm/test/MC/ARM/pcrel-adr16-relocs.s new file mode 100644 index 00000000000000..adef746c3607a5 --- /dev/null +++ b/llvm/test/MC/ARM/pcrel-adr16-relocs.s @@ -0,0 +1,26 @@ +@ RUN: llvm-mc -filetype=obj --triple=thumbv6m-none-eabi %s -o %t +@ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=RELOC +@ RUN: llvm-objdump -d --triple=thumbv6m-none-eabi %t | FileCheck %s 
--check-prefix=ADDEND + + .section .text._func1, "ax" + + .balign 4 + .global _func1 + .type _func1, %function +_func1: + adr r0, _func2 +@ RELOC: R_ARM_THM_PC8 + bx lr + +// Checking the encoding only, as the disassembly is not quite correct here. +//00000000 <_func1>: +// 0: a0ff adr r0, #1020 <_func1+0x103> + +// Thumb16 encoding supports only adding of the encoded immediate (not +// subtracting, see [Arm ARM]), therefore sign change is required if the pcrel +// offset is negative. This makes the calculation of the addend for +// R_ARM_THM_PC8 more complex, for details see [ELF for the Arm 32-bit +// architecture]. + +@ ADDEND: a0ff adr + diff --git a/llvm/test/MC/ARM/pcrel-adr32-relocs.s b/llvm/test/MC/ARM/pcrel-adr32-relocs.s new file mode 100644 index 00000000000000..5fd30f24630f90 --- /dev/null +++ b/llvm/test/MC/ARM/pcrel-adr32-relocs.s @@ -0,0 +1,24 @@ +@ RUN: llvm-mc -filetype=obj -triple=armv7 %s -o %t +@ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=RELOC +@ RUN: llvm-objdump -d --triple=armv7 %t | FileCheck %s --check-prefix=ADDEND + +@ RUN: llvm-mc -filetype=obj --triple=armebv7-unknown-unknown %s -o %t +@ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=RELOC +@ RUN: llvm-objdump -d --triple=armebv7-unknown-unknown %t | FileCheck %s --check-prefix=ADDEND + + .section .text._func1, "ax" + + .balign 4 + .global _func1 + .type _func1, %function +_func1: + adr r0, _func2 +@ RELOC: R_ARM_ALU_PC_G0 + .thumb + adr r0, _func2 +@ RELOC: R_ARM_THM_ALU_PREL_11_0 + bx lr + +@ ADDEND: sub r0, pc, #8 +@ ADDEND-NEXT: adr.w r0, #-4 + diff --git a/llvm/test/MC/ARM/pcrel-arm-ldr-imm8-relocs.s b/llvm/test/MC/ARM/pcrel-arm-ldr-imm8-relocs.s index 40453d6ef341a4..f8b166d4c24858 100644 --- a/llvm/test/MC/ARM/pcrel-arm-ldr-imm8-relocs.s +++ b/llvm/test/MC/ARM/pcrel-arm-ldr-imm8-relocs.s @@ -1,6 +1,9 @@ @ RUN: llvm-mc -filetype=obj -triple=armv7 %s -o %t @ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=ARM @ RUN: llvm-objdump -d --triple=armv7 %t | 
FileCheck %s --check-prefix=ARM_ADDEND +@ RUN: llvm-mc -filetype=obj --triple=armebv7-unknown-unknown %s -o %t +@ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=ARM +@ RUN: llvm-objdump -d --triple=armebv7-unknown-unknown %t | FileCheck %s --check-prefix=ARM_ADDEND @ ARM: R_ARM_LDRS_PC_G0 @ ARM: R_ARM_LDRS_PC_G0 @@ -8,6 +11,7 @@ @ ARM: R_ARM_LDRS_PC_G0 @ ARM: R_ARM_LDRS_PC_G0 @ ARM: R_ARM_LDRS_PC_G0 +@ ARM: R_ARM_LDRS_PC_G0 // The value format is decimal in these specific cases, but it's hex for other // ldr instructions. These checks are valid for both formats. @@ -18,6 +22,7 @@ @ ARM_ADDEND: r0, [pc, #-{{16|0x10}}] @ ARM_ADDEND: r0, [pc, #-{{16|0x10}}] @ ARM_ADDEND: r0, [pc] +@ ARM_ADDEND: r0, r1, [pc] .arm .section .text.bar, "ax" @@ -31,6 +36,7 @@ bar: ldrh r0, just_after-8 ldrsb r0, just_after-8 ldrsh r0, foo+8 + ldrd r0,r1, foo+8 bx lr .section .data.foo, "a", %progbits diff --git a/llvm/test/MC/ARM/pcrel-global.s b/llvm/test/MC/ARM/pcrel-global.s index 15d46cf2063ecf..1e9e6e989356ec 100644 --- a/llvm/test/MC/ARM/pcrel-global.s +++ b/llvm/test/MC/ARM/pcrel-global.s @@ -7,11 +7,9 @@ @ CHECK: There are no relocations in this file. @ DISASM-LABEL: : -@ DISASM-NEXT: adr.w r0, #-4 -@ DISASM-NEXT: adr.w r0, #-8 -@ DISASM-NEXT: ldr r0, [pc, #0x0] @ 0x14 +@ DISASM-NEXT: ldr r0, [pc, #0x0] @ 0x8 @ DISASM-NEXT: add r0, pc -@ DISASM-NEXT: .word 0xfffffff3 +@ DISASM-NEXT: .word 0xfffffffb @@ GNU assembler creates an R_ARM_REL32 referencing bar. 
@ DISASM-NOT: {{.}} @@ -20,16 +18,12 @@ .globl foo foo: vldr d0, foo @ arm_pcrel_10 -adr r2, foo @ arm_adr_pcrel_12 .thumb .thumb_func .type bar, %function .globl bar bar: -adr r0, bar @ thumb_adr_pcrel_10 -adr.w r0, bar @ t2_adr_pcrel_12 - ldr r0, .LCPI .LPC0_1: add r0, pc diff --git a/llvm/test/MC/ARM/pcrel-ldr-relocs.s b/llvm/test/MC/ARM/pcrel-ldr-relocs.s index 120d54ebafe087..e0f27f29949993 100644 --- a/llvm/test/MC/ARM/pcrel-ldr-relocs.s +++ b/llvm/test/MC/ARM/pcrel-ldr-relocs.s @@ -4,12 +4,17 @@ @ RUN: llvm-mc -filetype=obj -triple=thumbv7 %s -o %t @ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=THUMB @ RUN: llvm-objdump -d --triple=thumbv7 %t | FileCheck %s --check-prefix=THUMB_ADDEND +@ RUN: llvm-mc -filetype=obj -triple=armebv7 %s -o %t +@ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=ARM +@ RUN: llvm-objdump -d --triple=armebv7 %t | FileCheck %s --check-prefix=ARM_ADDEND +@ RUN: llvm-mc -filetype=obj -triple=thumbebv7 %s -o %t +@ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=THUMB +@ RUN: llvm-objdump -d --triple=thumbebv7 %t | FileCheck %s --check-prefix=THUMB_ADDEND @ ARM: R_ARM_LDR_PC_G0 @ ARM: R_ARM_LDR_PC_G0 @ ARM: R_ARM_LDR_PC_G0 @ ARM: R_ARM_LDR_PC_G0 - @ ARM_ADDEND: r0, [pc, #-0x8] @ ARM_ADDEND: r0, [pc, #-0x8] @ ARM_ADDEND: r0, [pc, #-0x10] @@ -19,7 +24,6 @@ @ THUMB: R_ARM_THM_PC12 @ THUMB: R_ARM_THM_PC12 @ THUMB: R_ARM_THM_PC12 - @ THUMB_ADDEND: r0, [pc, #-0x4] @ THUMB_ADDEND: r0, [pc, #-0x4] @ THUMB_ADDEND: r0, [pc, #-0xc] diff --git a/llvm/test/MC/ARM/pcrel-thumb-ldr2-relocs.s b/llvm/test/MC/ARM/pcrel-thumb-ldr2-relocs.s index 17ca72bd3f00ca..3aa371fc7d702f 100644 --- a/llvm/test/MC/ARM/pcrel-thumb-ldr2-relocs.s +++ b/llvm/test/MC/ARM/pcrel-thumb-ldr2-relocs.s @@ -1,6 +1,9 @@ @ RUN: llvm-mc -filetype=obj -triple=thumbv7 %s -o %t @ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=THUMB @ RUN: llvm-objdump -d --triple=thumbv7 %t | FileCheck %s --check-prefix=THUMB_ADDEND +@ RUN: llvm-mc -filetype=obj 
--triple=thumbebv7-unknown-unknown %s -o %t +@ RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=THUMB +@ RUN: llvm-objdump -d --triple=thumbebv7-unknown-unknown %t | FileCheck %s --check-prefix=THUMB_ADDEND @ All the ldr variants produce a relocation @ THUMB: R_ARM_THM_PC12 diff --git a/llvm/test/MC/ARM/thumb1-relax-adr.s b/llvm/test/MC/ARM/thumb1-relax-adr.s index fc5c7c39df5ae1..97b566f4833e63 100644 --- a/llvm/test/MC/ARM/thumb1-relax-adr.s +++ b/llvm/test/MC/ARM/thumb1-relax-adr.s @@ -1,6 +1,5 @@ @ RUN: not llvm-mc -triple thumbv6m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s @ RUN: not llvm-mc -triple thumbv7m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s -@ RUN: not llvm-mc -triple thumbv7m-none-eabi -filetype=obj -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s .global func1 _func1: From 6ab7662f35bb5bc1d19a7e68ec0a710bbf71c2c4 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Fri, 1 Dec 2023 17:56:27 +0400 Subject: [PATCH 39/72] [clang][NFC] Refactor expected directives in C++ DRs 100-199 (#74061) This patch continues the work started with ea5b1ef016d020c37f903d6c7d4f623be975dab8. See that commit and its corresponding PR for details. 
--- clang/test/CXX/drs/dr0xx.cpp | 56 ++- clang/test/CXX/drs/dr1xx.cpp | 681 ++++++++++++++++++++++------------- 2 files changed, 458 insertions(+), 279 deletions(-) diff --git a/clang/test/CXX/drs/dr0xx.cpp b/clang/test/CXX/drs/dr0xx.cpp index e79ce6daf2655c..768da0f8e6fa77 100644 --- a/clang/test/CXX/drs/dr0xx.cpp +++ b/clang/test/CXX/drs/dr0xx.cpp @@ -89,12 +89,11 @@ namespace dr7 { // dr7: 3.4 class B : virtual private A {}; // #dr7-B class C : public B {} c; // #dr7-C // expected-error@#dr7-C {{inherited virtual base class 'A' has private destructor}} - // expected-note@#dr7-C {{in implicit default constructor for 'dr7::C' first required here}} - // expected-note@#dr7-B {{declared private here}} - + // expected-note@#dr7-C {{in implicit default constructor for 'dr7::C' first required here}} + // expected-note@#dr7-B {{declared private here}} // expected-error@#dr7-C {{inherited virtual base class 'A' has private destructor}} - // expected-note@#dr7-C {{in implicit destructor for 'dr7::C' first required here}} - // expected-note@#dr7-B {{declared private here}} + // expected-note@#dr7-C {{in implicit destructor for 'dr7::C' first required here}} + // expected-note@#dr7-B {{declared private here}} class VeryDerivedC : public B, virtual public A {} vdc; class X { ~X(); }; // #dr7-X @@ -237,11 +236,10 @@ namespace dr16 { // dr16: 2.8 // expected-note@#dr16-A-f-decl {{member is declared here}} A::f(); // #dr16-A-f-call // expected-error@#dr16-A-f-call {{'A' is a private member of 'dr16::A'}} - // expected-note@#dr16-B {{constrained by implicitly private inheritance here}} - // expected-note@#dr16-A {{member is declared here}} - + // expected-note@#dr16-B {{constrained by implicitly private inheritance here}} + // expected-note@#dr16-A {{member is declared here}} // expected-error@#dr16-A-f-call {{cannot cast 'dr16::C' to its private base class 'dr16::A'}} - // expected-note@#dr16-B {{implicitly declared private here}} + // expected-note@#dr16-B {{implicitly 
declared private here}} } }; } @@ -361,9 +359,9 @@ namespace dr26 { // dr26: yes // FIXME: In C++98, we diagnose this twice. B(const B &, B = B()); // cxx98-14-error@-1 {{recursive evaluation of default argument}} - // cxx98-14-note@-2 {{default argument used here}} + // cxx98-14-note@-2 {{default argument used here}} // cxx98-error@-3 {{recursive evaluation of default argument}} - // cxx98-note@-4 {{default argument used here}} + // cxx98-note@-4 {{default argument used here}} }; struct C { static C &f(); @@ -788,23 +786,20 @@ namespace dr49 { // dr49: 2.8 A<&k> a; A

b; // #dr49-b // cxx98-error@#dr49-b {{non-type template argument referring to object 'p' with internal linkage is a C++11 extension}} - // cxx98-note@#dr49-p {{non-type template argument refers to object here}} - + // cxx98-note@#dr49-p {{non-type template argument refers to object here}} // cxx98-14-error@#dr49-b {{non-type template argument for template parameter of pointer type 'int *' must have its address taken}} - // cxx98-14-note@#dr49-A {{template parameter is declared here}} + // cxx98-14-note@#dr49-A {{template parameter is declared here}} int *q = &k; // #dr49-q A c; // #dr49-c // cxx98-error@#dr49-c {{non-type template argument for template parameter of pointer type 'int *' must have its address taken}} - // cxx98-note@#dr49-A {{template parameter is declared here}} - + // cxx98-note@#dr49-A {{template parameter is declared here}} // cxx11-14-error@#dr49-c {{non-type template argument of type 'int *' is not a constant expression}} - // cxx11-14-note@#dr49-c {{read of non-constexpr variable 'q' is not allowed in a constant expression}} - // cxx11-14-note@#dr49-q {{declared here}} - // cxx11-14-note@#dr49-A {{template parameter is declared here}} - + // cxx11-14-note@#dr49-c {{read of non-constexpr variable 'q' is not allowed in a constant expression}} + // cxx11-14-note@#dr49-q {{declared here}} + // cxx11-14-note@#dr49-A {{template parameter is declared here}} // since-cxx17-error@#dr49-c {{non-type template argument is not a constant expression}} - // since-cxx17-note@#dr49-c {{read of non-constexpr variable 'q' is not allowed in a constant expression}} - // since-cxx17-note@#dr49-q {{declared here}} + // since-cxx17-note@#dr49-c {{read of non-constexpr variable 'q' is not allowed in a constant expression}} + // since-cxx17-note@#dr49-q {{declared here}} } namespace dr50 { // dr50: yes @@ -835,11 +830,10 @@ namespace dr52 { // dr52: 2.8 int k = b.A::n; // #dr52-k // FIXME: This first diagnostic is very strangely worded, and seems to be bogus. 
// expected-error@#dr52-k {{'A' is a private member of 'dr52::A'}} - // expected-note@#dr52-B {{constrained by private inheritance here}} - // expected-note@#dr52-A {{member is declared here}} - + // expected-note@#dr52-B {{constrained by private inheritance here}} + // expected-note@#dr52-A {{member is declared here}} // expected-error@#dr52-k {{cannot cast 'struct B' to its private base class 'dr52::A'}} - // expected-note@#dr52-B {{declared private here}} + // expected-note@#dr52-B {{declared private here}} } namespace dr53 { // dr53: yes @@ -1171,8 +1165,7 @@ namespace dr76 { // dr76: yes const volatile int n = 1; int arr[n]; // #dr76-vla // expected-error@#dr76-vla {{variable length arrays in C++ are a Clang extension}} - // expected-note@#dr76-vla {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}} - + // expected-note@#dr76-vla {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}} // expected-error@#dr76-vla {{variable length array declaration not allowed at file scope}} } @@ -1346,8 +1339,7 @@ namespace dr92 { // dr92: 4 c++17 // since-cxx17-note@-2 {{use 'noexcept(false)' instead}} void (*p)() throw(int) = &f; // #dr92-p // since-cxx17-error@#dr92-p {{ISO C++17 does not allow dynamic exception specifications}} - // since-cxx17-note@#dr92-p {{use 'noexcept(false)' instead}} - + // since-cxx17-note@#dr92-p {{use 'noexcept(false)' instead}} // cxx98-14-error@#dr92-p {{target exception specification is not superset of source}} // since-cxx17-warning@#dr92-p {{target exception specification is not superset of source}} void (*q)() throw(int); @@ -1363,11 +1355,11 @@ namespace dr92 { // dr92: 4 c++17 g(f); // cxx98-14-error@-1 {{target exception specification is not superset of source}} // since-cxx17-error@-2 {{no matching function for call to 'g'}} - // since-cxx17-note@#dr92-g {{candidate function not viable: no known conversion from 'void () throw(int, float)' to 'void 
(*)() throw()' for 1st argument}} + // since-cxx17-note@#dr92-g {{candidate function not viable: no known conversion from 'void () throw(int, float)' to 'void (*)() throw()' for 1st argument}} g(q); // cxx98-14-error@-1 {{target exception specification is not superset of source}} // since-cxx17-error@-2 {{no matching function for call to 'g'}} - // since-cxx17-note@#dr92-g {{candidate function not viable: no known conversion from 'void (*)() throw(int)' to 'void (*)() throw()' for 1st argument}} + // since-cxx17-note@#dr92-g {{candidate function not viable: no known conversion from 'void (*)() throw(int)' to 'void (*)() throw()' for 1st argument}} } // Prior to C++17, this is OK because the exception specification is not diff --git a/clang/test/CXX/drs/dr1xx.cpp b/clang/test/CXX/drs/dr1xx.cpp index 50236eb7c9499d..4465e7e0f1bfdb 100644 --- a/clang/test/CXX/drs/dr1xx.cpp +++ b/clang/test/CXX/drs/dr1xx.cpp @@ -1,30 +1,31 @@ -// RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify=expected,cxx98,cxx98-11,cxx98-14,cxx98-17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,cxx98-11,cxx98-14,cxx98-17,cxx11-14 -fexceptions -fcxx-exceptions -pedantic-errors +// 
RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,cxx98-14,cxx98-17,cxx11-14 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17,cxx98-17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors namespace dr100 { // dr100: yes - template struct A {}; // expected-note 0-1{{declared here}} - template struct B {}; // expected-note 0-1{{declared here}} - template struct C {}; // expected-note 0-1{{declared here}} - template struct D {}; // expected-note 0-1{{declared here}} - A<&"foo"> a; // #100a - B<"bar"> b; // #100b - C<"baz"> c; // #100c - D<*"quux"> d; // #100d -#if __cplusplus < 201703L - // expected-error@#100a {{does not refer to any declaration}} - // expected-error@#100b {{does not refer to any declaration}} - // expected-error@#100c {{does not refer to any declaration}} - // expected-error@#100d {{does not refer to any declaration}} -#else - // expected-error@#100a {{pointer to string literal is not allowed in a template argument}} - // expected-error@#100b {{reference to string literal is not allowed in a template argument}} - // expected-error@#100c {{pointer to subobject of string literal is not allowed in a template argument}} - // expected-error@#100d {{reference to subobject of string literal is not allowed in a template argument}} -#endif + template struct A {}; // #dr100-A + template struct B {}; // #dr100-B + template struct C {}; // #dr100-C + template struct D {}; // #dr100-D + A<&"foo"> a; // #dr100-a + // cxx98-14-error@#dr100-a {{non-type template argument does not refer to any declaration}} + // 
cxx98-14-note@#dr100-A {{template parameter is declared here}} + // since-cxx17-error@#dr100-a {{pointer to string literal is not allowed in a template argument}} + B<"bar"> b; // #dr100-b + // cxx98-14-error@#dr100-b {{non-type template argument does not refer to any declaration}} + // cxx98-14-note@#dr100-B {{template parameter is declared here}} + // since-cxx17-error@#dr100-b {{reference to string literal is not allowed in a template argument}} + C<"baz"> c; // #dr100-c + // cxx98-14-error@#dr100-c {{non-type template argument does not refer to any declaration}} + // cxx98-14-note@#dr100-C {{template parameter is declared here}} + // since-cxx17-error@#dr100-c {{pointer to subobject of string literal is not allowed in a template argument}} + D<*"quux"> d; // #dr100-d + // cxx98-14-error@#dr100-d {{non-type template argument does not refer to any declaration}} + // cxx98-14-note@#dr100-D {{template parameter is declared here}} + // since-cxx17-error@#dr100-d {{reference to subobject of string literal is not allowed in a template argument}} } namespace dr101 { // dr101: 3.5 @@ -42,13 +43,16 @@ namespace dr101 { // dr101: 3.5 namespace dr102 { // dr102: yes namespace A { - template T f(T a, T b) { return a + b; } // expected-error {{neither visible in the template definition nor found by argument-dependent lookup}} + template T f(T a, T b) { return a + b; } + // expected-error@-1 {{call to function 'operator+' that is neither visible in the template definition nor found by argument-dependent lookup}} + // expected-note@#dr102-instantiation {{in instantiation of function template specialization 'dr102::A::f' requested here}} + // expected-note@#dr102-operator-plus {{'operator+' should be declared prior to the call site or in namespace 'dr102::B'}} } namespace B { struct S {}; } - B::S operator+(B::S, B::S); // expected-note {{should be declared prior to the call site or in namespace 'dr102::B'}} - template B::S A::f(B::S, B::S); // expected-note {{in instantiation 
of}} + B::S operator+(B::S, B::S); // #dr102-operator-plus + template B::S A::f(B::S, B::S); // #dr102-instantiation } // dr103: na @@ -58,13 +62,17 @@ namespace dr102 { // dr102: yes namespace dr106 { // dr106: sup 540 typedef int &r1; typedef r1 &r1; - typedef const r1 r1; // expected-warning {{has no effect}} - typedef const r1 &r1; // expected-warning {{has no effect}} + typedef const r1 r1; + // expected-warning@-1 {{'const' qualifier on reference type 'r1' (aka 'int &') has no effect}} + typedef const r1 &r1; + // expected-warning@-1 {{'const' qualifier on reference type 'r1' (aka 'int &') has no effect}} typedef const int &r2; typedef r2 &r2; - typedef const r2 r2; // expected-warning {{has no effect}} - typedef const r2 &r2; // expected-warning {{has no effect}} + typedef const r2 r2; + // expected-warning@-1 {{'const' qualifier on reference type 'r2' (aka 'const int &') has no effect}} + typedef const r2 &r2; + // expected-warning@-1 {{'const' qualifier on reference type 'r2' (aka 'const int &') has no effect}} } namespace dr107 { // dr107: yes @@ -76,10 +84,9 @@ namespace dr108 { // dr108: 2.9 template struct A { struct B { typedef int X; }; B::X x; -#if __cplusplus <= 201703L - // expected-error@-2 {{implicit 'typename' is a C++20 extension}} -#endif - struct C : B { X x; }; // expected-error {{unknown type name}} + // cxx98-17-error@-1 {{missing 'typename' prior to dependent type name B::X; implicit 'typename' is a C++20 extension}} + struct C : B { X x; }; + // expected-error@-1 {{unknown type name 'X'}} }; template<> struct A::B { int X; }; } @@ -87,46 +94,55 @@ namespace dr108 { // dr108: 2.9 namespace dr109 { // dr109: yes struct A { template void f(T); }; template struct B : T { - using T::template f; // expected-error {{'template' keyword not permitted here}} - using T::template f; // expected-error {{'template' keyword not permitted here}} expected-error {{using declaration cannot refer to a template specialization}} + using T::template f; + // 
expected-error@-1 {{'template' keyword not permitted here}} + using T::template f; + // expected-error@-1 {{'template' keyword not permitted here}} + // expected-error@-2 {{using declaration cannot refer to a template specialization}} // FIXME: We shouldn't suggest using the 'template' keyword in a location where it's not valid. - using T::f; // expected-error {{use 'template' keyword}} expected-error {{using declaration cannot refer to a template specialization}} - void g() { this->f(123); } // expected-error {{use 'template' keyword}} + using T::f; + // expected-error@-1 {{use 'template' keyword to treat 'f' as a dependent template name}} + // expected-error@-2 {{using declaration cannot refer to a template specialization}} + void g() { this->f(123); } + // expected-error@-1 {{use 'template' keyword to treat 'f' as a dependent template name}} }; } namespace dr111 { // dr111: dup 535 struct A { A(); A(volatile A&, int = 0); A(A&, const char * = "foo"); }; - struct B : A { B(); }; // expected-note +{{would lose const qualifier}} expected-note {{requires 0 arguments}} + struct B : A { B(); }; // #dr111-B const B b1; - B b2(b1); // expected-error {{no matching constructor}} + B b2(b1); + // expected-error@-1 {{no matching constructor for initialization of 'B'}} + // expected-note@#dr111-B {{candidate constructor (the implicit copy constructor) not viable: 1st argument ('const B') would lose const qualifier}} + // expected-note@#dr111-B {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} } namespace dr112 { // dr112: yes struct T { int n; }; typedef T Arr[1]; - const T a1[1] = {}; + const T a1[1] = {}; // #dr112-a1 volatile T a2[1] = {}; - const Arr a3 = {}; + const Arr a3 = {}; // #dr112-a3 volatile Arr a4 = {}; template struct X {}; + // FIXME: Test this somehow in C++11 and on. 
X x1; + // cxx98-error@-1 {{non-type template argument referring to object 'a1' with internal linkage is a C++11 extension}} + // cxx98-note@#dr112-a1 {{non-type template argument refers to object here}} X x2; X x3; + // cxx98-error@-1 {{non-type template argument referring to object 'a3' with internal linkage is a C++11 extension}} + // cxx98-note@#dr112-a3 {{non-type template argument refers to object here}} X x4; -#if __cplusplus < 201103L - // expected-error@-5 {{internal linkage}} expected-note@-10 {{here}} - // expected-error@-4 {{internal linkage}} expected-note@-9 {{here}} -#else - // FIXME: Test this somehow. -#endif } namespace dr113 { // dr113: yes extern void (*p)(); void f() { - no_such_function(); // expected-error {{undeclared}} + no_such_function(); + // expected-error@-1 {{use of undeclared identifier 'no_such_function'}} p(); } void g(); @@ -135,31 +151,48 @@ namespace dr113 { // dr113: yes namespace dr114 { // dr114: yes struct A { - virtual void f(int) = 0; // expected-note {{unimplemented}} + virtual void f(int) = 0; // #dr114-A-f }; struct B : A { template void f(T); void g() { f(0); } - } b; // expected-error {{abstract}} + } b; + // expected-error@-1 {{variable type 'struct B' is an abstract class}} + // expected-note@#dr114-A-f {{unimplemented pure virtual method 'f' in 'B'}} } namespace dr115 { // dr115: 3.0 - template int f(T); // expected-note +{{}} - template int g(T); // expected-note +{{}} - template int g(T, int); // expected-note +{{}} + template int f(T); // #dr115-f + template int g(T); // #dr115-g + template int g(T, int); // #dr115-g-int - int k1 = f(&f); // expected-error {{no match}} + int k1 = f(&f); + // expected-error@-1 {{no matching function for call to 'f'}} + // expected-note@#dr115-f {{candidate template ignored: couldn't infer template argument 'T'}} int k2 = f(&f); - int k3 = f(&g); // expected-error {{no match}} + int k3 = f(&g); + // expected-error@-1 {{no matching function for call to 'f'}} + // 
expected-note@#dr115-f {{candidate template ignored: couldn't infer template argument 'T'}} void h() { - (void)&f; // expected-error {{address of overloaded function 'f' cannot be cast to type 'void'}} + (void)&f; + // expected-error@-1 {{address of overloaded function 'f' cannot be cast to type 'void'}} + // expected-note@#dr115-f {{candidate function template}} (void)&f; - (void)&g; // expected-error {{address of overloaded function 'g' cannot be cast to type 'void'}} - - &f; // expected-error {{reference to overloaded function could not be resolved}} - &f; // expected-warning {{unused}} - &g; // expected-error {{reference to overloaded function could not be resolved}} + (void)&g; + // expected-error@-1 {{address of overloaded function 'g' cannot be cast to type 'void'}} + // expected-note@#dr115-g-int {{candidate function template}} + // expected-note@#dr115-g {{candidate function template}} + + &f; + // expected-error@-1 {{reference to overloaded function could not be resolved; did you mean to call it?}} + // expected-note@#dr115-f {{possible target for call}} + &f; + // expected-warning@-1 {{expression result unused}} + &g; + // expected-error@-1 {{reference to overloaded function could not be resolved; did you mean to call it?}} + // expected-note@#dr115-g-int {{possible target for call}} + // expected-note@#dr115-g {{possible target for call}} } struct S { @@ -168,18 +201,25 @@ namespace dr115 { // dr115: 3.0 template static int g(T, int); } s; - int k4 = f(&s.f); // expected-error {{non-constant pointer to member}} + int k4 = f(&s.f); + // expected-error@-1 {{cannot create a non-constant pointer to member function}} int k5 = f(&s.f); - int k6 = f(&s.g); // expected-error {{non-constant pointer to member}} + int k6 = f(&s.g); + // expected-error@-1 {{cannot create a non-constant pointer to member function}} void i() { - (void)&s.f; // expected-error {{non-constant pointer to member}} + (void)&s.f; + // expected-error@-1 {{cannot create a non-constant pointer 
to member function}} (void)&s.f; - (void)&s.g; // expected-error {{non-constant pointer to member}} - - &s.f; // expected-error {{non-constant pointer to member}} - &s.f; // expected-warning {{unused}} - &s.g; // expected-error {{non-constant pointer to member}} + (void)&s.g; + // expected-error@-1 {{cannot create a non-constant pointer to member function}} + + &s.f; + // expected-error@-1 {{cannot create a non-constant pointer to member function}} + &s.f; + // expected-warning@-1 {{expression result unused}} + &s.g; + // expected-error@-1 {{cannot create a non-constant pointer to member function}} } struct T { @@ -188,40 +228,58 @@ namespace dr115 { // dr115: 3.0 template int g(T, int); } t; - int k7 = f(&s.f); // expected-error {{non-constant pointer to member}} + int k7 = f(&s.f); + // expected-error@-1 {{cannot create a non-constant pointer to member function}} int k8 = f(&s.f); - int k9 = f(&s.g); // expected-error {{non-constant pointer to member}} + int k9 = f(&s.g); + // expected-error@-1 {{cannot create a non-constant pointer to member function}} void j() { - (void)&s.f; // expected-error {{non-constant pointer to member}} + (void)&s.f; + // expected-error@-1 {{cannot create a non-constant pointer to member function}} (void)&s.f; - (void)&s.g; // expected-error {{non-constant pointer to member}} - - &s.f; // expected-error {{non-constant pointer to member}} - &s.f; // expected-warning {{unused}} - &s.g; // expected-error {{non-constant pointer to member}} + (void)&s.g; + // expected-error@-1 {{cannot create a non-constant pointer to member function}} + + &s.f; + // expected-error@-1 {{cannot create a non-constant pointer to member function}} + &s.f; + // expected-warning@-1 {{expression result unused}} + &s.g; + // expected-error@-1 {{cannot create a non-constant pointer to member function}} } #if __cplusplus >= 201103L // Special case kicks in only if a template argument list is specified. 
- template void with_default(); // expected-note +{{}} - int k10 = f(&with_default); // expected-error {{no matching function}} + template void with_default(); // #dr115-with-default + int k10 = f(&with_default); + // expected-error@-1 {{no matching function for call to 'f'}} + // expected-note@#dr115-f {{candidate template ignored: couldn't infer template argument 'T'}} int k11 = f(&with_default<>); void k() { - (void)&with_default; // expected-error {{overloaded function}} + (void)&with_default; + // expected-error@-1 {{address of overloaded function 'with_default' cannot be cast to type 'void'}} + // expected-note@#dr115-with-default {{candidate function template}} (void)&with_default<>; - &with_default; // expected-error {{overloaded function}} - &with_default<>; // expected-warning {{unused}} + &with_default; + // expected-error@-1 {{reference to overloaded function could not be resolved; did you mean to call it?}} + // expected-note@#dr115-with-default {{possible target for call}} + &with_default<>; + // expected-warning@-1 {{expression result unused}} } #endif } namespace dr116 { // dr116: yes template struct A {}; - template void f(A) {} // expected-note {{previous}} - template void f(A) {} // expected-error {{redefinition}} - template void f(A) {} // expected-note {{previous}} - template void f(A) {} // expected-error {{redefinition}} + template void f(A) {} // #dr116-f-N + template void f(A) {} + // expected-error@-1 {{redefinition of 'f'}} + // expected-note@#dr116-f-N {{previous definition is here}} + template void f(A) {} // #dr116-f-T + template void f(A) {} + // expected-error@-1 {{redefinition of 'f'}} + // expected-note@#dr116-f-T {{previous definition is here}} } // dr117: na @@ -235,7 +293,9 @@ namespace dr121 { // dr121: yes }; template struct Z { X::Y x; - T::Y y; // expected-error +{{}} + T::Y y; + // expected-error@-1 {{use 'template' keyword to treat 'Y' as a dependent template name}} + // cxx98-17-error@-2 {{missing 'typename' prior to 
dependent type name T::Y; implicit 'typename' is a C++20 extension}} }; Z z; } @@ -249,15 +309,19 @@ namespace dr122 { // dr122: yes // dr124: dup 201 // dr125: yes -struct dr125_A { struct dr125_B {}; }; // expected-note {{here}} +struct dr125_A { struct dr125_B {}; }; // #dr125_B dr125_A::dr125_B dr125_C(); namespace dr125_B { dr125_A dr125_C(); } namespace dr125 { struct X { friend dr125_A::dr125_B (::dr125_C)(); // ok friend dr125_A (::dr125_B::dr125_C)(); // ok - friend dr125_A::dr125_B::dr125_C(); // expected-error {{did you mean the constructor name 'dr125_B'?}} - // expected-error@-1 {{missing exception specification}} + friend dr125_A::dr125_B::dr125_C(); // #dr125_C + // expected-error@#dr125_C {{missing return type for function 'dr125_C'; did you mean the constructor name 'dr125_B'?}} + // cxx98-error@#dr125_C {{'dr125_B' is missing exception specification 'throw()'}} + // cxx98-note@#dr125_B {{previous declaration is here}} + // since-cxx11-error@#dr125_C {{'dr125_B' is missing exception specification 'noexcept'}} + // since-cxx11-note@#dr125_B {{previous declaration is here}} }; } @@ -275,7 +339,6 @@ namespace dr126 { // dr126: partial // So, when catching by non-const (or volatile) reference to pointer, we // should compare the exception type to the caught type and only accept an // exact match. 
-#if __cplusplus <= 201402L struct C {}; struct D : C {}; struct E : private C { friend class A; friend class B; }; @@ -283,53 +346,64 @@ namespace dr126 { // dr126: partial struct G : C {}; struct H : D, G {}; +#if __cplusplus <= 201402L struct A { virtual void cp() throw(C*); virtual void dp() throw(C*); - virtual void ep() throw(C*); // expected-note {{overridden}} - virtual void fp() throw(C*); // expected-note {{overridden}} + virtual void ep() throw(C*); // #dr126-ep + virtual void fp() throw(C*); // #dr126-fp virtual void gp() throw(C*); - virtual void hp() throw(C*); // expected-note {{overridden}} + virtual void hp() throw(C*); // #dr126-hp virtual void cr() throw(C&); virtual void dr() throw(C&); - virtual void er() throw(C&); // expected-note {{overridden}} - virtual void fr() throw(C&); // expected-note {{overridden}} + virtual void er() throw(C&); // #dr126-er + virtual void fr() throw(C&); // #dr126-fr virtual void gr() throw(C&); - virtual void hr() throw(C&); // expected-note {{overridden}} + virtual void hr() throw(C&); // #dr126-hr virtual void pv() throw(void*); -#if __cplusplus >= 201103L virtual void np() throw(C*); virtual void npm() throw(int C::*); - virtual void nr() throw(C*&); // expected-note {{overridden}} + virtual void nr() throw(C*&); // #dr126-nr virtual void ncr() throw(C*const&); -#endif virtual void ref1() throw(C *const&); virtual void ref2() throw(C *); virtual void v() throw(int); virtual void w() throw(const int); - virtual void x() throw(int*); // expected-note {{overridden}} + virtual void x() throw(int*); // #dr126-x virtual void y() throw(const int*); - virtual void z() throw(int); // expected-note {{overridden}} + virtual void z() throw(int); // #dr126-z }; struct B : A { virtual void cp() throw(C*); virtual void dp() throw(D*); - virtual void ep() throw(E*); // expected-error {{more lax}} - virtual void fp() throw(F*); // expected-error {{more lax}} + virtual void ep() throw(E*); + // cxx98-14-error@-1 {{exception 
specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-ep {{overridden virtual function is here}} + virtual void fp() throw(F*); + // cxx98-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-fp {{overridden virtual function is here}} virtual void gp() throw(G*); - virtual void hp() throw(H*); // expected-error {{more lax}} + virtual void hp() throw(H*); + // cxx98-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-hp {{overridden virtual function is here}} virtual void cr() throw(C&); virtual void dr() throw(D&); - virtual void er() throw(E&); // expected-error {{more lax}} - virtual void fr() throw(F&); // expected-error {{more lax}} + virtual void er() throw(E&); + // cxx98-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-er {{overridden virtual function is here}} + virtual void fr() throw(F&); + // cxx98-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-fr {{overridden virtual function is here}} virtual void gr() throw(G&); - virtual void hr() throw(H&); // expected-error {{more lax}} + virtual void hr() throw(H&); + // cxx98-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-hr {{overridden virtual function is here}} virtual void pv() throw(C*); @@ -337,22 +411,29 @@ namespace dr126 { // dr126: partial using nullptr_t = decltype(nullptr); virtual void np() throw(nullptr_t); virtual void npm() throw(nullptr_t&); - virtual void nr() throw(nullptr_t); // expected-error {{more lax}} + virtual void nr() throw(nullptr_t); + // cxx11-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx11-14-note@#dr126-nr {{overridden virtual function is here}} 
virtual void ncr() throw(nullptr_t); -#endif +#endif // __cplusplus >= 201103L virtual void ref1() throw(D *const &); virtual void ref2() throw(D *); virtual void v() throw(const int); virtual void w() throw(int); - virtual void x() throw(const int*); // expected-error {{more lax}} + virtual void x() throw(const int*); + // cxx98-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-x {{overridden virtual function is here}} virtual void y() throw(int*); // ok - virtual void z() throw(long); // expected-error {{more lax}} + virtual void z() throw(long); + // cxx98-14-error@-1 {{exception specification of overriding function is more lax than base version}} + // cxx98-14-note@#dr126-z {{overridden virtual function is here}} }; -#else - void f() throw(int); // expected-error {{ISO C++17 does not allow}} expected-note {{use 'noexcept}} -#endif +#endif // __cplusplus <= 201402L + void f() throw(int); + // since-cxx17-error@-1 {{ISO C++17 does not allow dynamic exception specifications}} + // since-cxx17-note@-2 {{use 'noexcept(false)' instead}} } namespace dr127 { // dr127: yes @@ -360,11 +441,16 @@ namespace dr127 { // dr127: yes template struct A { A() { throw 0; } void *operator new(size_t, const char * = 0); - void operator delete(void *, const char *) { T::error; } // expected-error 2{{no members}} + void operator delete(void *, const char *) { T::error; } // #dr127-delete-const-char + // expected-error@#dr127-delete-const-char {{type 'void' cannot be used prior to '::' because it has no members}} + // expected-note@#dr127-p {{in instantiation of member function 'dr127::A::operator delete' requested here}} + + // expected-error@#dr127-delete-const-char {{type 'int' cannot be used prior to '::' because it has no members}} + // expected-note@#dr127-q {{in instantiation of member function 'dr127::A::operator delete' requested here}} void operator delete(void *) { T::error; } }; - A *p = new A; // 
expected-note {{instantiat}} - A *q = new ("") A; // expected-note {{instantiat}} + A *p = new A; // #dr127-p + A *q = new ("") A; // #dr127-q } namespace dr128 { // dr128: yes @@ -401,36 +487,50 @@ namespace dr135 { // dr135: yes } namespace dr136 { // dr136: 3.4 - void f(int, int, int = 0); // expected-note {{previous declaration is here}} - void g(int, int, int); // expected-note {{previous declaration is here}} + void f(int, int, int = 0); // #dr136-f + void g(int, int, int); // #dr136-g struct A { - friend void f(int, int = 0, int); // expected-error {{friend declaration specifying a default argument must be the only declaration}} - friend void g(int, int, int = 0); // expected-error {{friend declaration specifying a default argument must be the only declaration}} - friend void h(int, int, int = 0); // expected-error {{friend declaration specifying a default argument must be a definition}} - friend void i(int, int, int = 0) {} // expected-note {{previous declaration is here}} + friend void f(int, int = 0, int); + // expected-error@-1 {{friend declaration specifying a default argument must be the only declaration}} + // expected-note@#dr136-f {{previous declaration is here}} + friend void g(int, int, int = 0); + // expected-error@-1 {{friend declaration specifying a default argument must be the only declaration}} + // expected-note@#dr136-g {{previous declaration is here}} + friend void h(int, int, int = 0); + // expected-error@-1 {{friend declaration specifying a default argument must be a definition}} + friend void i(int, int, int = 0) {} // #dr136-A-i friend void j(int, int, int = 0) {} operator int(); }; - void i(int, int, int); // expected-error {{friend declaration specifying a default argument must be the only declaration}} + void i(int, int, int); + // expected-error@-1 {{friend declaration specifying a default argument must be the only declaration}} + // expected-note@#dr136-A-i {{previous declaration is here}} void q() { j(A(), A()); // ok, has 
default argument } - extern "C" void k(int, int, int, int); // expected-note 2{{previous declaration is here}} + extern "C" void k(int, int, int, int); // #dr136-k namespace NSA { struct A { - friend void dr136::k(int, int, int, int = 0); // expected-error {{friend declaration specifying a default argument must be the only declaration}} + friend void dr136::k(int, int, int, int = 0); + // expected-error@-1 {{friend declaration specifying a default argument must be the only declaration}} + // expected-note@#dr136-k {{previous declaration is here}} }; } namespace NSB { struct A { - friend void dr136::k(int, int, int = 0, int); // expected-error {{missing default argument on parameter}} expected-error {{must be the only declaration}} + friend void dr136::k(int, int, int = 0, int); // #dr136-friend-k + // expected-error@#dr136-friend-k {{friend declaration specifying a default argument must be the only declaration}} + // expected-note@#dr136-k {{previous declaration is here}} + // expected-error@#dr136-friend-k {{missing default argument on parameter}} }; } struct B { - void f(int); // expected-note {{previous declaration is here}} + void f(int); // #dr136-B-f }; struct C { - friend void B::f(int = 0); // expected-error {{friend declaration specifying a default argument must be the only declaration}} + friend void B::f(int = 0); + // expected-error@-1 {{friend declaration specifying a default argument must be the only declaration}} + // expected-note@#dr136-B-f {{previous declaration is here}} }; } @@ -440,13 +540,18 @@ namespace dr137 { // dr137: yes extern volatile void *vp; extern const volatile void *cvp; int *q = static_cast(p); - int *qc = static_cast(cp); // expected-error {{casts away qualifiers}} - int *qv = static_cast(vp); // expected-error {{casts away qualifiers}} - int *qcv = static_cast(cvp); // expected-error {{casts away qualifiers}} + int *qc = static_cast(cp); + // expected-error@-1 {{static_cast from 'const void *' to 'int *' casts away qualifiers}} 
+ int *qv = static_cast(vp); + // expected-error@-1 {{static_cast from 'volatile void *' to 'int *' casts away qualifiers}} + int *qcv = static_cast(cvp); + // expected-error@-1 {{static_cast from 'const volatile void *' to 'int *' casts away qualifiers}} const int *cq = static_cast(p); const int *cqc = static_cast(cp); - const int *cqv = static_cast(vp); // expected-error {{casts away qualifiers}} - const int *cqcv = static_cast(cvp); // expected-error {{casts away qualifiers}} + const int *cqv = static_cast(vp); + // expected-error@-1 {{static_cast from 'volatile void *' to 'const int *' casts away qualifiers}} + const int *cqcv = static_cast(cvp); + // expected-error@-1 {{static_cast from 'const volatile void *' to 'const int *' casts away qualifiers}} const volatile int *cvq = static_cast(p); const volatile int *cvqc = static_cast(cp); const volatile int *cvqv = static_cast(vp); @@ -455,9 +560,11 @@ namespace dr137 { // dr137: yes namespace dr139 { // dr139: yes namespace example1 { - typedef int f; // expected-note {{previous}} + typedef int f; // #dr139-typedef-f struct A { - friend void f(A &); // expected-error {{different kind of symbol}} + friend void f(A &); + // expected-error@-1 {{redefinition of 'f' as different kind of symbol}} + // expected-note@#dr139-typedef-f {{previous definition is here}} }; } @@ -474,35 +581,41 @@ namespace dr139 { // dr139: yes } namespace dr140 { // dr140: yes - void f(int *const) {} // expected-note {{previous}} - void f(int[3]) {} // expected-error {{redefinition}} + void f(int *const) {} // #dr140-f-first + void f(int[3]) {} + // expected-error@-1 {{redefinition of 'f'}} + // expected-note@#dr140-f-first {{previous definition is here}} void g(const int); void g(int n) { n = 2; } } namespace dr141 { // dr141: 3.1 template void f(); - template struct S { int n; }; // expected-note {{'::dr141::S::n' declared here}} + template struct S { int n; }; // #dr141-S struct A : S { template void f(); - template struct S {}; + 
template struct S {}; // #dr141-A-S } a; struct B : S {} b; void g() { a.f(); - (void)a.S::n; // expected-error {{no member named 'n' in 'dr141::A::S'; did you mean '::dr141::S::n'?}} -#if __cplusplus < 201103L - // expected-error@-2 {{ambiguous}} - // expected-note@-11 {{lookup from the current scope}} - // expected-note@-9 {{lookup in the object type}} -#endif - b.f(); // expected-error {{no member}} expected-error +{{}} + (void)a.S::n; // #dr141-a + // cxx98-error@#dr141-a {{lookup of 'S' in member access expression is ambiguous; using member of 'struct A'}} + // cxx98-note@#dr141-A-S {{lookup in the object type 'struct A' refers here}} + // cxx98-note@#dr141-S {{lookup from the current scope refers here}} + // expected-error@#dr141-a {{no member named 'n' in 'dr141::A::S'; did you mean '::dr141::S::n'?}} + // expected-note@#dr141-S {{'::dr141::S::n' declared here}} + // FIXME: we issue a useful diagnostic first, then some bogus ones. + b.f(); + // expected-error@-1 {{no member named 'f' in 'dr141::B'}} + // expected-error@-2 +{{}} (void)b.S::n; } template struct C { T t; void g() { - t.f(); // expected-error {{use 'template'}} + t.f(); + // expected-error@-1 {{use 'template' keyword to treat 'f' as a dependent template name}} } void h() { (void)t.S::n; // ok @@ -519,28 +632,53 @@ namespace dr141 { // dr141: 3.1 } namespace dr142 { // dr142: 2.8 - class B { // expected-note +{{here}} + class B { // #dr142-B public: - int mi; // expected-note +{{here}} - static int si; // expected-note +{{here}} + int mi; // #dr142-B-mi + static int si; // #dr142-B-si }; - class D : private B { // expected-note +{{here}} + class D : private B { // #dr142-D }; class DD : public D { void f(); }; void DD::f() { - mi = 3; // expected-error {{private member}} - si = 3; // expected-error {{private member}} - B b_old; // expected-error {{private member}} + mi = 3; + // expected-error@-1 {{'mi' is a private member of 'dr142::B'}} + // expected-note@#dr142-D {{constrained by private 
inheritance here}} + // expected-note@#dr142-B-mi {{member is declared here}} + si = 3; + // expected-error@-1 {{'si' is a private member of 'dr142::B'}} + // expected-note@#dr142-D {{constrained by private inheritance here}} + // expected-note@#dr142-B-si {{member is declared here}} + B b_old; + // expected-error@-1 {{'B' is a private member of 'dr142::B'}} + // expected-note@#dr142-D {{constrained by private inheritance here}} + // expected-note@#dr142-B {{member is declared here}} dr142::B b; b.mi = 3; b.si = 3; - B::si = 3; // expected-error {{private member}} + B::si = 3; + // expected-error@-1 {{'B' is a private member of 'dr142::B'}} + // expected-note@#dr142-D {{constrained by private inheritance here}} + // expected-note@#dr142-B {{member is declared here}} dr142::B::si = 3; - B *bp1_old = this; // expected-error {{private member}} expected-error {{private base class}} - dr142::B *bp1 = this; // expected-error {{private base class}} - B *bp2_old = (B*)this; // expected-error 2{{private member}} + B *bp1_old = this; // #dr142-bp1_old + // expected-error@#dr142-bp1_old {{'B' is a private member of 'dr142::B'}} + // expected-note@#dr142-D {{constrained by private inheritance here}} + // expected-note@#dr142-B {{member is declared here}} + // expected-error@#dr142-bp1_old {{cannot cast 'dr142::DD' to its private base class 'B'}} + // expected-note@#dr142-D {{declared private here}} + dr142::B *bp1 = this; + // expected-error@-1 {{cannot cast 'dr142::DD' to its private base class 'dr142::B'}} + // expected-note@#dr142-D {{declared private here}} + B *bp2_old = (B*)this; // #dr142-bp2_old + // expected-error@#dr142-bp2_old {{'B' is a private member of 'dr142::B'}} + // expected-note@#dr142-D {{constrained by private inheritance here}} + // expected-note@#dr142-B {{member is declared here}} + // expected-error@#dr142-bp2_old {{'B' is a private member of 'dr142::B'}} + // expected-note@#dr142-D {{constrained by private inheritance here}} + // 
expected-note@#dr142-B {{member is declared here}} dr142::B *bp2 = (dr142::B*)this; bp2->mi = 3; } @@ -553,19 +691,19 @@ namespace dr143 { // dr143: yes struct X { friend void B::f(X); }; } void g(A::X x) { - f(x); // expected-error {{undeclared identifier 'f'}} + f(x); + // expected-error@-1 {{use of undeclared identifier 'f'}} } } namespace dr145 { // dr145: yes void f(bool b) { -#if __cplusplus <= 201402L - ++b; // expected-warning {{deprecated}} - b++; // expected-warning {{deprecated}} -#else - ++b; // expected-error {{increment}} - b++; // expected-error {{increment}} -#endif + ++b; + // cxx98-14-warning@-1 {{incrementing expression of type bool is deprecated and incompatible with C++17}} + // since-cxx17-error@-2 {{ISO C++17 does not allow incrementing expression of type bool}} + b++; + // cxx98-14-warning@-1 {{incrementing expression of type bool is deprecated and incompatible with C++17}} + // since-cxx17-error@-2 {{ISO C++17 does not allow incrementing expression of type bool}} } } @@ -576,13 +714,15 @@ namespace dr147 { // dr147: yes }; // Per core issue 1435, this is ill-formed because A::A does not // name the injected-class-name. (A::A does, though.) 
- template<> template<> A::A(int) {} // expected-error {{out-of-line constructor for 'A' cannot have template arguments}} + template<> template<> A::A(int) {} + // expected-error@-1 {{out-of-line constructor for 'A' cannot have template arguments}} template<> template<> A::A(float) {} } namespace example2 { struct A { A(); }; struct B : A { B(); }; - A::A a1; // expected-error {{is a constructor}} + A::A a1; + // expected-error@-1 {{qualified reference to 'A' is a constructor name rather than a type in this context}} B::A a2; } namespace example3 { @@ -590,7 +730,8 @@ namespace dr147 { // dr147: yes template A(T); static A a; }; - template<> A::A(A::a); // expected-error {{is a constructor}} + template<> A::A(A::a); + // expected-error@-1 {{qualified reference to 'A' is a constructor name rather than a template name in this context}} } } @@ -616,24 +757,28 @@ namespace dr151 { // dr151: 3.1 namespace dr152 { // dr152: yes struct A { - A(); // expected-note 0-2{{not viable}} - explicit A(const A&); // expected-note 1-2{{not a candidate}} + A(); // #dr152-A-ctor + explicit A(const A&); // #dr152-A-explicit-ctor }; A a1 = A(); -#if __cplusplus <= 201402L - // expected-error@-2 {{no matching constructor}} -#endif + // cxx98-14-error@-1 {{no matching constructor for initialization of 'A'}} + // cxx98-14-note@#dr152-A-explicit-ctor {{explicit constructor is not a candidate}} + // cxx98-14-note@#dr152-A-ctor {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} A a2((A())); A &f(); - A a3 = f(); // expected-error {{no matching constructor}} + A a3 = f(); + // expected-error@-1 {{no matching constructor for initialization of 'A'}} + // expected-note@#dr152-A-explicit-ctor {{explicit constructor is not a candidate}} + // expected-note@#dr152-A-ctor {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} A a4(f()); } // dr153: na namespace dr154 { // dr154: yes - union { int a; }; // expected-error {{must be declared 'static'}} 
+ union { int a; }; + // expected-error@-1 {{nonymous unions at namespace or global scope must be declared 'static'}} namespace { union { int b; }; } @@ -641,7 +786,8 @@ namespace dr154 { // dr154: yes } namespace dr155 { // dr155: dup 632 - struct S { int n; } s = { { 1 } }; // expected-warning {{braces around scalar initializer}} + struct S { int n; } s = { { 1 } }; + // expected-warning@-1 {{braces around scalar initializer}} } // dr158 is in its own file. @@ -649,7 +795,8 @@ namespace dr155 { // dr155: dup 632 namespace dr159 { // dr159: 3.5 namespace X { void f(); } void f(); - void dr159::f() {} // expected-warning {{extra qualification}} + void dr159::f() {} + // expected-warning@-1 {{extra qualification on member 'f'}} void dr159::X::f() {} } @@ -658,9 +805,9 @@ namespace dr159 { // dr159: 3.5 namespace dr161 { // dr161: 3.1 class A { protected: - struct B { int n; } b; // expected-note 2{{here}} + struct B { int n; } b; // #dr161-B static B bs; - void f(); // expected-note {{here}} + void f(); // #dr161-f static void sf(); }; struct C : A {}; @@ -669,13 +816,19 @@ namespace dr161 { // dr161: 3.1 (void)b.n; B b1; C::B b2; // ok, accessible as a member of A - (void)&C::b; // expected-error {{protected}} + (void)&C::b; + // expected-error@-1 {{'b' is a protected member of 'dr161::A'}} + // expected-note@#dr161-B {{declared protected here}} (void)&C::bs; - (void)c.b; // expected-error {{protected}} + (void)c.b; + // expected-error@-1 {{'b' is a protected member of 'dr161::A'}} + // expected-note@#dr161-B {{declared protected here}} (void)c.bs; f(); sf(); - c.f(); // expected-error {{protected}} + c.f(); + // expected-error@-1 {{protected}} + // expected-note@#dr161-f {{declared protected here}} c.sf(); A::f(); D::f(); @@ -692,13 +845,17 @@ namespace dr162 { // dr162: no static int &f(int); void g() { - int &a = (&A::f)(0); // FIXME: expected-error {{could not be resolved}} - char &b = (&A::f)('0'); // expected-error {{could not be resolved}} + int &a = 
(&A::f)(0); + // FIXME: expected-error@-1 {{reference to overloaded function could not be resolved; did you mean to call it?}} + char &b = (&A::f)('0'); + // expected-error@-1 {{reference to overloaded function could not be resolved; did you mean to call it?}} } }; - int &c = (&A::f)(0); // FIXME: expected-error {{could not be resolved}} - char &d = (&A::f)('0'); // expected-error {{could not be resolved}} + int &c = (&A::f)(0); + // FIXME: expected-error@-1 {{reference to overloaded function could not be resolved; did you mean to call it?}} + char &d = (&A::f)('0'); + // expected-error@-1 {{reference to overloaded function could not be resolved; did you mean to call it?}} } // dr163: na @@ -729,7 +886,10 @@ namespace dr166 { // dr166: 2.9 template int f(T t) { return t.n; } int g(A::X); - template int h(T t) { return t.n; } // expected-error {{private}} + template int h(T t) { return t.n; } + // expected-error@-1 {{'n' is a private member of 'dr166::A::X'}} + // expected-note@#dr166-h-instantiation {{in instantiation of function template specialization 'dr166::h' requested here}} + // expected-note@#dr166-X-n {{implicitly declared private here}} int i(A::X); namespace A { @@ -738,7 +898,7 @@ namespace dr166 { // dr166: 2.9 friend int dr166::g(X); friend int h(X); friend int i(X); - int n; // expected-note 2{{here}} + int n; // #dr166-X-n }; int h(X x) { return x.n; } @@ -747,8 +907,10 @@ namespace dr166 { // dr166: 2.9 template int f(A::X); int g(A::X x) { return x.n; } - template int h(A::X); // expected-note {{instantiation}} - int i(A::X x) { return x.n; } // expected-error {{private}} + template int h(A::X); // #dr166-h-instantiation + int i(A::X x) { return x.n; } + // expected-error@-1 {{'n' is a private member of 'dr166::A::X'}} + // expected-note@#dr166-X-n {{implicitly declared private here}} } // dr167: sup 1012 @@ -768,23 +930,29 @@ namespace dr169 { // dr169: yes struct B { template struct C; template void f(); - template static int n; // 
expected-error 0-1{{extension}} + template static int n; + // cxx98-11-error@-1 {{variable templates are a C++14 extension}} }; struct D : A, B { using A::n; - using B::C; // expected-error {{using declaration cannot refer to a template specialization}} - using B::f; // expected-error {{using declaration cannot refer to a template specialization}} - using B::n; // expected-error {{using declaration cannot refer to a template specialization}} + using B::C; + // expected-error@-1 {{using declaration cannot refer to a template specialization}} + using B::f; + // expected-error@-1 {{using declaration cannot refer to a template specialization}} + using B::n; + // expected-error@-1 {{using declaration cannot refer to a template specialization}} }; } namespace { // dr171: yes int dr171a; } -int dr171b; // expected-note {{here}} +int dr171b; // #dr171b-int namespace dr171 { extern "C" void dr171a(); - extern "C" void dr171b(); // expected-error {{conflicts}} + extern "C" void dr171b(); + // expected-error@-1 {{declaration of 'dr171b' with C language linkage conflicts with declaration in global scope}} + // expected-note@#dr171b-int {{declared in global scope here}} } namespace dr172 { // dr172: yes @@ -810,12 +978,14 @@ namespace dr172 { // dr172: yes int check6a[sizeof(d) == sizeof(unsigned long) ? 1 : -1]; int check6b[-d > 0 ? 1 : -1]; - enum { e = (unsigned long long)-1 / 2 }; // expected-error 0-1{{extension}} - int check7a[sizeof(e) == sizeof(long) ? 1 : -1]; // expected-error 0-1{{extension}} + enum { e = (unsigned long long)-1 / 2 }; + // cxx98-error@-1 {{'long long' is a C++11 extension}} + int check7a[sizeof(e) == sizeof(long) ? 1 : -1]; int check7b[-e < 0 ? 1 : -1]; - enum { f = (unsigned long long)-1 / 2 + 1 }; // expected-error 0-1{{extension}} - int check8a[sizeof(f) == sizeof(unsigned long) ? 
1 : -1]; // expected-error 0-1{{extension}} + enum { f = (unsigned long long)-1 / 2 + 1 }; + // cxx98-error@-1 {{'long long' is a C++11 extension}} + int check8a[sizeof(f) == sizeof(unsigned long) ? 1 : -1]; int check8b[-f > 0 ? 1 : -1]; } @@ -828,10 +998,13 @@ namespace dr173 { // dr173: yes // dr174: sup 1012 namespace dr175 { // dr175: 2.8 - struct A {}; // expected-note {{here}} - struct B : private A {}; // expected-note {{constrained by private inheritance}} + struct A {}; // #dr175-A + struct B : private A {}; // #dr175-B struct C : B { - A a; // expected-error {{private}} + A a; + // expected-error@-1 {{'A' is a private member of 'dr175::A'}} + // expected-note@#dr175-B {{constrained by private inheritance here}} + // expected-note@#dr175-A {{member is declared here}} dr175::A b; }; } @@ -840,12 +1013,14 @@ namespace dr176 { // dr176: 3.1 template class Y; template<> class Y { void f() { - typedef Y A; // expected-note {{here}} - typedef Y A; // expected-error {{different types ('Y' vs 'Y')}} + typedef Y A; // #dr176-A-first + typedef Y A; + // expected-error@-1 {{typedef redefinition with different types ('Y' vs 'Y')}} + // expected-note@#dr176-A-first {{previous definition is here}} } }; - template struct Base {}; // expected-note 2{{found}} + template struct Base {}; // #dr176-Base template struct Derived : public Base { void f() { typedef typename Derived::template Base A; @@ -855,35 +1030,44 @@ namespace dr176 { // dr176: 3.1 template struct Derived; template struct Derived2 : Base, Base { - typename Derived2::Base b; // expected-error {{found in multiple base classes}} + typename Derived2::Base b; + // expected-error@-1 {{member 'Base' found in multiple base classes of different types}} + // expected-note@#dr176-Base {{member type 'dr176::Base' found by ambiguous name lookup}} + // expected-note@#dr176-Base {{member type 'dr176::Base' found by ambiguous name lookup}} typename Derived2::Base d; }; - template class X { // expected-note {{here}} + 
template class X { // #dr176-X X *p1; X *p2; X *p3; - dr176::X *p4; // expected-error {{requires template arguments}} + dr176::X *p4; // #dr176-p4 + // cxx98-14-error@#dr176-p4 {{use of class template 'dr176::X' requires template arguments}} + // cxx98-14-note@#dr176-X {{template is declared here}} + // since-cxx17-error@#dr176-p4 {{use of class template 'X' requires template arguments; argument deduction not allowed in non-static class member}} + // since-cxx17-note@#dr176-X {{template is declared here}} }; } namespace dr177 { // dr177: yes struct B {}; struct A { - A(A &); // expected-note 0-1{{not viable: expects an lvalue}} - A(const B &); // expected-note 0-1{{not viable: no known conversion from 'A' to}} + A(A &); // #dr177-A-copy-ctor + A(const B &); // #dr177-A-ctor-from-B }; B b; A a = b; -#if __cplusplus <= 201402L - // expected-error@-2 {{no viable constructor copying variable}} -#endif + // cxx98-14-error@-1 {{no viable constructor copying variable of type 'A'}} + // cxx98-14-note@#dr177-A-copy-ctor {{candidate constructor not viable: expects an lvalue for 1st argument}} + // cxx98-14-note@#dr177-A-ctor-from-B {{candidate constructor not viable: no known conversion from 'A' to 'const B &' for 1st argument}} - struct C { C(C&); }; // expected-note {{not viable: expects an lvalue for 1st argument}} + struct C { C(C&); }; // #dr177-C-copy-ctor struct D : C {}; struct E { operator D(); }; E e; - C c = e; // expected-error {{no viable constructor copying variable of type 'D'}} + C c = e; + // expected-error@-1 {{no viable constructor copying variable of type 'D'}} + // expected-note@#dr177-C-copy-ctor {{candidate constructor not viable: expects an lvalue for 1st argument}} } namespace dr178 { // dr178: yes @@ -901,7 +1085,8 @@ namespace dr178 { // dr178: yes namespace dr179 { // dr179: yes void f(); - int n = &f - &f; // expected-error {{arithmetic on pointers to the function type 'void ()'}} + int n = &f - &f; + // expected-error@-1 {{arithmetic on pointers 
to the function type 'void ()'}} } namespace dr180 { // dr180: 2.8 @@ -916,8 +1101,10 @@ namespace dr180 { // dr180: 2.8 namespace dr181 { // dr181: yes namespace X { - template