-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CodeGen/test: improve a test, regen with UTC #113338
Conversation
@llvm/pr-subscribers-backend-nvptx @llvm/pr-subscribers-backend-powerpc Author: Ramkumar Ramachandra (artagnon). Changes: Patch is 150.02 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/113338.diff — 2 Files Affected:
diff --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index f922fd92fa244e..8435e016096621 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
@@ -22,149 +23,297 @@
; generic statespace
-; CHECK-LABEL: generic_weak
define void @generic_weak(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_weak_param_0];
+; CHECK-NEXT: ld.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_weak_param_2];
+; CHECK-NEXT: st.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_weak_param_3];
+; CHECK-NEXT: ld.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.f64 %fd1, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.f64 [%rd4], %fd2;
+; CHECK-NEXT: ld.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr %a
- ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr %b
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr %c
- ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr %d
- ; CHECK: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr %c
- ; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr %d
%f.add = fadd double %f.load, 1.
- ; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr %d
; TODO: make the lowering of this weak vector ops consistent with
; the ones of the next tests. This test lowers to a weak PTX
; vector op, but next test lowers to a vector PTX op.
- ; CHECK: ld.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr %b
; TODO: make the lowering of this weak vector ops consistent with
; the ones of the previous test. This test lowers to a weak
; PTX scalar op, but prior test lowers to a vector PTX op.
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr %c
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr %c
- ; CHECK: ld.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr %d
- ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr %d
- ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr %d
- ; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr %d
- ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr %d
- ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr %d
- ; CHECK: ld.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr %d
ret void
}
-; CHECK-LABEL: generic_volatile
define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_param_0];
+; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_volatile_param_2];
+; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_volatile_param_3];
+; CHECK-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.volatile.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.volatile.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.volatile.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.volatile.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.volatile.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.volatile.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.volatile.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.volatile.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.volatile.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.volatile.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.volatile.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.volatile.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.volatile.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.volatile.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.volatile.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.volatile.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.volatile.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.volatile.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr %a
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr %b
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr %c
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr %d
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr %c
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr %c
; TODO: volatile, atomic, and volatile atomic memory operations on vector types.
@@ -184,254 +333,358 @@ define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr
; TODO: make this operation consistent with the one for <4 x i8>
; This operation lowers to a "element wise volatile PTX operation".
- ; CHECK: ld.volatile.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.volatile.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <2 x i8> %h.add, ptr %b
; TODO: make this operation consistent with the one for <2 x i8>
; This operation lowers to a "full vector volatile PTX operation".
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <4 x i8> %i.add, ptr %c
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <2 x i16> %j.add, ptr %c
- ; CHECK: ld.volatile.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.volatile.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <4 x i16> %k.add, ptr %d
- ; CHECK: ld.volatile.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.volatile.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <2 x i32> %l.add, ptr %d
- ; CHECK: ld.volatile.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.volatile.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <4 x i32> %m.add, ptr %d
- ; CHECK: ld.volatile.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.volatile.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile <2 x i64> %n.add, ptr %d
- ; CHECK: ld.volatile.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.volatile.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <2 x float> %o.add, ptr %d
- ; CHECK: ld.volatile.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.volatile.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <4 x float> %p.add, ptr %d
- ; CHECK: ld.volatile.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.volatile.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile <2 x double> %q.add, ptr %d
ret void
}
-; CHECK-LABEL: generic_unordered_sys
define void @generic_unordered_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: generic_unordered_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_param_0];
+; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [generic_unordered_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [generic_unordered_sys_param_2];
+; SM60-NEXT: st.volatile.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [generic_unordered_sys_param_3];
+; SM60-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [generic_unordered_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.u32 [%rd3], %r2;
+; SM60-NEXT:...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like the idea of testing individual instructions separately and using automatic check generation.
The only question is whether the tests could be trimmed down further to reduce the amount of code between the loads/stores.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/160/builds/7730 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/180/builds/7728 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/7953 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/7854 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/7840 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/13551 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/11761 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/7815 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/8183 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/108/builds/5531 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/11313 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/169/builds/4948 — here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/5788 — here is the relevant piece of the build log for reference.
|
Looks like the test updates crossed in the air with the changes that switched from |
Done with #114779. |
No description provided.