Skip to content

Commit

Permalink
[AMDGPU] change order of fp and sp in kernel prologue (#90626)
Browse files Browse the repository at this point in the history
Change the order of FP and SP initialization in the kernel prologue, and update
the related codegen tests, to make it easier to merge code into our downstream branches.

Signed-off-by: gangc <gangc@amd.com>
  • Loading branch information
cmc-rep authored May 1, 2024
1 parent 9226688 commit 167427f
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 19 deletions.
12 changes: 6 additions & 6 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,19 +683,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

if (hasFP(MF)) {
Register FPReg = MFI->getFrameOffsetReg();
assert(FPReg != AMDGPU::FP_REG);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
}

if (requiresStackPointerReference(MF)) {
Register SPReg = MFI->getStackPtrOffsetReg();
assert(SPReg != AMDGPU::SP_REG);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
.addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
}

if (hasFP(MF)) {
Register FPReg = MFI->getFrameOffsetReg();
assert(FPReg != AMDGPU::FP_REG);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
}

bool NeedsFlatScratchInit =
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s9
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_movk_i32 s32, 0x400
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_movk_i32 s32, 0x400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
Expand Down Expand Up @@ -87,8 +87,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s9
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_movk_i32 s32, 0x1000
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_movk_i32 s32, 0x1000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/cc-update.ll
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_getpc_b64 s[16:17]
; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
Expand All @@ -340,8 +340,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_getpc_b64 s[16:17]
; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
Expand All @@ -351,8 +351,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1010-LABEL: test_force_fp_kern_call:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_add_u32 s10, s10, s15
; GFX1010-NEXT: s_mov_b32 s32, 0
; GFX1010-NEXT: s_mov_b32 s33, 0
; GFX1010-NEXT: s_mov_b32 s32, 0
; GFX1010-NEXT: s_addc_u32 s11, s11, 0
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
Expand All @@ -378,16 +378,16 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1100-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX1100-NEXT: s_mov_b32 s13, s14
; GFX1100-NEXT: s_mov_b32 s14, s15
; GFX1100-NEXT: s_mov_b32 s32, 0
; GFX1100-NEXT: s_mov_b32 s33, 0
; GFX1100-NEXT: s_mov_b32 s32, 0
; GFX1100-NEXT: s_getpc_b64 s[6:7]
; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4
; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX1100-NEXT: s_endpgm
; GFX1010-NEXT s_add_u32 s12, s12, s17
; GFX1010-NEXT s_mov_b32 s32, 0
; GFX1010-NEXT s_mov_b32 s33, 0
; GFX1010-NEXT s_mov_b32 s32, 0
; GFX1010-NEXT s_addc_u32 s13, s13, 0
; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
Expand Down Expand Up @@ -459,8 +459,8 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_add_u32 s10, s10, s15
; GFX1010-NEXT: s_movk_i32 s32, 0x200
; GFX1010-NEXT: s_mov_b32 s33, 0
; GFX1010-NEXT: s_movk_i32 s32, 0x200
; GFX1010-NEXT: s_addc_u32 s11, s11, 0
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-LABEL: test_kernel:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s32, 0x180000
; CHECK-NEXT: s_mov_b32 s33, 0
; CHECK-NEXT: s_mov_b32 s32, 0x180000
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
; CHECK-NEXT: s_add_u32 s0, s0, s15
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; MUBUF-NEXT: s_add_u32 s0, s0, s9
; MUBUF-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
; MUBUF-NEXT: s_movk_i32 s32, 0x400
; MUBUF-NEXT: s_mov_b32 s33, 0
; MUBUF-NEXT: s_movk_i32 s32, 0x400
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: s_cmp_lg_u32 s8, 0
; MUBUF-NEXT: s_cbranch_scc1 .LBB0_3
Expand Down Expand Up @@ -57,8 +57,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
; FLATSCR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; FLATSCR-NEXT: s_mov_b32 s32, 16
; FLATSCR-NEXT: s_mov_b32 s33, 0
; FLATSCR-NEXT: s_mov_b32 s32, 16
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; FLATSCR-NEXT: s_cmp_lg_u32 s4, 0
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_3
Expand Down Expand Up @@ -125,8 +125,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; MUBUF-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8
; MUBUF-NEXT: s_add_u32 s0, s0, s9
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
; MUBUF-NEXT: s_movk_i32 s32, 0x1000
; MUBUF-NEXT: s_mov_b32 s33, 0
; MUBUF-NEXT: s_movk_i32 s32, 0x1000
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: s_cmp_lg_u32 s6, 0
; MUBUF-NEXT: s_cbranch_scc1 .LBB1_2
Expand Down Expand Up @@ -159,8 +159,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; FLATSCR-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
; FLATSCR-NEXT: s_mov_b32 s32, 64
; FLATSCR-NEXT: s_mov_b32 s33, 0
; FLATSCR-NEXT: s_mov_b32 s32, 64
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; FLATSCR-NEXT: s_cmp_lg_u32 s2, 0
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_2
Expand Down

0 comments on commit 167427f

Please sign in to comment.