From fdc7ccf5432f72e6c87a78625bb55f8d4ceaea16 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 2 Dec 2024 15:12:29 -0800 Subject: [PATCH] [RISCV][GISel] Allow fp128 function arguments. Add more testing for large arguments. --- .../Target/RISCV/GISel/RISCVCallLowering.cpp | 2 +- ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 230 ++++++++++++ .../calling-conv-lp64-lp64f-lp64d-common.ll | 348 ++++++++++++++++++ 3 files changed, 579 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index f50611aac5dd1f..3ab6d8aa8d6345 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -350,7 +350,7 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget, bool IsLowerArgs = false) { if (T->isIntegerTy()) return true; - if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy()) + if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy() || T->isFP128Ty()) return true; if (T->isPointerTy()) return true; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll index fb00e4860ddf2a..2d75cbd21a6fdf 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -825,6 +825,236 @@ define i32 @caller_many_scalars() nounwind { ret i32 %1 } + +; Check that i128 and fp128 are passed indirectly + +define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { + ; RV32I-LABEL: name: callee_large_scalars + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 8) + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128)) + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s128), [[LOAD1]] + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; RV32I-NEXT: $x10 = COPY [[ZEXT]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars() nounwind { + ; ILP32-LABEL: name: caller_large_scalars + ; ILP32: bb.1 (%ir-block.0): + ; ILP32-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1 + ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8) + ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1) + ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0) + ; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0) + ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; ILP32-NEXT: $x10 = COPY [[COPY]](s32) + ; ILP32-NEXT: PseudoRET implicit $x10 + ; + ; ILP32F-LABEL: name: caller_large_scalars + ; ILP32F: bb.1 (%ir-block.0): + ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1 + ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8) + ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1) + ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0) + ; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0) + ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; ILP32F-NEXT: $x10 = COPY [[COPY]](s32) + ; ILP32F-NEXT: PseudoRET implicit $x10 + ; + ; ILP32D-LABEL: name: caller_large_scalars + ; ILP32D: bb.1 (%ir-block.0): + ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1 + ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8) + ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1) + ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0) + ; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0) + ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; ILP32D-NEXT: $x10 = COPY [[COPY]](s32) + ; ILP32D-NEXT: PseudoRET implicit $x10 + %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in memory + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) nounwind { + ; RV32I-LABEL: name: callee_large_scalars_exhausted_regs + ; RV32I: bb.1 (%ir-block.0): + ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 + ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 + ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14 + ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15 + ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16 + ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(p0) = COPY $x17 + ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY7]](p0) :: (load (s128), align 8) + ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.1, align 16) + ; RV32I-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; RV32I-NEXT: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.0) + ; RV32I-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[LOAD2]](p0) :: (load (s128)) + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s128), [[LOAD3]] + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; RV32I-NEXT: $x10 = COPY [[ZEXT]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + %j_bitcast = bitcast fp128 %j to i128 + %1 = icmp eq i128 %h, %j_bitcast + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @caller_large_scalars_exhausted_regs() nounwind { + ; ILP32-LABEL: name: caller_large_scalars_exhausted_regs + ; ILP32: bb.1 (%ir-block.0): + ; ILP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; ILP32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; ILP32-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; ILP32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; ILP32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; ILP32-NEXT: [[C7:%[0-9]+]]:_(s128) = G_CONSTANT i128 8 + ; ILP32-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; ILP32-NEXT: [[C9:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; ILP32-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2 + ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; ILP32-NEXT: G_STORE [[C7]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8) + ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; ILP32-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; ILP32-NEXT: G_STORE [[C8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16) + ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; ILP32-NEXT: G_STORE [[C9]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1) + ; ILP32-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; ILP32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; ILP32-NEXT: G_STORE [[FRAME_INDEX1]](p0), [[PTR_ADD1]](p0) :: (store (p0) into stack + 4) + ; ILP32-NEXT: $x10 = COPY [[C]](s32) + ; ILP32-NEXT: $x11 = COPY [[C1]](s32) + ; ILP32-NEXT: $x12 = COPY [[C2]](s32) + ; ILP32-NEXT: $x13 = COPY [[C3]](s32) + ; ILP32-NEXT: $x14 = COPY [[C4]](s32) + ; ILP32-NEXT: $x15 = COPY [[C5]](s32) + ; ILP32-NEXT: $x16 = COPY [[C6]](s32) + ; ILP32-NEXT: $x17 = COPY [[FRAME_INDEX]](p0) + ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars_exhausted_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; ILP32-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2 + ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; ILP32-NEXT: $x10 = COPY [[COPY1]](s32) + ; ILP32-NEXT: PseudoRET implicit $x10 + ; + ; ILP32F-LABEL: name: caller_large_scalars_exhausted_regs + ; ILP32F: bb.1 (%ir-block.0): + ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; ILP32F-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; ILP32F-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; ILP32F-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; ILP32F-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; ILP32F-NEXT: [[C7:%[0-9]+]]:_(s128) = G_CONSTANT i128 8 + ; ILP32F-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; ILP32F-NEXT: [[C9:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; ILP32F-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2 + ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; ILP32F-NEXT: G_STORE [[C7]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8) + ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; ILP32F-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; ILP32F-NEXT: G_STORE [[C8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16) + ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; ILP32F-NEXT: G_STORE [[C9]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1) + ; ILP32F-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; ILP32F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; ILP32F-NEXT: G_STORE [[FRAME_INDEX1]](p0), [[PTR_ADD1]](p0) :: (store (p0) into stack + 4) + ; ILP32F-NEXT: $x10 = COPY [[C]](s32) + ; ILP32F-NEXT: $x11 = COPY [[C1]](s32) + ; ILP32F-NEXT: $x12 = COPY [[C2]](s32) + ; ILP32F-NEXT: $x13 = COPY [[C3]](s32) + ; ILP32F-NEXT: $x14 = COPY [[C4]](s32) + ; ILP32F-NEXT: $x15 = COPY [[C5]](s32) + ; ILP32F-NEXT: $x16 = COPY [[C6]](s32) + ; ILP32F-NEXT: $x17 = COPY [[FRAME_INDEX]](p0) + ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars_exhausted_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; ILP32F-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2 + ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; ILP32F-NEXT: $x10 = COPY [[COPY1]](s32) + ; ILP32F-NEXT: PseudoRET implicit $x10 + ; + ; ILP32D-LABEL: name: caller_large_scalars_exhausted_regs + ; ILP32D: bb.1 (%ir-block.0): + ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; ILP32D-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; ILP32D-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; ILP32D-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; ILP32D-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; ILP32D-NEXT: [[C7:%[0-9]+]]:_(s128) = G_CONSTANT i128 8 + ; ILP32D-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; ILP32D-NEXT: [[C9:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; ILP32D-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2 + ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; ILP32D-NEXT: G_STORE [[C7]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8) + ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; ILP32D-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; ILP32D-NEXT: G_STORE [[C8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16) + ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; ILP32D-NEXT: G_STORE [[C9]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1) + ; ILP32D-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; ILP32D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; ILP32D-NEXT: G_STORE [[FRAME_INDEX1]](p0), [[PTR_ADD1]](p0) :: (store (p0) into stack + 4) + ; ILP32D-NEXT: $x10 = COPY [[C]](s32) + ; ILP32D-NEXT: $x11 = COPY [[C1]](s32) + ; ILP32D-NEXT: $x12 = COPY [[C2]](s32) + ; ILP32D-NEXT: $x13 = COPY [[C3]](s32) + ; ILP32D-NEXT: $x14 = COPY [[C4]](s32) + ; ILP32D-NEXT: $x15 = COPY [[C5]](s32) + ; ILP32D-NEXT: $x16 = COPY [[C6]](s32) + ; ILP32D-NEXT: $x17 = COPY [[FRAME_INDEX]](p0) + ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars_exhausted_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; ILP32D-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2 + ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; ILP32D-NEXT: $x10 = COPY [[COPY1]](s32) + ; ILP32D-NEXT: PseudoRET implicit $x10 + %1 = call i32 @callee_large_scalars_exhausted_regs( + i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, + fp128 0xL00000000000000007FFF000000000000) + ret i32 %1 +} + ; Check return of 2x xlen scalars define i64 @callee_small_scalar_ret() nounwind { diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll index 3a0831b3c5214c..d6d3a3c07fbcf1 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll @@ -83,6 +83,82 @@ define i64 @caller_i128_in_regs() nounwind { ret i64 %1 } +define i64 @callee_i128_fp128_in_regs(i128 %a, fp128 %b) nounwind { + ; RV64I-LABEL: name: callee_i128_fp128_in_regs + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11, $x12, $x13 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) + ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x12 + ; RV64I-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x13 + ; RV64I-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64) + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[MV]](s128), [[MV1]] + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; RV64I-NEXT: $x10 = COPY [[ZEXT]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + %b_bitcast = bitcast fp128 %b to i128 + %1 = icmp eq i128 %a, %b_bitcast + %2 = zext i1 %1 to i64 + ret i64 %2 +} + +define i64 @caller_i128_fp128_in_regs() nounwind { + ; LP64-LABEL: name: caller_i128_fp128_in_regs + ; LP64: bb.1 (%ir-block.0): + ; LP64-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1 + ; LP64-NEXT: [[C1:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[C]](s128) + ; LP64-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[C1]](s128) + ; LP64-NEXT: $x10 = COPY [[UV]](s64) + ; LP64-NEXT: $x11 = COPY [[UV1]](s64) + ; LP64-NEXT: $x12 = COPY [[UV2]](s64) + ; LP64-NEXT: $x13 = COPY [[UV3]](s64) + ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_i128_fp128_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit-def $x10 + ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64-NEXT: $x10 = COPY [[COPY]](s64) + ; LP64-NEXT: PseudoRET implicit $x10 + ; + ; LP64F-LABEL: name: caller_i128_fp128_in_regs + ; LP64F: bb.1 (%ir-block.0): + ; LP64F-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1 + ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[C]](s128) + ; LP64F-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[C1]](s128) + ; LP64F-NEXT: $x10 = COPY [[UV]](s64) + ; LP64F-NEXT: $x11 = COPY [[UV1]](s64) + ; LP64F-NEXT: $x12 = COPY [[UV2]](s64) + ; LP64F-NEXT: $x13 = COPY [[UV3]](s64) + ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_i128_fp128_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit-def $x10 + ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64F-NEXT: $x10 = COPY [[COPY]](s64) + ; LP64F-NEXT: PseudoRET implicit $x10 + ; + ; LP64D-LABEL: name: caller_i128_fp128_in_regs + ; LP64D: bb.1 (%ir-block.0): + ; LP64D-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1 + ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[C]](s128) + ; LP64D-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[C1]](s128) + ; LP64D-NEXT: $x10 = COPY [[UV]](s64) + ; LP64D-NEXT: $x11 = COPY [[UV1]](s64) + ; LP64D-NEXT: $x12 = COPY [[UV2]](s64) + ; LP64D-NEXT: $x13 = COPY [[UV3]](s64) + ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_i128_fp128_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit-def $x10 + ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64D-NEXT: $x10 = COPY [[COPY]](s64) + ; LP64D-NEXT: PseudoRET implicit $x10 + %1 = call i64 @callee_i128_fp128_in_regs(i128 1, fp128 0xL00000000000000007FFF000000000000) + ret i64 %1 +} + ; Check the correct handling of passing of values that are larger that 2*XLen. define i32 @caller_i256_indirect_reference_in_stack() { @@ -480,6 +556,233 @@ define i32 @caller_many_scalars() nounwind { ret i32 %1 } +; Check that i256 is passed indirectly. + +define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { + ; RV64I-LABEL: name: callee_large_scalars + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 16) + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11 + ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 16) + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s256), [[LOAD1]] + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; RV64I-NEXT: $x10 = COPY [[ZEXT]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + %1 = icmp eq i256 %a, %b + %2 = zext i1 %1 to i64 + ret i64 %2 +} + +define i64 @caller_large_scalars() nounwind { + ; LP64-LABEL: name: caller_large_scalars + ; LP64: bb.1 (%ir-block.0): + ; LP64-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 + ; LP64-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2 + ; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; LP64-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16) + ; LP64-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; LP64-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16) + ; LP64-NEXT: $x10 = COPY [[FRAME_INDEX]](p0) + ; LP64-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0) + ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64-NEXT: $x10 = COPY [[COPY]](s64) + ; LP64-NEXT: PseudoRET implicit $x10 + ; + ; LP64F-LABEL: name: caller_large_scalars + ; LP64F: bb.1 (%ir-block.0): + ; LP64F-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 + ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2 + ; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; LP64F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16) + ; LP64F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; LP64F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16) + ; LP64F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0) + ; LP64F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0) + ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64F-NEXT: $x10 = COPY [[COPY]](s64) + ; LP64F-NEXT: PseudoRET implicit $x10 + ; + ; LP64D-LABEL: name: caller_large_scalars + ; LP64D: bb.1 (%ir-block.0): + ; LP64D-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 + ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2 + ; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; LP64D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16) + ; LP64D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; LP64D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16) + ; LP64D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0) + ; LP64D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0) + ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64D-NEXT: $x10 = COPY [[COPY]](s64) + ; LP64D-NEXT: PseudoRET implicit $x10 + %1 = call i64 @callee_large_scalars(i256 1, i256 2) + ret i64 %1 +} + +; Check that arguments larger than 2*xlen are handled correctly when their +; address is passed on the stack rather than in memory + +; Must keep define on a single line due to an update_llc_test_checks.py limitation +define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { + ; RV64I-LABEL: name: callee_large_scalars_exhausted_regs + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x12 + ; RV64I-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x13 + ; RV64I-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x14 + ; RV64I-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x15 + ; RV64I-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x16 + ; RV64I-NEXT: [[COPY7:%[0-9]+]]:_(p0) = COPY $x17 + ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY7]](p0) :: (load (s256), align 16) + ; RV64I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %fixed-stack.1, align 16) + ; RV64I-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; RV64I-NEXT: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.0) + ; RV64I-NEXT: [[LOAD3:%[0-9]+]]:_(s256) = G_LOAD [[LOAD2]](p0) :: (load (s256), align 16) + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s256), [[LOAD3]] + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ICMP]](s1) + ; RV64I-NEXT: $x10 = COPY [[ZEXT]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + %1 = icmp eq i256 %h, %j + %2 = zext i1 %1 to i64 + ret i64 %2 +} + +define i64 @caller_large_scalars_exhausted_regs() nounwind { + ; LP64-LABEL: name: caller_large_scalars_exhausted_regs + ; LP64: bb.1 (%ir-block.0): + ; LP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; LP64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; LP64-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; LP64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; LP64-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; LP64-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; LP64-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; LP64-NEXT: [[C7:%[0-9]+]]:_(s256) = G_CONSTANT i256 8 + ; LP64-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 9 + ; LP64-NEXT: [[C9:%[0-9]+]]:_(s256) = G_CONSTANT i256 10 + ; LP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; LP64-NEXT: G_STORE [[C7]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16) + ; LP64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; LP64-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) + ; LP64-NEXT: G_STORE [[C8]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16) + ; LP64-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; LP64-NEXT: G_STORE [[C9]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16) + ; LP64-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; LP64-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) + ; LP64-NEXT: G_STORE [[FRAME_INDEX1]](p0), [[PTR_ADD1]](p0) :: (store (p0) into stack + 8) + ; LP64-NEXT: $x10 = COPY [[C]](s64) + ; LP64-NEXT: $x11 = COPY [[C1]](s64) + ; LP64-NEXT: $x12 = COPY [[C2]](s64) + ; LP64-NEXT: $x13 = COPY [[C3]](s64) + ; LP64-NEXT: $x14 = COPY [[C4]](s64) + ; LP64-NEXT: $x15 = COPY [[C5]](s64) + ; LP64-NEXT: $x16 = COPY [[C6]](s64) + ; LP64-NEXT: $x17 = COPY [[FRAME_INDEX]](p0) + ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars_exhausted_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; LP64-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64-NEXT: $x10 = COPY [[COPY1]](s64) + ; LP64-NEXT: PseudoRET implicit $x10 + ; + ; LP64F-LABEL: name: caller_large_scalars_exhausted_regs + ; LP64F: bb.1 (%ir-block.0): + ; LP64F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; LP64F-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; LP64F-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; LP64F-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; LP64F-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; LP64F-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; LP64F-NEXT: [[C7:%[0-9]+]]:_(s256) = G_CONSTANT i256 8 + ; LP64F-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 9 + ; LP64F-NEXT: [[C9:%[0-9]+]]:_(s256) = G_CONSTANT i256 10 + ; LP64F-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; LP64F-NEXT: G_STORE [[C7]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16) + ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; LP64F-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) + ; LP64F-NEXT: G_STORE [[C8]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16) + ; LP64F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; LP64F-NEXT: G_STORE [[C9]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16) + ; LP64F-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; LP64F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) + ; LP64F-NEXT: G_STORE [[FRAME_INDEX1]](p0), [[PTR_ADD1]](p0) :: (store (p0) into stack + 8) + ; LP64F-NEXT: $x10 = COPY [[C]](s64) + ; LP64F-NEXT: $x11 = COPY [[C1]](s64) + ; LP64F-NEXT: $x12 = COPY [[C2]](s64) + ; LP64F-NEXT: $x13 = COPY [[C3]](s64) + ; LP64F-NEXT: $x14 = COPY [[C4]](s64) + ; LP64F-NEXT: $x15 = COPY [[C5]](s64) + ; LP64F-NEXT: $x16 = COPY [[C6]](s64) + ; LP64F-NEXT: $x17 = COPY [[FRAME_INDEX]](p0) + ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars_exhausted_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; LP64F-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64F-NEXT: $x10 = COPY [[COPY1]](s64) + ; LP64F-NEXT: PseudoRET implicit $x10 + ; + ; LP64D-LABEL: name: caller_large_scalars_exhausted_regs + ; LP64D: bb.1 (%ir-block.0): + ; LP64D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; LP64D-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; LP64D-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; LP64D-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; LP64D-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; LP64D-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; LP64D-NEXT: [[C7:%[0-9]+]]:_(s256) = G_CONSTANT i256 8 + ; LP64D-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 9 + ; LP64D-NEXT: [[C9:%[0-9]+]]:_(s256) = G_CONSTANT i256 10 + ; LP64D-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; LP64D-NEXT: G_STORE [[C7]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16) + ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; LP64D-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) + ; LP64D-NEXT: G_STORE [[C8]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16) + ; LP64D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; LP64D-NEXT: G_STORE [[C9]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16) + ; LP64D-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; LP64D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) + ; LP64D-NEXT: G_STORE [[FRAME_INDEX1]](p0), [[PTR_ADD1]](p0) :: (store (p0) into stack + 8) + ; LP64D-NEXT: $x10 = COPY [[C]](s64) + ; LP64D-NEXT: $x11 = COPY [[C1]](s64) + ; LP64D-NEXT: $x12 = COPY [[C2]](s64) + ; LP64D-NEXT: $x13 = COPY [[C3]](s64) + ; LP64D-NEXT: $x14 = COPY [[C4]](s64) + ; LP64D-NEXT: $x15 = COPY [[C5]](s64) + ; LP64D-NEXT: $x16 = COPY [[C6]](s64) + ; LP64D-NEXT: $x17 = COPY [[FRAME_INDEX]](p0) + ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_large_scalars_exhausted_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 + ; LP64D-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64D-NEXT: $x10 = COPY [[COPY1]](s64) + ; LP64D-NEXT: PseudoRET implicit $x10 + %1 = call i64 @callee_large_scalars_exhausted_regs( + i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, + i256 10) + ret i64 %1 +} + ; Check return of 2x xlen scalars define i128 @callee_small_scalar_ret() nounwind { @@ -541,6 +844,51 @@ define i64 @caller_small_scalar_ret() nounwind { ret i64 %3 } +define fp128 @callee_fp128_ret() nounwind { + ; RV64I-LABEL: name: callee_fp128_ret + ; RV64I: bb.1 (%ir-block.0): + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s128) = G_FCONSTANT fp128 0xL00000000000000007FFF000000000000 + ; RV64I-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[C]](s128) + ; RV64I-NEXT: $x10 = COPY [[UV]](s64) + ; RV64I-NEXT: $x11 = COPY [[UV1]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10, implicit $x11 + ret fp128 0xL00000000000000007FFF000000000000 +} + +define void @caller_fp128_ret() nounwind { + ; LP64-LABEL: name: caller_fp128_ret + ; LP64: bb.1 (%ir-block.0): + ; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_fp128_ret, csr_ilp32_lp64, implicit-def $x1, implicit-def $x10, implicit-def $x11 + ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LP64-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) + ; LP64-NEXT: PseudoRET + ; + ; LP64F-LABEL: name: caller_fp128_ret + ; LP64F: bb.1 (%ir-block.0): + ; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_fp128_ret, csr_ilp32f_lp64f, implicit-def $x1, implicit-def $x10, implicit-def $x11 + ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64F-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LP64F-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) + ; LP64F-NEXT: PseudoRET + ; + ; LP64D-LABEL: name: caller_fp128_ret + ; LP64D: bb.1 (%ir-block.0): + ; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_fp128_ret, csr_ilp32d_lp64d, implicit-def $x1, implicit-def $x10, implicit-def $x11 + ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; LP64D-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 + ; LP64D-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) + ; LP64D-NEXT: PseudoRET + %1 = call fp128 @callee_fp128_ret() + ret void +} + ; Check return of 2x xlen structs %struct.small = type { i64, ptr }