From c5304c2e74de946868cfaa6e19e679d1ad7f4926 Mon Sep 17 00:00:00 2001 From: Qiao Pengcheng Date: Tue, 23 Apr 2024 19:41:05 +0800 Subject: [PATCH] Adjust the calleeSavedRegs on top frame for LoongArch64/RISCV64 (#100962) The frame layout: | | |-----------------------| | incoming arguments | +=======================+ <---- Caller's SP | Varargs regs space | // Only for varargs main functions; not used for LA64. |-----------------------| | MonitorAcquired | // 8 bytes; for synchronized methods |-----------------------| | PSP slot | // 8 bytes (omitted in NativeAOT ABI) |-----------------------| |Callee saved registers | // multiple of 8 bytes, not including FP/RA |-----------------------| | Saved RA | // 8 bytes |-----------------------| | Saved FP | // 8 bytes |-----------------------| | possible GS cookie | |-----------------------| | locals, temps, etc. | |-----------------------| | possible GS cookie | |-----------------------| | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) |-----------------------| <---- Ambient SP | | | ~ | Stack grows ~ | | downward | --- src/coreclr/jit/codegen.h | 23 +- src/coreclr/jit/codegencommon.cpp | 17 +- src/coreclr/jit/codegenloongarch64.cpp | 587 +++++++++---------------- src/coreclr/jit/codegenriscv64.cpp | 485 ++++++++------------ src/coreclr/jit/lclvars.cpp | 61 ++- src/coreclr/jit/regset.h | 3 +- 6 files changed, 450 insertions(+), 726 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 3511935a062b0a..f5e0dc857a7d4c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -437,7 +437,7 @@ class CodeGen final : public CodeGenInterface FuncletFrameInfoDsc genFuncletInfo; -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // A set of information that is used by funclet prolog and epilog generation. 
// It is collected once, before funclet prologs and epilogs are generated, @@ -448,26 +448,6 @@ class CodeGen final : public CodeGenInterface int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function // (negative) int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) - int fiCalleeSavedPadding; // CalleeSaved offset padding (positive) - int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) - int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) - int fiSpDelta; // Stack pointer delta (negative) - }; - - FuncletFrameInfoDsc genFuncletInfo; - -#elif defined(TARGET_RISCV64) - - // A set of information that is used by funclet prolog and epilog generation. - // It is collected once, before funclet prologs and epilogs are generated, - // and used by all funclet prologs and epilogs, which must all be the same. - struct FuncletFrameInfoDsc - { - regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) - int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function - // (negative) - int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) - int fiCalleeSavedPadding; // CalleeSaved offset padding (positive) int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) int fiSpDelta; // Stack pointer delta (negative) @@ -1272,7 +1252,6 @@ class CodeGen final : public CodeGenInterface void genJmpMethod(GenTree* jmp); BasicBlock* genCallFinally(BasicBlock* block); #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // TODO: refactor for LA. 
void genCodeForJumpCompare(GenTreeOpCC* tree); #endif #if defined(TARGET_ARM64) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 417a7e6f31695d..c8211a7e731440 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4744,20 +4744,13 @@ void CodeGen::genFinalizeFrame() #endif // defined(TARGET_XARCH) #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (isFramePointerUsed()) - { - // For a FP based frame we have to push/pop the FP register - // - maskCalleeRegsPushed |= RBM_FPBASE; + // This assert check that we are not using REG_FP + assert(!regSet.rsRegsModified(RBM_FPBASE)); - // This assert check that we are not using REG_FP - // as both the frame pointer and as a codegen register - // - assert(!regSet.rsRegsModified(RBM_FPBASE)); - } + assert(isFramePointerUsed()); + // we always push FP/RA. See genPushCalleeSavedRegisters + maskCalleeRegsPushed |= (RBM_FPBASE | RBM_RA); - // we always push RA. See genPushCalleeSavedRegisters - maskCalleeRegsPushed |= RBM_RA; #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 2954a989c74668..1bd29a432ce170 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -190,8 +190,7 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* // reg1 - First register of pair to save. // reg2 - Second register of pair to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or -// zero). +// spDelta - Always zero for LoongArch64 now. // useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This // allows us to emit the "save_next" unwind code. 
// tmpReg - An available temporary register. Needed for the case of large frames. @@ -210,8 +209,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta <= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both // FP/SIMD @@ -221,16 +219,6 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, ins = INS_fst_d; } - if (spDelta != 0) - { - // generate addi.d SP,SP,-imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - - assert((spDelta + spOffset + 16) <= 0); - - assert(spOffset <= 2031); // 2047-16 - } - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); @@ -249,8 +237,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // Arguments: // reg1 - Register to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or -// zero). +// spDelta - Always zero for LoongArch64 now. // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. 
@@ -261,8 +248,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta <= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); instruction ins = INS_st_d; if (genIsValidFloatReg(reg1)) @@ -270,12 +256,6 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum ins = INS_fst_d; } - if (spDelta != 0) - { - // generate addi.d SP,SP,-imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - } - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); } @@ -290,8 +270,7 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum // reg1 - First register of pair to restore. // reg2 - Second register of pair to restore. // spOffset - The offset from SP to load reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or -// zero). +// spDelta - Always zero for LoongArch64 now. // useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This // allows us to emit the "save_next" unwind code. // tmpReg - An available temporary register. Needed for the case of large frames. 
@@ -310,8 +289,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta >= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both // FP/SIMD @@ -321,27 +299,11 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, ins = INS_fld_d; } - if (spDelta != 0) - { - assert(!useSaveNextPair); - - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); - compiler->unwindSaveReg(reg2, spOffset + 8); - - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - - // generate addi.d SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - } - else - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); - compiler->unwindSaveReg(reg2, spOffset + 8); + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - } + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); } //------------------------------------------------------------------------ @@ -350,8 +312,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, // Arguments: // reg1 - Register to restore. // spOffset - The offset from SP to restore reg1 (must be positive or zero). -// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or -// zero). +// spDelta - Always zero for LoongArch64 now. // tmpReg - An available temporary register. Needed for the case of large frames. 
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. @@ -362,8 +323,7 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); - assert(spDelta >= 0); - assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(spDelta == 0); instruction ins = INS_ld_d; if (genIsValidFloatReg(reg1)) @@ -371,20 +331,8 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg ins = INS_fld_d; } - if (spDelta != 0) - { - // ld.d reg1,SP,offset - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - - // generate addi.d SP,SP,imm - genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - } - else - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); - compiler->unwindSaveReg(reg1, spOffset); - } + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); } //------------------------------------------------------------------------ @@ -519,12 +467,13 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask) // genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask. // // Arguments: -// regsMask - a mask of registers for prolog generation; -// spDelta - if non-zero, the amount to add to SP before the first register save (or together with it); -// spOffset - the offset from SP that is the beginning of the callee-saved register area; +// regsMask - a mask of registers for prolog generation; +// spDelta - Always zero for LoongArch64 now. 
+// spOffset - the offset from SP that is the beginning of the callee-saved register area; // void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) { + assert(spDelta == 0); const int slotSize = genGetSlotSizeForRegsInMask(regsMask); ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); @@ -536,19 +485,16 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i if (regPair.reg2 != REG_NA) { // We can use two SD instructions. - genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_R21, - nullptr); + genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, 0, regPair.useSaveNextPair, REG_R21, nullptr); spOffset += 2 * slotSize; } else { // No register pair; we use a SD instruction. - genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_R21, nullptr); + genPrologSaveReg(regPair.reg1, spOffset, 0, REG_R21, nullptr); spOffset += slotSize; } - - spDelta = 0; // We've now changed SP already, if necessary; don't do it again. } } @@ -574,34 +520,22 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i // // Arguments: // regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing. -// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that -// if non-zero spDelta, then this is the offset of the first save *after* that -// SP adjustment. -// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or -// zero). +// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// spDelta - Always zero for LoongArch64 now. // // Notes: // The save set can not contain FP/RA in which case FP/RA is saved along with the other callee-saved registers. 
// void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) { - assert(spDelta <= 0); + assert(spDelta == 0); - unsigned regsToSaveCount = genCountBits(regsToSaveMask); - if (regsToSaveCount == 0) + if (regsToSaveMask == 0) { - if (spDelta != 0) - { - // Currently this is the case for varargs only - // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. - genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } return; } - assert((spDelta % 16) == 0); - - assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED)); + assert(genCountBits(regsToSaveMask) <= genCountBits(RBM_CALLEE_SAVED)); // Save integer registers at higher addresses than floating-point registers. @@ -610,15 +544,14 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe if (maskSaveRegsFloat != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); - spDelta = 0; + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, 0, lowestCalleeSavedOffset); lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; } if (maskSaveRegsInt != RBM_NONE) { - genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); - // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. + // No need to update spDelta. + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, 0, lowestCalleeSavedOffset); } } @@ -627,11 +560,12 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe // // Arguments: // regsMask - a mask of registers for epilog generation; -// spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); +// spDelta - Always zero for LoongArch64 now. 
// spOffset - the offset from SP that is the beginning of the callee-saved register area; // void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) { + assert(spDelta == 0); const int slotSize = genGetSlotSizeForRegsInMask(regsMask); ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); @@ -640,15 +574,6 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta int stackDelta = 0; for (int i = 0; i < regStack.Height(); ++i) { - bool lastRestoreInTheGroup = (i == regStack.Height() - 1); - bool updateStackDelta = lastRestoreInTheGroup && (spDelta != 0); - if (updateStackDelta) - { - // Update stack delta only if it is the last restore (the first save). - assert(stackDelta == 0); - stackDelta = spDelta; - } - RegPair regPair = regStack.Top(i); if (regPair.reg2 != REG_NA) { @@ -670,10 +595,9 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta // in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp(). // // Arguments: -// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. -// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. -// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or -// zero). +// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. +// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// spDelta - Always zero for LoongArch64 now. 
// // Here's an example restore sequence: // ld.d s8,sp,#xxx @@ -694,23 +618,15 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) { - assert(spDelta >= 0); - unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); - if (regsToRestoreCount == 0) + assert(spDelta == 0); + if (regsToRestoreMask == 0) { - if (spDelta != 0) - { - // Currently this is the case for varargs only - // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. - genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } return; } - assert((spDelta % 16) == 0); - - // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED. - assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA)); + unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + // The FP and RA are not in RBM_CALLEE_SAVED. + assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED)); // Point past the end, to start. We predecrement to find the offset to load from. static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); @@ -725,15 +641,13 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in if (maskRestoreRegsInt != RBM_NONE) { - int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, 0, spOffset); spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; } if (maskRestoreRegsFloat != RBM_NONE) { - // If there is any spDelta, it must be used here. - genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, 0, spOffset); // No need to update spOffset since it's not used after this. 
} } @@ -755,7 +669,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * filter: a0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) * finally/fault: none * - * The LOONGARCH64 funclet prolog is the following (Note: #framesz is total funclet frame size, + * The LoongArch64 funclet prolog is the following (Note: #framesz is total funclet frame size, * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16): * * Frame type liking: @@ -771,19 +685,17 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | incoming arguments | * +=======================+ <---- Caller's SP - * | OSR padding | // If required - * |-----------------------| - * | Varargs regs space | // Only for varargs main functions; 64 bytes + * | Varargs regs space | // Only for varargs main functions; not used for LA64. * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) * |-----------------------| - * ~ alignment padding ~ // To make the whole frame 16 byte aligned + * |Callee saved registers | // multiple of 8 bytes, not including FP/RA * |-----------------------| * | Saved FP, RA | // 16 bytes * |-----------------------| - * |Callee saved registers | // multiple of 8 bytes, not includting FP/RA + * ~ alignment padding ~ // To make the whole frame 16 byte aligned * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) * |-----------------------| <---- Ambient SP @@ -793,38 +705,22 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * V * * - * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. 
For the general case, #3, - * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack - * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 232 bytes: - * - * FP,RA registers - * 9 int callee-saved register s0-s8 - * 8 float callee-saved registers f24-f31 - * 8 saved integer argument registers a0-a7, if varargs function support. - * 1 PSP slot - * == 20 slots * 8 bytes = 160 bytes. - * * The outgoing argument size, however, can be very large, if we call a function that takes a large number of * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of - * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before - * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset - * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument - * space. Both changes to SP might need to add alignment padding. - * - * In addition to the above "standard" frames, we also need to support a frame where the saved FP/RA are at the - * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/RA - * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc. - * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/RA - * changed, but where the alignment padding is placed has also changed. - * + * outgoing arguments for any call). 
 * - * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP - * as in the main function. + * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, + * and that location is the same relative to Caller-SP as in the main function, where it is higher than + * the callee-saved registers. + * That is to say, the PSPSym's relative offset to Caller-SP does not depend on the callee-saved registers. + * TODO-LoongArch64: the funclet's callee-saved registers should not be shared with the main function. * * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we * must add buffer space for the saved varargs/argument registers here, if the main function did the same. * + * Note that localloc cannot be used in a funclet. + * * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. * @@ -880,7 +776,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) { #ifdef DEBUG if (verbose) + { printf("*************** In genFuncletProlog()\n"); + } #endif assert(block != NULL); @@ -910,42 +808,39 @@ void CodeGen::genFuncletProlog(BasicBlock* block) maskArgRegsLiveIn = RBM_A0; } - regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; - int regsSavedSize = (compiler->compCalleeRegsPushed - 2) << 3; + regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; + int FP_offset = genFuncletInfo.fiSP_to_CalleeSaved_delta; - int SP_to_CalleeSaved_delta = genFuncletInfo.fiSP_to_CalleeSaved_delta; - if ((SP_to_CalleeSaved_delta + regsSavedSize + genFuncletInfo.fiCalleeSavedPadding) <= 2040) + if ((FP_offset + (genCountBits(maskSaveRegs) << 3)) <= (2040 - 16)) // no FP/RA. 
{ - SP_to_CalleeSaved_delta += genFuncletInfo.fiCalleeSavedPadding; - genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - genSaveCalleeSavedRegistersHelp(maskSaveRegs, SP_to_CalleeSaved_delta, 0); - SP_to_CalleeSaved_delta += regsSavedSize; + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, SP_to_CalleeSaved_delta); - compiler->unwindSaveReg(REG_RA, SP_to_CalleeSaved_delta); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, SP_to_CalleeSaved_delta + 8); - compiler->unwindSaveReg(REG_FP, SP_to_CalleeSaved_delta + 8); + genSaveCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16, 0); } else { assert(frameSize < -2040); - int SP_delta = frameSize + SP_to_CalleeSaved_delta; - genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(frameSize + (FP_offset & -16), REG_R21, nullptr, true); - genSaveCalleeSavedRegistersHelp(maskSaveRegs, genFuncletInfo.fiCalleeSavedPadding, 0); - regsSavedSize += genFuncletInfo.fiCalleeSavedPadding; + frameSize = -(FP_offset & -16); + FP_offset &= 0xf; - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, regsSavedSize); - compiler->unwindSaveReg(REG_RA, regsSavedSize); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, regsSavedSize + 8); - compiler->unwindSaveReg(REG_FP, regsSavedSize + 8); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - 
genStackPointerAdjustment(-SP_to_CalleeSaved_delta, REG_R21, nullptr, /* reportUnwindData */ true); + genSaveCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16, 0); + + genStackPointerAdjustment(frameSize, REG_R21, nullptr, true); } // This is the end of the OS-reported prolog for purposes of unwinding @@ -1012,41 +907,28 @@ void CodeGen::genFuncletEpilog() int frameSize = genFuncletInfo.fiSpDelta; assert(frameSize < 0); - regMaskTP regsToRestoreMask = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; - int regsRestoreSize = (compiler->compCalleeRegsPushed - 2) << 3; + regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; + int FP_offset = genFuncletInfo.fiSP_to_CalleeSaved_delta; - int SP_to_CalleeSaved_delta = genFuncletInfo.fiSP_to_CalleeSaved_delta; - if ((SP_to_CalleeSaved_delta + regsRestoreSize + genFuncletInfo.fiCalleeSavedPadding) <= 2040) + if ((FP_offset + (genCountBits(maskSaveRegs) << 3)) > (2040 - 16)) // no FP/RA. { - SP_to_CalleeSaved_delta += genFuncletInfo.fiCalleeSavedPadding; - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, SP_to_CalleeSaved_delta, 0); - SP_to_CalleeSaved_delta += regsRestoreSize; - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, SP_to_CalleeSaved_delta); - compiler->unwindSaveReg(REG_RA, SP_to_CalleeSaved_delta); + assert(frameSize < -2040); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, SP_to_CalleeSaved_delta + 8); - compiler->unwindSaveReg(REG_FP, SP_to_CalleeSaved_delta + 8); + genStackPointerAdjustment(FP_offset & -16, REG_R21, nullptr, /* reportUnwindData */ true); - genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + frameSize += FP_offset & -16; + FP_offset = FP_offset & 0xf; } - else - { - assert(frameSize < -2040); - - genStackPointerAdjustment(SP_to_CalleeSaved_delta, REG_R21, nullptr, /* reportUnwindData */ true); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiCalleeSavedPadding, 0); 
- regsRestoreSize += genFuncletInfo.fiCalleeSavedPadding; + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16, 0); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, regsRestoreSize); - compiler->unwindSaveReg(REG_RA, regsRestoreSize); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, regsRestoreSize + 8); - compiler->unwindSaveReg(REG_FP, regsRestoreSize + 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - genStackPointerAdjustment(-frameSize - SP_to_CalleeSaved_delta, REG_R21, nullptr, /* reportUnwindData */ true); - } + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0); compiler->unwindReturn(REG_RA); @@ -1072,7 +954,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } assert(isFramePointerUsed()); - // The frame size and offsets must be finalized assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); @@ -1080,58 +961,40 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_RA) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 8 : 0; - // Because a method and funclets must have the same caller-relative PSPSym offset, // if there is a PSPSym, we have to pad the funclet frame size for OSR. // - unsigned osrPad = 0; - if (compiler->opts.IsOSR() && (PSPSize > 0)) + int osrPad = 0; + if (compiler->opts.IsOSR()) { - osrPad = compiler->info.compPatchpointInfo->TotalFrameSize(); + osrPad -= compiler->info.compPatchpointInfo->TotalFrameSize(); // OSR pad must be already aligned to stack size. 
assert((osrPad % STACK_ALIGN) == 0); } - genFuncletInfo.fiCalleeSavedPadding = 0; - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad; - - unsigned regsSavedSize = genCountBits(rsMaskSaveRegs) << 3; - assert(genCountBits(rsMaskSaveRegs) == compiler->compCalleeRegsPushed); - - unsigned saveRegsPlusPSPSize = regsSavedSize + PSPSize; - - assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); - unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + /* Now save it for future use */ + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() + osrPad; - unsigned funcletFrameSize = osrPad + saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; - unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); + int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; - int SP_to_CalleeSaved_delta = compiler->lvaOutgoingArgSpaceSize; - if ((SP_to_CalleeSaved_delta + regsSavedSize) >= 2040) - { - int offset = funcletFrameSizeAligned - SP_to_CalleeSaved_delta; - SP_to_CalleeSaved_delta = AlignUp((UINT)offset, STACK_ALIGN); + genFuncletInfo.fiSP_to_CalleeSaved_delta = funcletFrameSize; - genFuncletInfo.fiCalleeSavedPadding = SP_to_CalleeSaved_delta - offset; - } + funcletFrameSize += genCountBits(rsMaskSaveRegs) * REGSIZE_BYTES; - if (compiler->lvaMonAcquired != BAD_VAR_NUM && !compiler->opts.IsOSR()) + int delta_PSP = -TARGET_POINTER_SIZE; + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { - // We furthermore allocate the "monitor acquired" bool between PSP and - // the saved registers because this is part of the EnC header. - // Note that OSR methods reuse the monitor bool created by tier 0. 
- osrPad += compiler->lvaLclSize(compiler->lvaMonAcquired); + delta_PSP -= TARGET_POINTER_SIZE; } - /* Now save it for future use */ - genFuncletInfo.fiSpDelta = -(int)funcletFrameSizeAligned; - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_CalleeSaved_delta = SP_to_CalleeSaved_delta; + funcletFrameSize = funcletFrameSize - delta_PSP - osrPad; + funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); - genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSizeAligned - osrPad - 8; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = -(int)osrPad - 8; + genFuncletInfo.fiSpDelta = -funcletFrameSize; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSize + delta_PSP + osrPad; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = osrPad + delta_PSP; #ifdef DEBUG if (verbose) @@ -4278,9 +4141,17 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) int CodeGenInterface::genSPtoFPdelta() const { assert(isFramePointerUsed()); - assert(compiler->compCalleeRegsPushed >= 2); + assert(compiler->compCalleeRegsPushed >= 2); // always FP/RA. - int delta = compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3) - 8; + int delta = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + delta -= TARGET_POINTER_SIZE; + } + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) + { + delta -= TARGET_POINTER_SIZE; + } assert(delta >= 0); return delta; @@ -7660,8 +7531,8 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * ... * st.d s8,sp,off2+8*8 * - * st.d ra,sp,off3 - * st.d fp,sp,off3+8 + * st.d ra,sp,off3+8 + * st.d fp,sp,off3 * * Notes: * 1. FP is always saved, and the first store is FP, RA. @@ -7669,37 +7540,41 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * 3. For frames with varargs, not implemented completely and not tested ! * 4. 
We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). * - * For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top - * of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same - * rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + * For functions with GS and localloc, we had saved the frame pointer and RA at the top + * of the frame. Note that the funclet frames must follow the same rule, + * and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. * * The frames look like the following (simplified to only include components that matter for establishing the * frames). See also Compiler::lvaAssignFrameOffsets(). * - * * The LoongArch64's frame layout is liking: * + * If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address + * (FP and RA) are protected from buffer overrun by the GS cookie. + * So we always save the FP/RA along with the rest of the callee-saved registers above. + * * | | * |-----------------------| * | incoming arguments | * +=======================+ <---- Caller's SP - * | Arguments Or | // if needed. * | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) + * | PSPSym | // 8 bytes, Only for frames with EH, (omitted in NativeAOT ABI) * |-----------------------| - * | locals, temps, etc. 
| + * |Callee saved registers | // not including FP/RA; multiple of 8 bytes * |-----------------------| - * | possible GS cookie | + * | Saved RA | // 8 bytes * |-----------------------| * | Saved FP | // 8 bytes * |-----------------------| - * | Saved RA | // 8 bytes + * | possible GS cookie | * |-----------------------| - * |Callee saved registers | // not including FP/RA; multiple of 8 bytes + * | locals, temps, etc. | + * |-----------------------| + * | possible GS cookie | * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) * |-----------------------| <---- Ambient SP @@ -7748,6 +7623,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe regSet.rsMaskCalleeSaved = rsPushRegs | RBM_FPBASE | RBM_RA; #ifdef DEBUG + JITDUMP("Frame info. #outsz=%d; #framesz=%d; LclFrameSize=%d;\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + genTotalFrameSize(), compiler->compLclFrameSize); + if (compiler->compCalleeRegsPushed != genCountBits(regSet.rsMaskCalleeSaved)) { printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", @@ -7770,84 +7648,52 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif // DEBUG - // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we - // generate based on various sizes. - int frameType = 0; - - // The amount to add from SP before starting to store the callee-saved registers. - int calleeSaveSPDelta = 0; - - // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address - // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses, - // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will - // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our - // saved FP/RA. 
In that case, we save FP/RA along with the rest of the callee-saved registers, above - // the GS cookie. - // - // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to - // create a frame pointer chain. - // + int totalFrameSize = genTotalFrameSize(); + int leftFrameSize = 0; + int localFrameSize = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + localFrameSize -= TARGET_POINTER_SIZE; + } + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) + { + localFrameSize -= TARGET_POINTER_SIZE; + } - // This will be the starting place for saving the callee-saved registers, in increasing order. - int offset = compiler->lvaOutgoingArgSpaceSize; +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("Frame info. #outsz=%d; #framesz=%d; lcl=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + genTotalFrameSize(), localFrameSize); + } +#endif - int totalFrameSize = genTotalFrameSize(); - // The (totalFrameSize <= 2040) condition ensures the offsets of st.d/ld.d. + int FP_offset = localFrameSize; if (totalFrameSize <= 2040) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); compiler->unwindAllocStack(totalFrameSize); - - // Case #1. - // - // Generate: - // addi.d sp, sp, -framesz - // st.d callee_saved_registers ### not including the fp and ra. - // st.d ra,sp,outsz - // st.d fp,sp,outsz+8 - // - // After saving callee-saved registers, ra and fp, we establish the frame pointer with: - // addi.d fp, sp, (the offset of saving fp) - // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); - - frameType = 1; } else { - JITDUMP("Frame type 2. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); - - frameType = 2; - - if ((offset + (compiler->compCalleeRegsPushed << 3)) >= 2040) - { - offset = totalFrameSize - compiler->lvaOutgoingArgSpaceSize; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; - - genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); - } - else + if ((localFrameSize + (compiler->compCalleeRegsPushed << 3)) > 2040) { - genStackPointerAdjustment(-totalFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true); + leftFrameSize = localFrameSize & -16; + totalFrameSize = totalFrameSize - (localFrameSize & -16); + FP_offset = localFrameSize & 0xf; } + genStackPointerAdjustment(-totalFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true); } + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); - genSaveCalleeSavedRegistersHelp(rsPushRegs, offset, 0); - offset += (int)(genCountBits(rsPushRegs) << 3); - - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset); - compiler->unwindSaveReg(REG_RA, offset); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset + 8); - compiler->unwindSaveReg(REG_FP, offset + 8); + genSaveCalleeSavedRegistersHelp(rsPushRegs, FP_offset + 16, 0); - JITDUMP(" offsetSpToSavedFp=%d\n", offset + 8); - genEstablishFramePointer(offset + 8, /* reportUnwindData */ true); + JITDUMP(" offsetSpToSavedFp=%d\n", FP_offset); + genEstablishFramePointer(FP_offset, /* reportUnwindData */ true); // For varargs, home the incoming arg registers last. 
Note that there is nothing to unwind here, // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't @@ -7858,19 +7704,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe NYI_LOONGARCH64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); } -#ifdef DEBUG - if (compiler->opts.disAsm) - { - assert(frameType != 0); - printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); - } -#endif - - if (calleeSaveSPDelta != 0) + if (leftFrameSize != 0) { - assert(frameType == 2); - calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; - genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + genStackPointerAdjustment(-leftFrameSize, initReg, pInitRegZeroed, /* reportUnwindData */ true); } } @@ -7882,85 +7718,78 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) assert(isFramePointerUsed()); - // This will be the starting place for restoring the callee-saved registers, in decreasing order. - int calleeSaveSPOffset = 0; - int remainingSPSize = 0; - int totalFrameSize = genTotalFrameSize(); - if (totalFrameSize <= 2040) + int localFrameSize = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + localFrameSize -= TARGET_POINTER_SIZE; + } + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, dspBool(compiler->compLocallocUsed)); + localFrameSize -= TARGET_POINTER_SIZE; + } + + JITDUMP("Frame type. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + emitter* emit = GetEmitter(); + int FP_offset = localFrameSize; + int remainingSPSize = totalFrameSize; + if (totalFrameSize <= 2040) + { if (compiler->compLocallocUsed) { - int SPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize; - + int SPtoFPdelta = genSPtoFPdelta(); // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - remainingSPSize = totalFrameSize; } else { - JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " - "localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - if ((compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3)) > 2047) + if (compiler->compLocallocUsed) { - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize & -16; - if (compiler->compLocallocUsed) + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + if (emitter::isValidSimm12(SPtoFPdelta)) { - int SPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8; - - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); - compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); } else { - genStackPointerAdjustment(calleeSaveSPOffset, REG_RA, nullptr, /* reportUnwindData */ true); + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, SPtoFPdelta); + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, REG_RA); } - remainingSPSize = totalFrameSize - calleeSaveSPOffset; - 
calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize - calleeSaveSPOffset; } - else + if ((localFrameSize + (compiler->compCalleeRegsPushed << 3)) > 2040) { - if (compiler->compLocallocUsed) - { - int SPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize; + remainingSPSize = localFrameSize & -16; + genStackPointerAdjustment(remainingSPSize, REG_RA, nullptr, /* reportUnwindData */ true); - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); - compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); - } - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - remainingSPSize = totalFrameSize; + remainingSPSize = totalFrameSize - remainingSPSize; + FP_offset = localFrameSize & 0xf; } } - JITDUMP(" calleeSaveSPOffset=%d\n", calleeSaveSPOffset); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, 0); - calleeSaveSPOffset += (compiler->compCalleeRegsPushed - 2) << 3; + JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16, 0); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset); - compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset); + emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset + 8); - compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset + 8); + emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); if (emitter::isValidUimm11(remainingSPSize)) { - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, remainingSPSize); + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, remainingSPSize); } else { - GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, remainingSPSize); - 
GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, remainingSPSize); + emit->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); } compiler->unwindAllocStack(remainingSPSize); @@ -7972,12 +7801,12 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) if (emitter::isValidUimm11(tier0FrameSize)) { - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, tier0FrameSize); + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, tier0FrameSize); } else { - GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, tier0FrameSize); - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, tier0FrameSize); + emit->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); } compiler->unwindAllocStack(tier0FrameSize); } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 546ba7b3180899..b7c119be94987c 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -770,8 +770,8 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * addi sp, sp, -#framesz ; establish the frame * sd s1, #outsz(sp) ; save callee-saved registers, as necessary * sd s2, #(outsz+8)(sp) - * sd ra, #(outsz+?)(sp) ; save RA (8 bytes) - * sd fp, #(outsz+?+8)(sp) ; save FP (8 bytes) + * sd ra, #(outsz+?+8)(sp) ; save RA (8 bytes) + * sd fp, #(outsz+?)(sp) ; save FP (8 bytes) * * The funclet frame layout: * @@ -779,8 +779,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | incoming arguments | * +=======================+ <---- Caller's SP - * | Arguments Or | // if needed - * | Varargs regs space | // Only for varargs functions; NYI on RV64 + * | Varargs regs space | // Only for varargs main functions; not used for RV64. 
* |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| @@ -788,11 +787,9 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned * |-----------------------| - * | Saved FP | // 8 bytes + * |Callee saved registers | // multiple of 8 bytes, not including FP/RA * |-----------------------| - * | Saved RA | // 8 bytes - * |-----------------------| - * |Callee saved registers | // multiple of 8 bytes, not includting RA/FP + * | Saved FP, RA | // 16 bytes * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) * |-----------------------| <---- Ambient SP @@ -801,31 +798,27 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * | | downward | * V * - * Note, that SP only change once. That means, there will be a maximum of one alignment slot needed. - * Also remember, the stack oiubter needs to be 16 byte aligned at all times. - * The size of the PSP slot plus callee-saved registers space is a maximum of 280 bytes: - * - * RA,FP registers - * 11 int callee-saved register s1-s11 - * 12 float callee-saved registers f8-f9, f18-f27 - * 8 saved integer argument registers a0-a7, if varargs function support. - * 1 PSP slot - * 1 alignment slot or monitor acquired slot - * == 35 slots * 8 bytes = 280 bytes. * * The outgoing argument size, however, can be very large, if we call a function that takes a large number of * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of - * outgoing arguments for any call). 
In that case, we need to 16-byte align the initial change to SP, before - saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset - encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument - space. Both changes to SP might need to add alignment padding. + outgoing arguments for any call). + * + * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, + * and that location is the same relative to Caller-SP as in the main function, where it is placed higher than + * the callee-saved registers. + * That is to say, the PSPSym's relative offset to Caller-SP does not depend on the callee-saved registers. + * + * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we + * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * + * Note that localloc cannot be used in a funclet. 
* * An example epilog sequence: * addi sp, sp, #outsz ; if any outgoing argument space * ld s1, #(xxx-8)(sp) ; restore callee-saved registers * ld s2, #xxx(sp) - * ld ra, #(xxx+?-8)(sp) ; restore RA + * ld ra, #(xxx+?+8)(sp) ; restore RA * ld fp, #(xxx+?)(sp) ; restore FP * addi sp, sp, #framesz * jarl zero, ra @@ -840,8 +833,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) printf("*************** In genFuncletProlog()\n"); } #endif + // TODO-RISCV64: Implement varargs (NYI_RISCV64) - // TODO-RISCV64-CQ: We can use C extension for optimization assert(block != NULL); assert(block->HasFlag(BBF_FUNCLET_BEG)); @@ -852,9 +845,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) compiler->unwindBegProlog(); - const bool isFilter = (block->bbCatchTyp == BBCT_FILTER); - const int frameSize = genFuncletInfo.fiSpDelta; - + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta; assert(frameSize < 0); regMaskTP maskArgRegsLiveIn; @@ -871,53 +863,39 @@ void CodeGen::genFuncletProlog(BasicBlock* block) maskArgRegsLiveIn = RBM_A0; } - regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; - int regsSavedSize = (compiler->compCalleeRegsPushed - 2) << 3; - - int calleeSavedDelta = genFuncletInfo.fiSP_to_CalleeSaved_delta; - - emitter* emit = GetEmitter(); + regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; + int FP_offset = genFuncletInfo.fiSP_to_CalleeSaved_delta; - if (calleeSavedDelta + regsSavedSize + genFuncletInfo.fiCalleeSavedPadding <= 2040) + if ((FP_offset + (genCountBits(maskSaveRegs) << 3)) <= (2040 - 16)) // no FP/RA. 
{ - calleeSavedDelta += genFuncletInfo.fiCalleeSavedPadding; - - // addi sp, sp, #frameSize genStackPointerAdjustment(frameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - genSaveCalleeSavedRegistersHelp(maskSaveRegs, calleeSavedDelta, 0); - calleeSavedDelta += regsSavedSize; + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - // sd ra, #calleeSavedDelta(sp) - emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSavedDelta); - compiler->unwindSaveReg(REG_RA, calleeSavedDelta); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - // sd fp, #(calleeSavedDelta+8)(sp) - emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSavedDelta + 8); - compiler->unwindSaveReg(REG_FP, calleeSavedDelta + 8); + genSaveCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16, 0); } else { assert(frameSize < -2040); - int spDelta = frameSize + calleeSavedDelta; + genStackPointerAdjustment(frameSize + (FP_offset & -16), REG_SCRATCH, nullptr, true); - // addi sp, sp, #spDelta - genStackPointerAdjustment(spDelta, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + frameSize = -(FP_offset & -16); + FP_offset &= 0xf; - genSaveCalleeSavedRegistersHelp(maskSaveRegs, genFuncletInfo.fiCalleeSavedPadding, 0); - regsSavedSize += genFuncletInfo.fiCalleeSavedPadding; + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - // sd ra, #regsSavedSize(sp) - emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, regsSavedSize); - compiler->unwindSaveReg(REG_RA, regsSavedSize); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - // sd fp, #(regsSavedSize+8)(sp) - emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, regsSavedSize + 8); - 
compiler->unwindSaveReg(REG_FP, regsSavedSize + 8); + genSaveCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16, 0); - // addi sp, sp -#calleeSavedDelta - genStackPointerAdjustment(-calleeSavedDelta, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(frameSize, REG_SCRATCH, nullptr, true); } // This is the end of the OS-reported prolog for purposes of unwinding @@ -977,66 +955,38 @@ void CodeGen::genFuncletEpilog() printf("*************** In genFuncletEpilog()\n"); } #endif - // TODO-RISCV64: Implement varargs (NYI_RISCV64) - // TODO-RISCV64-CQ: We can use C extension for optimization ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); compiler->unwindBegEpilog(); - const int frameSize = genFuncletInfo.fiSpDelta; - + int frameSize = genFuncletInfo.fiSpDelta; assert(frameSize < 0); - regMaskTP maskRestoreRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; - int regsRestoreSize = (compiler->compCalleeRegsPushed - 2) << 3; - - int calleeSavedDelta = genFuncletInfo.fiSP_to_CalleeSaved_delta; + regMaskTP maskSaveRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED; + int FP_offset = genFuncletInfo.fiSP_to_CalleeSaved_delta; - emitter* emit = GetEmitter(); - regNumber tempReg = rsGetRsvdReg(); - - if (calleeSavedDelta + regsRestoreSize + genFuncletInfo.fiCalleeSavedPadding <= 2040) + if ((FP_offset + (genCountBits(maskSaveRegs) << 3)) > (2040 - 16)) // no FP/RA. 
{ - calleeSavedDelta += genFuncletInfo.fiCalleeSavedPadding; - genRestoreCalleeSavedRegistersHelp(maskRestoreRegs, calleeSavedDelta, 0); - calleeSavedDelta += regsRestoreSize; - - // ld ra, #calleeSavedDelta(sp) - emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSavedDelta); - compiler->unwindSaveReg(REG_RA, calleeSavedDelta); + assert(frameSize < -2040); - // ld fp, #(calleeSavedDelta+8)(sp) - emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSavedDelta + 8); - compiler->unwindSaveReg(REG_FP, calleeSavedDelta + 8); + genStackPointerAdjustment(FP_offset & -16, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - // addi sp, sp, -#frameSize - genStackPointerAdjustment(-frameSize, tempReg, nullptr, /* reportUnwindData */ true); + frameSize += FP_offset & -16; + FP_offset = FP_offset & 0xf; } - else - { - assert(frameSize < -2040); - - // addi sp, sp, #calleeSavedDelta - genStackPointerAdjustment(calleeSavedDelta, tempReg, nullptr, /* reportUnwindData */ true); - genRestoreCalleeSavedRegistersHelp(maskRestoreRegs, genFuncletInfo.fiCalleeSavedPadding, 0); - regsRestoreSize += genFuncletInfo.fiCalleeSavedPadding; + genRestoreCalleeSavedRegistersHelp(maskSaveRegs, FP_offset + 16, 0); - // ld ra, #regsRestoreSize(sp) - emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, regsRestoreSize); - compiler->unwindSaveReg(REG_RA, regsRestoreSize); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - // ld fp, #(regsRestoreSize+8)(sp) - emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, regsRestoreSize + 8); - compiler->unwindSaveReg(REG_FP, regsRestoreSize + 8); + GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - // addi sp, sp, -#(frameSize + calleeSavedDelta) - genStackPointerAdjustment(-(frameSize + calleeSavedDelta), tempReg, nullptr, /* reportUnwindData */ true); - 
} + genStackPointerAdjustment(-frameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - // jarl zero, ra - emit->emitIns_R_R_I(INS_jalr, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0); + GetEmitter()->emitIns_R_R_I(INS_jalr, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0); compiler->unwindReturn(REG_RA); compiler->unwindEndEpilog(); @@ -1059,7 +1009,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } assert(isFramePointerUsed()); - // The frame size and offsets must be finalized assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); @@ -1067,74 +1016,56 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_RA) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - unsigned pspSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 8 : 0; - - // If there is a PSP slot, we have to pad the funclet frame size for OSR. - // For more details see CodeGen::genFuncletProlog + // Because a method and funclets must have the same caller-relative PSPSym offset, + // if there is a PSPSym, we have to pad the funclet frame size for OSR. // - unsigned osrPad = 0; - if (compiler->opts.IsOSR() && (pspSize != 0)) + int osrPad = 0; + if (compiler->opts.IsOSR()) { - osrPad = compiler->info.compPatchpointInfo->TotalFrameSize(); + osrPad -= compiler->info.compPatchpointInfo->TotalFrameSize(); - // osrPad must be aligned to stackSize - assert(osrPad % STACK_ALIGN == 0); + // OSR pad must be already aligned to stack size. 
+ assert((osrPad % STACK_ALIGN) == 0); } - genFuncletInfo.fiCalleeSavedPadding = 0; - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad; - - unsigned savedRegsSize = genCountBits(rsMaskSaveRegs); - assert(savedRegsSize == compiler->compCalleeRegsPushed); - savedRegsSize <<= 3; + /* Now save it for future use */ + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() + osrPad; - unsigned saveRegsPlusPSPSize = savedRegsSize + pspSize; + int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; - assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); - unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + genFuncletInfo.fiSP_to_CalleeSaved_delta = funcletFrameSize; - unsigned funcletFrameSize = osrPad + saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; - unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); + funcletFrameSize += genCountBits(rsMaskSaveRegs) * REGSIZE_BYTES; - int SP_to_CalleeSaved_delta = compiler->lvaOutgoingArgSpaceSize; - if ((SP_to_CalleeSaved_delta + savedRegsSize) >= 2040) + int delta_PSP = -TARGET_POINTER_SIZE; + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { - int offset = funcletFrameSizeAligned - SP_to_CalleeSaved_delta; - SP_to_CalleeSaved_delta = AlignUp((UINT)offset, STACK_ALIGN); - - genFuncletInfo.fiCalleeSavedPadding = SP_to_CalleeSaved_delta - offset; + delta_PSP -= TARGET_POINTER_SIZE; } - if (compiler->lvaMonAcquired != BAD_VAR_NUM && !compiler->opts.IsOSR()) - { - // We furthermore allocate the "monitor acquired" bool between PSP and - // the saved registers because this is part of the EnC header. - // Note that OSR methods reuse the monitor bool created by tier 0. 
- osrPad += compiler->lvaLclSize(compiler->lvaMonAcquired); - } + funcletFrameSize = funcletFrameSize - delta_PSP - osrPad; + funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); - /* Now save it for future use */ - genFuncletInfo.fiSpDelta = -(int)funcletFrameSizeAligned; + genFuncletInfo.fiSpDelta = -funcletFrameSize; genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_CalleeSaved_delta = SP_to_CalleeSaved_delta; - genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSizeAligned - osrPad - 8; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = -(int)osrPad - 8; + genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSize + delta_PSP + osrPad; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = osrPad + delta_PSP; #ifdef DEBUG if (verbose) { printf("\n"); printf("Funclet prolog / epilog info\n"); - printf(" Save regs: "); + printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); if (compiler->opts.IsOSR()) { - printf(" OSR Pad: %d\n", osrPad); + printf(" OSR Pad: %d\n", osrPad); } - printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); + printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); - printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); + printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 0); @@ -4224,9 +4155,17 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree) int CodeGenInterface::genSPtoFPdelta() const { assert(isFramePointerUsed()); - assert(compiler->compCalleeRegsPushed >= 2); + assert(compiler->compCalleeRegsPushed >= 2); // always FP/RA. 
- int delta = compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3) - 8; + int delta = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + delta -= TARGET_POINTER_SIZE; + } + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) + { + delta -= TARGET_POINTER_SIZE; + } assert(delta >= 0); return delta; @@ -7733,8 +7672,8 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * sd s11, #(offset+8*10)(sp) * * ; save ra, fp - * sd ra, #offset3(sp) ; save RA (8 bytes) - * sd fp, #(offset3+8)(sp) ; save FP (8 bytes) + * sd ra, #offset3+8(sp) ; save RA (8 bytes) + * sd fp, #(offset3)(sp) ; save FP (8 bytes) * * Notes: * 1. FP is always saved, and the first store is FP, RA. @@ -7742,9 +7681,9 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * 3. For frames with varargs, not implemented completely and not tested ! * 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). * - * For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top - * of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same - * rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + * For functions with GS and localloc, we save the frame pointer and RA at the top + * of the frame. Note that the funclet frames must follow the same rule, + * and both main frame and funclet frames (if any) must put PSPSym at the same offset from Caller-SP. * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage.
* * The frames look like the following (simplified to only include components that matter for establishing the @@ -7752,6 +7691,10 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * * The RISC-V's frame layout is liking: * + * If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address + * (FP and RA) are protected from buffer overrun by the GS cookie. + * So we always save the FP/RA along with the rest of the callee-saved registers above. + * * | | * |-----------------------| * | incoming arguments | @@ -7763,15 +7706,17 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * |-----------------------| * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) * |-----------------------| - * | locals, temps, etc. | - * |-----------------------| - * | possible GS cookie | + * |Callee saved registers | // not including FP/RA; multiple of 8 bytes * |-----------------------| * | Saved FP | // 8 bytes * |-----------------------| * | Saved RA | // 8 bytes * |-----------------------| - * |Callee saved registers | // not including FP/RA; multiple of 8 bytes + * | possible GS cookie | + * |-----------------------| + * | locals, temps, etc. | + * |-----------------------| + * | possible GS cookie | * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) * |-----------------------| <---- Ambient SP @@ -7785,12 +7730,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe { assert(compiler->compGeneratingProlog); - // The 'initReg' could have been calculated as one of the callee-saved registers (let's say T0, T1 and T2 are in - // use, so the next possible register is S1, which should be callee-save register). This is fine, as long as we - // save callee-saved registers before using 'initReg' for the first time. Instead, we can use REG_SCRATCH - // beforehand. 
We don't care if REG_SCRATCH will be overwritten, so we'll skip 'RegZeroed check'. - // - // Unlike on x86/x64, we can also push float registers to stack regMaskTP rsPushRegs = regSet.rsGetModifiedCalleeSavedRegsMask(); #if ETW_EBP_FRAMED @@ -7800,7 +7739,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif - // On RV64 we always use the FP (frame-pointer) + // We always use the FP (frame-pointer). assert(isFramePointerUsed()); // @@ -7823,25 +7762,25 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // is not worth it. // - // we will push callee-saved registers along with fp and ra registers to stack - regMaskTP rsPushRegsMask = rsPushRegs | RBM_FP | RBM_RA; - regSet.rsMaskCalleeSaved = rsPushRegsMask; + regSet.rsMaskCalleeSaved = rsPushRegs | RBM_FPBASE | RBM_RA; #ifdef DEBUG - if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegsMask)) + JITDUMP("Frame info. #outsz=%d; #framesz=%d; LclFrameSize=%d;\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + genTotalFrameSize(), compiler->compLclFrameSize); + + if (compiler->compCalleeRegsPushed != genCountBits(regSet.rsMaskCalleeSaved)) { printf("Error: unexpected number of callee-saved registers to push. Expected: %d. 
Got: %d ", - compiler->compCalleeRegsPushed, genCountBits(rsPushRegsMask)); - dspRegMask(rsPushRegsMask); + compiler->compCalleeRegsPushed, genCountBits(rsPushRegs | RBM_FPBASE | RBM_RA)); + dspRegMask(rsPushRegs | RBM_FPBASE | RBM_RA); printf("\n"); - assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegsMask)); + assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs | RBM_FPBASE | RBM_RA)); } if (verbose) { - regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_FLT_CALLEE_SAVED; - regMaskTP maskSaveRegsInt = rsPushRegs & RBM_INT_CALLEE_SAVED; - + regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; printf("Save float regs: "); dspRegMask(maskSaveRegsFloat); printf("\n"); @@ -7851,80 +7790,57 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif // DEBUG - // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we - // generate based on various sizes. - int frameType = 0; - - // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the - // first save instruction as a "predecrement" amount, if possible. - int calleeSaveSPDelta = 0; - - // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address - // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses, - // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will - // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our - // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above - // the GS cookie. - // - // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to - // create a frame pointer chain. 
- // - - // This will be the starting place for saving the callee-saved registers, in increasing order. - int offset = compiler->lvaOutgoingArgSpaceSize; - int totalFrameSize = genTotalFrameSize(); + int leftFrameSize = 0; + int localFrameSize = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + localFrameSize -= TARGET_POINTER_SIZE; + } + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) + { + localFrameSize -= TARGET_POINTER_SIZE; + } - emitter* emit = GetEmitter(); +#ifdef DEBUG + if (compiler->opts.disAsm) + { + printf("Frame info. #outsz=%d; #framesz=%d; lcl=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + genTotalFrameSize(), localFrameSize); + } +#endif - // ensure offset of sd/ld + int FP_offset = localFrameSize; if (totalFrameSize <= 2040) { - frameType = 1; - - emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); compiler->unwindAllocStack(totalFrameSize); - - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); } else { - frameType = 2; - // we have to adjust stack pointer; probably using add instead of addi - - JITDUMP("Frame type 2. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); - - if ((offset + (compiler->compCalleeRegsPushed << 3)) >= 2040) - { - offset = totalFrameSize - compiler->lvaOutgoingArgSpaceSize; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; - - genStackPointerAdjustment(-calleeSaveSPDelta, REG_SCRATCH, nullptr, /* reportUnwindData */ true); - } - else + if ((localFrameSize + (compiler->compCalleeRegsPushed << 3)) > 2040) { - genStackPointerAdjustment(-totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); + leftFrameSize = localFrameSize & -16; + totalFrameSize = totalFrameSize - (localFrameSize & -16); + FP_offset = localFrameSize & 0xf; } + // The 'initReg' could have been calculated as one of the callee-saved registers (let's say T0, T1 and T2 are in + // use, so the next possible register is S1, which should be callee-save register). This is fine, as long as we + // save callee-saved registers before using 'initReg' for the first time. Instead, we can use REG_SCRATCH + // beforehand. We don't care if REG_SCRATCH will be overwritten, so we'll skip 'RegZeroed check'. + // TODO-RV64: this should be resolved before calling `genPushCalleeSavedRegisters`. + genStackPointerAdjustment(-totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); } + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); - JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); - - genSaveCalleeSavedRegistersHelp(rsPushRegs, offset, 0); - offset += (int)(genCountBits(rsPushRegs) << 3); // each reg has 8 bytes + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - // From now on, we can safely use initReg. 
+ genSaveCalleeSavedRegistersHelp(rsPushRegs, FP_offset + 16, 0); - emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset); - compiler->unwindSaveReg(REG_RA, offset); - - emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset + 8); - compiler->unwindSaveReg(REG_FP, offset + 8); - - JITDUMP(" offsetSpToSavedFp=%d\n", offset + 8); - genEstablishFramePointer(offset + 8, /* reportUnwindData */ true); + JITDUMP(" offsetSpToSavedFp=%d\n", FP_offset); + genEstablishFramePointer(FP_offset, /* reportUnwindData */ true); // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't @@ -7935,18 +7851,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe NYI_RISCV64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); } -#ifdef DEBUG - if (compiler->opts.disAsm) - { - printf("DEBUG: RISCV64, frameType:%d\n\n", frameType); - } -#endif - - if (calleeSaveSPDelta != 0) + if (leftFrameSize != 0) { - assert(frameType == 2); - calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; - genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + genStackPointerAdjustment(-leftFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true); } } @@ -7956,80 +7863,72 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) regMaskTP regsToRestoreMask = regSet.rsGetModifiedCalleeSavedRegsMask(); - // On RV64 we always use the FP (frame-pointer) assert(isFramePointerUsed()); - int totalFrameSize = genTotalFrameSize(); - int remainingSPSize = totalFrameSize; - int callerSPtoFPdelta = 0; - int calleeSaveSPOffset = 0; // This will be the starting place for restoring - // the callee-saved registers, in decreasing order. 
+ int totalFrameSize = genTotalFrameSize(); + int localFrameSize = compiler->compLclFrameSize; + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + localFrameSize -= TARGET_POINTER_SIZE; + } + if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) + { + localFrameSize -= TARGET_POINTER_SIZE; + } - emitter* emit = GetEmitter(); + JITDUMP("Frame type. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); - // ensure offset of sd/ld + emitter* emit = GetEmitter(); + int FP_offset = localFrameSize; + int remainingSPSize = totalFrameSize; if (totalFrameSize <= 2040) { - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, dspBool(compiler->compLocallocUsed)); - if (compiler->compLocallocUsed) { - callerSPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize; + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - // remainingSPSize = totalFrameSize; } else { - JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; calleeSaveRegsPushed: %d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - if ((compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3)) > 2047) + if (compiler->compLocallocUsed) { - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize & 0xfffffff0; - - if (compiler->compLocallocUsed) + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + if (emitter::isValidSimm12(SPtoFPdelta)) { - callerSPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8; + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); } else { - genStackPointerAdjustment(calleeSaveSPOffset, REG_RA, nullptr, /* reportUnwindData */ true); + regNumber tempReg = rsGetRsvdReg(); + emit->emitLoadImmediate(EA_PTRSIZE, tempReg, SPtoFPdelta); + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, tempReg); } - remainingSPSize = totalFrameSize - calleeSaveSPOffset; - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize - calleeSaveSPOffset; } - else + if ((localFrameSize + (compiler->compCalleeRegsPushed << 3)) > 2040) { - if (compiler->compLocallocUsed) - { - callerSPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize; - } - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - // remainingSPSize = totalFrameSize; - } - } + remainingSPSize = localFrameSize & -16; + genStackPointerAdjustment(remainingSPSize, REG_RA, nullptr, /* reportUnwindData */ true); - if (compiler->compLocallocUsed) - { - // restore sp form fp: addi sp, -#callerSPtoFPdelta(fp) - emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -callerSPtoFPdelta); - compiler->unwindSetFrameReg(REG_FPBASE, callerSPtoFPdelta); + remainingSPSize = totalFrameSize - remainingSPSize; + FP_offset = localFrameSize & 0xf; + } } - JITDUMP(" calleeSaveSPOffset=%d, callerSPtoFPdelta=%d\n", calleeSaveSPOffset, callerSPtoFPdelta); - 
genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, 0); - - // restore ra/fp regs - calleeSaveSPOffset += (compiler->compCalleeRegsPushed - 2) << 3; + JITDUMP(" calleeSaveSPOffset=%d\n", FP_offset + 16); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, FP_offset + 16, 0); - emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset); - compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset); + emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, FP_offset + 8); + compiler->unwindSaveReg(REG_RA, FP_offset + 8); - emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset + 8); - compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset + 8); + emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, FP_offset); + compiler->unwindSaveReg(REG_FP, FP_offset); if (emitter::isValidUimm11(remainingSPSize)) { @@ -8043,7 +7942,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } compiler->unwindAllocStack(remainingSPSize); - // for OSR we have to adjust SP to remove tier0 frame + // For OSR, we must also adjust the SP to remove the Tier0 frame. if (compiler->opts.IsOSR()) { const int tier0FrameSize = compiler->info.compPatchpointInfo->TotalFrameSize(); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index c101955fdea7ed..e6645e1f03d8f4 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5669,23 +5669,52 @@ void Compiler::lvaFixVirtualFrameOffsets() // We set FP to be after LR, FP delta += 2 * REGSIZE_BYTES; } -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) else { // FP is used. 
JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + else + { + // FP is used. + delta += (compCalleeRegsPushed << 3); + + if ((lvaMonAcquired != BAD_VAR_NUM) && !opts.IsOSR()) + { + int offset = lvaTable[lvaMonAcquired].GetStackOffset() + delta; + lvaTable[lvaMonAcquired].SetStackOffset(offset); + + if (lvaPSPSym != BAD_VAR_NUM) + { + int offset = lvaTable[lvaPSPSym].GetStackOffset() + delta; + lvaTable[lvaPSPSym].SetStackOffset(offset); + delta += TARGET_POINTER_SIZE; + } + + delta += lvaLclSize(lvaMonAcquired); + } + else if (lvaPSPSym != BAD_VAR_NUM) + { + int offset = lvaTable[lvaPSPSym].GetStackOffset() + delta; + lvaTable[lvaPSPSym].SetStackOffset(offset); + delta += TARGET_POINTER_SIZE; + } + + JITDUMP("--- delta bump %d for FP frame\n", delta); + } +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 if (opts.IsOSR()) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) // Stack offset includes Tier0 frame. // JITDUMP("--- delta bump %d for OSR + Tier0 frame\n", info.compPatchpointInfo->TotalFrameSize()); delta += info.compPatchpointInfo->TotalFrameSize(); -#endif +#endif // TARGET_AMD64 || TARGET_ARM64 } JITDUMP("--- virtual stack offset to actual stack offset delta is %d\n", delta); @@ -5775,26 +5804,20 @@ void Compiler::lvaFixVirtualFrameOffsets() #endif // FEATURE_FIXED_OUT_ARGS -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // We normally add alignment below the locals between them and the outgoing // arg space area. When we store fp/lr(ra) at the bottom, however, this will // be below the alignment.
So we should not apply the alignment adjustment to // them. It turns out we always store these at +0 and +8 of the FP, // so instead of dealing with skipping adjustment just for them we just set // them here always. + // For LoongArch64 and RISCV64, the RA is always at fp+8. assert(codeGen->isFramePointerUsed()); if (lvaRetAddrVar != BAD_VAR_NUM) { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - assert(codeGen->isFramePointerUsed()); - if (lvaRetAddrVar != BAD_VAR_NUM) - { - // For LoongArch64 and RISCV64, the RA is below the fp. see the `genPushCalleeSavedRegisters` - lvaTable[lvaRetAddrVar].SetStackOffset(-REGSIZE_BYTES); - } -#endif // !TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 } #ifdef TARGET_ARM @@ -6548,9 +6571,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // if (opts.IsOSR()) { +#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + originalFrameStkOffs = info.compPatchpointInfo->TotalFrameSize(); +#else originalFrameSize = info.compPatchpointInfo->TotalFrameSize(); originalFrameStkOffs = stkOffs; stkOffs -= originalFrameSize; +#endif } #ifdef TARGET_XARCH @@ -6606,7 +6633,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - assert(compCalleeRegsPushed >= 2); + assert(compCalleeRegsPushed >= 2); // always FP/RA. + stkOffs -= (compCalleeRegsPushed << 3); #else // !TARGET_LOONGARCH64 && !TARGET_RISCV64 #ifdef TARGET_ARM @@ -7331,14 +7359,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // FEATURE_FIXED_OUT_ARGS -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For LoongArch64 and RISCV64, CalleeSavedRegs are at bottom.
- int pushedCount = 0; -#else // compLclFrameSize equals our negated virtual stack offset minus the pushed registers and return address // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'. int pushedCount = compCalleeRegsPushed; -#endif #ifdef TARGET_ARM64 if (info.compIsVarArgs) diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index dae93baebad306..20b55610594fc6 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -158,8 +158,9 @@ class RegSet regMaskTP _rsMaskVars; // backing store for rsMaskVars property #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + // TODO: the funclet's callee-saved registers should not be shared with the main function. regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty)