From 0c79a82f4c068ad57ceb09873d7acc00f8fa69ea Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 7 Sep 2023 18:32:19 +0800 Subject: [PATCH 1/4] [LoongArch64-RISCV64] Refactor the profiler for LoongArch64 and also fix some errors for RISCV64. --- src/coreclr/jit/codegenloongarch64.cpp | 100 +++++++++++++------ src/coreclr/jit/targetloongarch64.h | 16 ++-- src/coreclr/vm/loongarch64/asmconstants.h | 49 +++++----- src/coreclr/vm/loongarch64/asmhelpers.S | 24 +++-- src/coreclr/vm/loongarch64/cgencpu.h | 30 +++--- src/coreclr/vm/loongarch64/profiler.cpp | 111 +++++++++++----------- src/coreclr/vm/proftoeeinterfaceimpl.h | 4 +- src/coreclr/vm/riscv64/asmconstants.h | 7 +- src/coreclr/vm/riscv64/cgencpu.h | 2 +- 9 files changed, 186 insertions(+), 157 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index bdef67f16595e..0a632a2bef793 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -7758,35 +7758,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) GetEmitter()->emitIns_I(INS_dbar, EA_4BYTE, INS_BARRIER_FULL); } -//----------------------------------------------------------------------------------- -// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. -// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. -// -// Arguments: -// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL -// -// Return Value: -// None -// -void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) -{ - assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); - - // Only hook if profiler says it's okay. 
- if (!compiler->compIsProfilerHookNeeded()) - { - return; - } - - compiler->info.compProfilerCallback = true; - - // Need to save on to the stack level, since the helper call will pop the argument - unsigned saveStackLvl2 = genStackLevel; - - /* Restore the stack level */ - SetStackLevel(saveStackLvl2); -} - /*----------------------------------------------------------------------------- * * Push/Pop any callee-saved registers we have used @@ -8662,6 +8633,7 @@ void CodeGen::genFnPrologCalleeRegArgs() assert(!regArgMaskLive); } +#ifdef PROFILING_SUPPORTED //----------------------------------------------------------------------------------- // genProfilingEnterCallback: Generate the profiling function enter callback. // @@ -8677,10 +8649,78 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); - // Give profiler a chance to back out of hooking this method if (!compiler->compIsProfilerHookNeeded()) { return; } + + if (compiler->compProfilerMethHndIndirected) + { + instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_FUNC_ID, + (ssize_t)compiler->compProfilerMethHnd); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, + REG_PROFILER_ENTER_ARG_FUNC_ID, 0); + } + else + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd); + } + + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_CALLER_SP, genFramePointerReg(), + (ssize_t)(-callerSPOffset), REG_PROFILER_ENTER_ARG_CALLER_SP); + + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); + + if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH) != RBM_NONE) + { + *pInitRegZeroed = false; + } } + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function 
leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + if (compiler->compProfilerMethHndIndirected) + { + instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_LEAVE_ARG_FUNC_ID, + (ssize_t)compiler->compProfilerMethHnd); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, + REG_PROFILER_LEAVE_ARG_FUNC_ID, 0); + } + else + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd); + } + + gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_FUNC_ID); + + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_CALLER_SP, genFramePointerReg(), + (ssize_t)(-callerSPOffset), REG_PROFILER_LEAVE_ARG_CALLER_SP); + + gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_CALLER_SP); + + genEmitHelperCall(helper, 0, EA_UNKNOWN); +} +#endif // PROFILING_SUPPORTED + #endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index d9442f17299b4..9c32ef3703ff0 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -212,14 +212,14 @@ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks - #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R10 - #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R10 - 
#define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R11 - #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R11 - #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R10 - #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R10 - #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R11 - #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R11 + #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_T1 + #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_T1 + #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_T2 + #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_T2 + #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_PROFILER_ENTER_ARG_FUNC_ID + #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_PROFILER_ENTER_ARG_FUNC_ID + #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_PROFILER_ENTER_ARG_CALLER_SP + #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_PROFILER_ENTER_ARG_CALLER_SP // The registers trashed by profiler enter/leave/tailcall hook #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index 929eb732451dd..99aa2fedc9f63 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -255,43 +255,38 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCoun ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) #ifdef PROFILING_SUPPORTED -#define PROFILE_ENTER 1 -#define PROFILE_LEAVE 2 -#define PROFILE_TAILCALL 4 - -#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 272 -ASMCONSTANTS_C_ASSERT(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA == sizeof(PROFILE_PLATFORM_SPECIFIC_DATA)) +#define PROFILE_ENTER 1 +#define PROFILE_LEAVE 2 +#define PROFILE_TAILCALL 4 + +// NOTE: this should be 16-byte aligned as stack size. 
+#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 0x140 +ASMCONSTANTS_C_ASSERT(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA == (sizeof(PROFILE_PLATFORM_SPECIFIC_DATA)+8)) + +#define PROFILE_PLATFORM_SPECIFIC_DATA__Fp 0 +#define PROFILE_PLATFORM_SPECIFIC_DATA__Pc 8 +#define PROFILE_PLATFORM_SPECIFIC_DATA__probeSp 16 +#define PROFILE_PLATFORM_SPECIFIC_DATA__profiledSp 24 +#define PROFILE_PLATFORM_SPECIFIC_DATA__hiddenArg 32 +#define PROFILE_PLATFORM_SPECIFIC_DATA__functionId 40 +#define PROFILE_PLATFORM_SPECIFIC_DATA__flags 48 +#define PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters 56 +#define PROFILE_PLATFORM_SPECIFIC_DATA__floatArgumentRegisters 120 #define ASMCONSTANTS_C_ASSERT_OFFSET(type, field) \ ASMCONSTANTS_C_ASSERT(type##__##field == offsetof(type, field)) - -#define PROFILE_PLATFORM_SPECIFIC_DATA__Fp 0 ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, Fp) -#define PROFILE_PLATFORM_SPECIFIC_DATA__Pc 8 ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, Pc) -#define PROFILE_PLATFORM_SPECIFIC_DATA__x8 16 -ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, x8) -#define PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters 24 -ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, argumentRegisters) -#define PROFILE_PLATFORM_SPECIFIC_DATA__functionId 88 -ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, functionId) -#define PROFILE_PLATFORM_SPECIFIC_DATA__floatArgumentRegisters 96 -ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, floatArgumentRegisters) -#define PROFILE_PLATFORM_SPECIFIC_DATA__probeSp 224 ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, probeSp) -#define PROFILE_PLATFORM_SPECIFIC_DATA__profiledSp 232 ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, profiledSp) -#define PROFILE_PLATFORM_SPECIFIC_DATA__hiddenArg 240 ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, hiddenArg) -#define PROFILE_PLATFORM_SPECIFIC_DATA__flags 248 
+ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, functionId) ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, flags) -#define PROFILE_PLATFORM_SPECIFIC_DATA__unused 252 -ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, unused) -#define PROFILE_PLATFORM_SPECIFIC_DATA__buffer 256 -ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, buffer) - +ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, argumentRegisters) +ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, floatArgumentRegisters) #undef ASMCONSTANTS_C_ASSERT_OFFSET -#endif + +#endif // PROFILING_SUPPORTED #undef ASMCONSTANTS_RUNTIME_ASSERT #undef ASMCONSTANTS_C_ASSERT diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 8bc20898a68da..9a108fce399cc 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -1078,29 +1078,27 @@ NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler // Values of $a0-$a7, $fa0-$fa7, $fp are preserved. // Values of other volatile registers are not preserved. - // $fp,$ra + // $fp,$ra PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Allocate space and save Fp, Pc. - // Please validate, SAVE_ARGUMENT_REGISTERS doesn't save $t0 - SAVE_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters // Save $t0 and argument registers ($a0-$a7). - st.d $zero, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__functionId // Clear functionId. + + SAVE_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters SAVE_FLOAT_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__floatArgumentRegisters - addi.d $t3, $fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper. 
- st.d $t3, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__probeSp - st.d $t2, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__profiledSp + st.d $zero, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__functionId + addi.d $t3, $fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper. + st.d $t3, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__probeSp + st.d $t2, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__profiledSp - st.d $zero, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__hiddenArg // Clear hiddenArg. - addi.d $t3, $zero, \flags + st.d $zero, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__hiddenArg + addi.w $t3, $zero, \flags st.w $t3, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__flags - st.d $zero, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__unused ori $a1, $t1, 0 ori $a2, $sp, 0 bl C_FUNC(\helper) - // Please validate, RESTORE_ARGUMENT_REGISTERS doesn't restore $t0 - RESTORE_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters // Restore $t0 and argument registers. + RESTORE_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters RESTORE_FLOAT_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__floatArgumentRegisters - // $fp, $ra + // $fp, $ra EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA EPILOG_RETURN diff --git a/src/coreclr/vm/loongarch64/cgencpu.h b/src/coreclr/vm/loongarch64/cgencpu.h index a9275fda8f24a..6979fa11a9675 100644 --- a/src/coreclr/vm/loongarch64/cgencpu.h +++ b/src/coreclr/vm/loongarch64/cgencpu.h @@ -114,15 +114,14 @@ struct CalleeSavedRegisters { // will probably have to communicate this back to the PromoteCallerStack // routine to avoid a double promotion. 
//-------------------------------------------------------------------- +#define NUM_ARGUMENT_REGISTERS 8 typedef DPTR(struct ArgumentRegisters) PTR_ArgumentRegisters; struct ArgumentRegisters { - INT64 a[8]; // a0 ....a7 + INT64 a[NUM_ARGUMENT_REGISTERS]; // a0 ....a7 }; -#define NUM_ARGUMENT_REGISTERS 8 #define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters) - //-------------------------------------------------------------------- // This represents the floating point argument registers which are saved // as part of the NegInfo for a FramedMethodFrame. Note that these @@ -130,42 +129,35 @@ struct ArgumentRegisters { // C++ helpers will need to preserve the values in these volatile // registers. //-------------------------------------------------------------------- +#define NUM_FLOAT_ARGUMENT_REGISTERS 8 typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters; struct FloatArgumentRegisters { - //TODO: not supports LOONGARCH-SIMD. - double f[8]; // f0-f7 + double f[NUM_FLOAT_ARGUMENT_REGISTERS]; // fa0-fa7 }; -#define NUM_FLOAT_ARGUMENT_REGISTERS 8 - +#ifdef PROFILING_SUPPORTED //********************************************************************** // Profiling //********************************************************************** -#ifdef PROFILING_SUPPORTED - -// Scratch space to store HFA return values (max 16 bytes) -#define PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE 16 - typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA { void* Fp; void* Pc; - void* x8; - ArgumentRegisters argumentRegisters; - FunctionID functionId; - FloatArgumentRegisters floatArgumentRegisters; void* probeSp; void* profiledSp; void* hiddenArg; + FunctionID functionId; UINT32 flags; - UINT32 unused; - BYTE buffer[PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE]; + ArgumentRegisters argumentRegisters; + FloatArgumentRegisters floatArgumentRegisters; + // Scratch space to reconstruct a struct passed in two registers: + // one float register and one general register. 
This also covers struct return values. + BYTE buffer[sizeof(ArgumentRegisters) + sizeof(FloatArgumentRegisters)]; } PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA; #endif // PROFILING_SUPPORTED - //********************************************************************** // Exception handling //********************************************************************** diff --git a/src/coreclr/vm/loongarch64/profiler.cpp b/src/coreclr/vm/loongarch64/profiler.cpp index 4e9075e50f15d..2afa6bf836ea4 100644 --- a/src/coreclr/vm/loongarch64/profiler.cpp +++ b/src/coreclr/vm/loongarch64/profiler.cpp @@ -35,6 +35,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHan _ASSERTE(pPlatformSpecificHandle != nullptr); m_handle = pPlatformSpecificHandle; + m_bufferPos = 0; PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast(pPlatformSpecificHandle); #ifdef _DEBUG @@ -68,7 +69,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHan } else { - // On ARM64 the generic instantiation parameter comes after the optional "this" pointer. + // On LoongArch64 the generic instantiation parameter comes after the optional "this" pointer. 
if (m_argIterator.HasThis()) { pData->hiddenArg = (void*)pData->argumentRegisters.a[1]; @@ -117,12 +118,41 @@ LPVOID ProfileArgIterator::GetNextArgAddr() return nullptr; } + LPVOID pArg = nullptr; + if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - return (LPBYTE)&pData->floatArgumentRegisters + (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()); - } + pArg = (LPBYTE)&pData->floatArgumentRegisters + (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()); - LPVOID pArg = nullptr; + ArgLocDesc* pArgLocDesc = m_argIterator.GetArgLocDescForStructInRegs(); + + if (pArgLocDesc) + { + if (pArgLocDesc->m_cFloatReg == 1) + { + UINT32 bufferPos = m_bufferPos; + + UINT64* dst = (UINT64*)&pData->buffer[bufferPos]; + m_bufferPos += 16; + if (pArgLocDesc->m_structFields & STRUCT_FLOAT_FIELD_FIRST) + { + *dst++ = *(UINT64*)pArg; + *dst = pData->argumentRegisters.a[pArgLocDesc->m_idxGenReg]; + } + else + { + _ASSERTE(pArgLocDesc->m_structFields & STRUCT_FLOAT_FIELD_SECOND); + *dst++ = pData->argumentRegisters.a[pArgLocDesc->m_idxGenReg]; + *dst = *(UINT64*)pArg; + } + return (LPBYTE)&pData->buffer[bufferPos]; + } + + _ASSERTE(pArgLocDesc->m_cFloatReg == 2); + } + + return pArg; + } if (TransitionBlock::IsArgumentRegisterOffset(argOffset)) { @@ -204,66 +234,39 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void) if (m_argIterator.HasRetBuffArg()) { - if ((pData->flags & PROFILE_ENTER) != 0) - { - return (LPVOID)pData->x8; - } - else - { - // On ARM64 there is no requirement for the method to preserve the value stored in x8. - // In order to workaround this JIT will explicitly return the return buffer address in x0. 
- _ASSERTE((pData->flags & PROFILE_LEAVE) != 0); - return (LPVOID)pData->argumentRegisters.a[0]; - } + return (LPVOID)pData->argumentRegisters.a[0]; } UINT fpReturnSize = m_argIterator.GetFPReturnSize(); + if (fpReturnSize != 0) { - TypeHandle thReturnValueType; - m_argIterator.GetSig()->GetReturnTypeNormalized(&thReturnValueType); - if (!thReturnValueType.IsNull() && thReturnValueType.IsHFA()) + if ((fpReturnSize & (UINT)STRUCT_FLOAT_FIELD_ONLY_ONE) || (fpReturnSize & (UINT)STRUCT_FLOAT_FIELD_ONLY_TWO)) + { + return &pData->floatArgumentRegisters.f[0]; + } + else { - UINT hfaFieldSize = fpReturnSize / 4; - UINT totalSize = m_argIterator.GetSig()->GetReturnTypeSize(); - _ASSERTE(totalSize % hfaFieldSize == 0); - _ASSERTE(totalSize <= 16); + // The return type is a struct with a floating-point field that is returned in one float and one general register. + // It is reassembled in the scratch buffer that is shared with argument reconstruction, + // so for such a return type GetReturnBufferAddr must only be called during the PROFILE_LEAVE stage. + _ASSERTE((pData->flags & PROFILE_LEAVE) != 0); - BYTE *dest = pData->buffer; - for (UINT floatRegIdx = 0; floatRegIdx < totalSize / hfaFieldSize; ++floatRegIdx) + // Use the tail 16 bytes of the buffer for the returned structure. 
+ UINT64* dst = (UINT64*)&pData->buffer[sizeof(pData->buffer) - 16]; + if (fpReturnSize & (UINT)STRUCT_FLOAT_FIELD_FIRST) { - if (hfaFieldSize == 4) - { - *(UINT32*)dest = *(UINT32*)&pData->floatArgumentRegisters.f[floatRegIdx]; - dest += 4; - } - else if (hfaFieldSize == 8) - { - *(UINT64*)dest = *(UINT64*)&pData->floatArgumentRegisters.f[floatRegIdx]; - dest += 8; - } - else - { - _ASSERTE(!"unimplemented on LOONGARCH yet!"); -#if 0 - _ASSERTE(hfaFieldSize == 16); - *(NEON128*)dest = pData->floatArgumentRegisters.f[floatRegIdx]; - dest += 16; -#endif - } - - if (floatRegIdx > 8) - { - // There's only space for 8 arguments in buffer - _ASSERTE(FALSE); - break; - } + *(double*)dst = pData->floatArgumentRegisters.f[0]; + *(dst + 1) = pData->argumentRegisters.a[0]; } - - return pData->buffer; + else + { + _ASSERTE(fpReturnSize & (UINT)STRUCT_FLOAT_FIELD_SECOND); + *dst = pData->argumentRegisters.a[0]; + *(double*)(dst + 1) = pData->floatArgumentRegisters.f[0]; + } + return dst; } - - return &pData->floatArgumentRegisters.f[0]; } if (!m_argIterator.GetSig()->IsReturnTypeVoid()) diff --git a/src/coreclr/vm/proftoeeinterfaceimpl.h b/src/coreclr/vm/proftoeeinterfaceimpl.h index 0ab307197b11a..885752ec97eb3 100644 --- a/src/coreclr/vm/proftoeeinterfaceimpl.h +++ b/src/coreclr/vm/proftoeeinterfaceimpl.h @@ -56,7 +56,7 @@ class ProfileArgIterator private: void *m_handle; ArgIterator m_argIterator; -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) UINT64 m_bufferPos; #if defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64) @@ -76,7 +76,7 @@ class ProfileArgIterator LPVOID CopyStructFromFPRegs(int idxFPReg, int cntFPRegs, int hfaFieldSize); #endif -#endif // UNIX_AMD64_ABI || TARGET_ARM64 +#endif // UNIX_AMD64_ABI || TARGET_ARM64 || TARGET_RISCV64 || TARGET_LOONGARCH64 public: ProfileArgIterator(MetaSig * pMetaSig, void* 
platformSpecificHandle); diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index cf5cd285c2d6b..7d97dd4394389 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -255,7 +255,10 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offseto #define PROFILE_LEAVE 2 #define PROFILE_TAILCALL 4 -#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 312 +// NOTE: this should be 16-byte aligned as stack size. +#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 0x140 +ASMCONSTANTS_C_ASSERT(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA == (sizeof(PROFILE_PLATFORM_SPECIFIC_DATA)+8)) + #define PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters 16 #define PROFILE_PLATFORM_SPECIFIC_DATA__functionId 80 #define PROFILE_PLATFORM_SPECIFIC_DATA__floatArgumentRegisters 88 @@ -264,8 +267,6 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offseto #define PROFILE_PLATFORM_SPECIFIC_DATA__hiddenArg 168 #define PROFILE_PLATFORM_SPECIFIC_DATA__flags 176 -ASMCONSTANTS_C_ASSERT(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA == sizeof(PROFILE_PLATFORM_SPECIFIC_DATA)) - #define ASMCONSTANTS_C_ASSERT_OFFSET(type, field) \ ASMCONSTANTS_C_ASSERT(type##__##field == offsetof(type, field)) ASMCONSTANTS_C_ASSERT_OFFSET(PROFILE_PLATFORM_SPECIFIC_DATA, argumentRegisters) diff --git a/src/coreclr/vm/riscv64/cgencpu.h b/src/coreclr/vm/riscv64/cgencpu.h index 25d2efbc4dedb..2549ec22e60bc 100644 --- a/src/coreclr/vm/riscv64/cgencpu.h +++ b/src/coreclr/vm/riscv64/cgencpu.h @@ -144,7 +144,7 @@ struct PROFILE_PLATFORM_SPECIFIC_DATA void* profiledSp; void* hiddenArg; UINT64 flags; - // Scratch space to reconstruct struct passed in registers + // Scratch space to reconstruct struct passed in two registers BYTE buffer[sizeof(ArgumentRegisters) + sizeof(FloatArgumentRegisters)]; }; #endif // PROFILING_SUPPORTED From 070657c9f44d3e373c0803e2bf53a98408adb438 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: 
Fri, 8 Sep 2023 08:43:45 +0800 Subject: [PATCH 2/4] Add a 16-byte alignment assert for `SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA` --- src/coreclr/vm/arm64/asmconstants.h | 1 + src/coreclr/vm/loongarch64/asmconstants.h | 1 + src/coreclr/vm/riscv64/asmconstants.h | 1 + 3 files changed, 3 insertions(+) diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 46a446c0c8416..fe378e24c2f0f 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -240,6 +240,7 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offseto #define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 320 ASMCONSTANTS_C_ASSERT(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA == sizeof(PROFILE_PLATFORM_SPECIFIC_DATA)) +ASMCONSTANTS_C_ASSERT((SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA & 0xf) == 0) #define ASMCONSTANTS_C_ASSERT_OFFSET(type, field) \ ASMCONSTANTS_C_ASSERT(type##__##field == offsetof(type, field)) diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index 99aa2fedc9f63..cba342b8d58af 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -262,6 +262,7 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offseto // NOTE: this should be 16-byte aligned as stack size. 
#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 0x140 ASMCONSTANTS_C_ASSERT(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA == (sizeof(PROFILE_PLATFORM_SPECIFIC_DATA)+8)) +ASMCONSTANTS_C_ASSERT((SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA & 0xf) == 0) #define PROFILE_PLATFORM_SPECIFIC_DATA__Fp 0 #define PROFILE_PLATFORM_SPECIFIC_DATA__Pc 8 diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index 7d97dd4394389..eb421593523d5 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -258,6 +258,7 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offseto // NOTE: this should be 16-byte aligned as stack size. #define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 0x140 ASMCONSTANTS_C_ASSERT(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA == (sizeof(PROFILE_PLATFORM_SPECIFIC_DATA)+8)) +ASMCONSTANTS_C_ASSERT((SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA & 0xf) == 0) #define PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters 16 #define PROFILE_PLATFORM_SPECIFIC_DATA__functionId 80 From 055f39a34ee722ecf4b321a679ea29e0fdea95ae Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 8 Sep 2023 12:01:10 +0800 Subject: [PATCH 3/4] update the args registers within the GenerateProfileHelper. fix some conflicts within registers. 
--- src/coreclr/jit/emitloongarch64.cpp | 33 ++++++++++++------------- src/coreclr/jit/targetloongarch64.h | 4 +-- src/coreclr/vm/loongarch64/asmhelpers.S | 6 ++--- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 6ead1a674099d..73f2dffebada8 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2507,10 +2507,10 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t { // pc + offset_38bits // - // pcaddu18i t2, addr-hi20 - // jilr r0/1,t2,addr-lo18 + // pcaddu18i t4, addr-hi20 + // jilr r0/1,t4,addr-lo18 - emitOutput_Instr(dst, 0x1e00000e); + emitOutput_Instr(dst, 0x1e000000 | (int)REG_DEFAULT_HELPER_CALL_TARGET); size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. @@ -2521,25 +2521,24 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t assert((addr & 3) == 0); dst += 4; - emitGCregDeadUpd(REG_T2, dst); + emitGCregDeadUpd(REG_DEFAULT_HELPER_CALL_TARGET, dst); #ifdef DEBUG code = emitInsCode(INS_pcaddu18i); - assert((code | (14)) == 0x1e00000e); - assert((int)REG_T2 == 14); + assert(code == 0x1e000000); code = emitInsCode(INS_jirl); assert(code == 0x4c000000); #endif - emitOutput_Instr(dst, 0x4c000000 | (14 << 5) | reg2); + emitOutput_Instr(dst, 0x4c000000 | ((int)REG_DEFAULT_HELPER_CALL_TARGET << 5) | reg2); emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_LOONGARCH64_JIR); } else { - // lu12i_w t2, addr_bits[31:12] // TODO-LoongArch64: maybe optimize. - // ori t2, t2, addr_bits[11:0] - // lu32i_d t2, addr_bits[50:32] - // jirl t2 + // lu12i_w t4, addr_bits[31:12] // TODO-LoongArch64: maybe optimize. + // ori t4, t4, addr_bits[11:0] + // lu32i_d t4, addr_bits[50:32] + // jirl t4 ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); assert((uint64_t)(imm >> 32) <= 0x7ffff); // In fact max is <= 0xffff. 
@@ -2548,22 +2547,22 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t imm -= reg2; code = emitInsCode(INS_lu12i_w); - code |= (code_t)REG_T2; + code |= (code_t)REG_DEFAULT_HELPER_CALL_TARGET; code |= ((code_t)(imm >> 12) & 0xfffff) << 5; emitOutput_Instr(dst, code); dst += 4; - emitGCregDeadUpd(REG_T2, dst); + emitGCregDeadUpd(REG_DEFAULT_HELPER_CALL_TARGET, dst); code = emitInsCode(INS_ori); - code |= (code_t)REG_T2; - code |= (code_t)REG_T2 << 5; + code |= (code_t)REG_DEFAULT_HELPER_CALL_TARGET; + code |= (code_t)REG_DEFAULT_HELPER_CALL_TARGET << 5; code |= (code_t)(imm & 0xfff) << 10; emitOutput_Instr(dst, code); dst += 4; code = emitInsCode(INS_lu32i_d); - code |= (code_t)REG_T2; + code |= (code_t)REG_DEFAULT_HELPER_CALL_TARGET; code |= ((imm >> 32) & 0x7ffff) << 5; emitOutput_Instr(dst, code); @@ -2571,7 +2570,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code = emitInsCode(INS_jirl); code |= (code_t)reg2; - code |= (code_t)REG_T2 << 5; + code |= (code_t)REG_DEFAULT_HELPER_CALL_TARGET << 5; // the offset default is 0; emitOutput_Instr(dst, code); } diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 9c32ef3703ff0..736fd1406c304 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -75,8 +75,8 @@ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) - #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 - #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 + #define REG_DEFAULT_HELPER_CALL_TARGET REG_T4 + #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T4 #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 9a108fce399cc..56fb0c0b9247a 100644 --- 
a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -1084,7 +1084,7 @@ NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler SAVE_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters SAVE_FLOAT_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__floatArgumentRegisters st.d $zero, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__functionId - addi.d $t3, $fp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper. + addi.d $t3, $sp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA // Compute probeSp - initial value of Sp on entry to the helper. st.d $t3, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__probeSp st.d $t2, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__profiledSp @@ -1092,8 +1092,8 @@ NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler addi.w $t3, $zero, \flags st.w $t3, $sp, PROFILE_PLATFORM_SPECIFIC_DATA__flags - ori $a1, $t1, 0 - ori $a2, $sp, 0 + ori $a0, $t1, 0 + ori $a1, $sp, 0 bl C_FUNC(\helper) RESTORE_ARGUMENT_REGISTERS $sp, PROFILE_PLATFORM_SPECIFIC_DATA__argumentRegisters From cda96f89c573996ac8a1480e131051450c0757e1 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 8 Sep 2023 16:15:14 +0800 Subject: [PATCH 4/4] add `assert(!compiler->compProfilerMethHndIndirected)` within the `genProfilingLeaveCallback()` and `genProfilingEnterCallback()`. 
--- src/coreclr/jit/codegenloongarch64.cpp | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 0a632a2bef793..2a525c53309be 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -8654,17 +8654,8 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) return; } - if (compiler->compProfilerMethHndIndirected) - { - instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_FUNC_ID, - (ssize_t)compiler->compProfilerMethHnd); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, - REG_PROFILER_ENTER_ARG_FUNC_ID, 0); - } - else - { - instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd); - } + assert(!compiler->compProfilerMethHndIndirected); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_PROFILER_ENTER_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd); int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_CALLER_SP, genFramePointerReg(), @@ -8699,17 +8690,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC compiler->info.compProfilerCallback = true; - if (compiler->compProfilerMethHndIndirected) - { - instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, REG_PROFILER_LEAVE_ARG_FUNC_ID, - (ssize_t)compiler->compProfilerMethHnd); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, - REG_PROFILER_LEAVE_ARG_FUNC_ID, 0); - } - else - { - instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd); - } + assert(!compiler->compProfilerMethHndIndirected); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_PROFILER_LEAVE_ARG_FUNC_ID, (ssize_t)compiler->compProfilerMethHnd); 
gcInfo.gcMarkRegSetNpt(RBM_PROFILER_LEAVE_ARG_FUNC_ID);