From c1052fd214b021232250e7069c5e58440bdbd047 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 26 May 2022 11:46:27 +0800 Subject: [PATCH] [LoongArch64] enable the promote feature. --- src/coreclr/jit/codegen.h | 4 - src/coreclr/jit/codegencommon.cpp | 4 + src/coreclr/jit/codegenloongarch64.cpp | 604 ++++++++++++++++--------- src/coreclr/jit/emit.cpp | 7 +- src/coreclr/jit/lclvars.cpp | 13 +- src/coreclr/jit/optcse.cpp | 14 +- src/coreclr/jit/targetloongarch64.h | 4 +- 7 files changed, 414 insertions(+), 236 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index eecfe4d2d0557..d567ef1537a37 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1393,10 +1393,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode); void genMultiRegStoreToLocal(GenTreeLclVar* lclNode); -#if defined(TARGET_LOONGARCH64) - void genMultiRegCallStoreToLocal(GenTree* treeNode); -#endif - // Codegen for multi-register struct returns. bool isStructReturn(GenTree* treeNode); #ifdef FEATURE_SIMD diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 7f6ed78d0a5f7..b755578022986 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8251,6 +8251,10 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode) } else { +#ifdef TARGET_LOONGARCH64 + // should consider the padding field within a struct. + offset = (offset % genTypeSize(srcType)) ? AlignUp(offset, genTypeSize(srcType)) : offset; +#endif // Several fields could be passed in one register, copy using the register type. // It could rewrite memory outside of the fields but local on the stack are rounded to POINTER_SIZE so // it is safe to store a long register into a byte field as it is known that we have enough padding after. 
diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index e3b3ebaa6147f..67d64db8c6a82 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -901,15 +901,15 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3, * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack - * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 224 bytes: + * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 232 bytes: * * FP,RA registers - * 8 int callee-saved register s0-s7 + * 9 int callee-saved register s0-s8 * 8 float callee-saved registers f24-f31 * 8 saved integer argument registers a0-a7, if varargs function * 1 PSP slot * 1 alignment slot, future maybe add gp - * == 28 slots * 8 bytes = 224 bytes. + * == 29 slots * 8 bytes = 232 bytes. * * The outgoing argument size, however, can be very large, if we call a function that takes a large number of * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main @@ -963,7 +963,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * else * { * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. - * // TODO-LOONGARCH64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. + * // TODO-LOONGARCH64-CQ: if VM set a1 to CallerSP on entry, like for filters, we could save an instruction. * * daddiu a3, fp, Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. a3 is scratch? 
* sd a3, SP_to_PSP_slot_delta(sp) ; store the PSP @@ -1913,19 +1913,16 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) // If this is a register candidate that has been spilled, genConsumeReg() will // reload it at the point of use. Otherwise, if it's not in a register, we load it here. - if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) + if (!isRegCandidate && !tree->IsMultiReg() && !(tree->gtFlags & GTF_SPILLED)) { var_types targetType = varDsc->GetRegisterType(tree); - // if (tree->gtFlags & GTF_UNSIGNED) - // targetType = varTypeSignedToUnsigned(targetType);//uuuuu. - emitter* emit = GetEmitter(); - // targetType must be a normal scalar type and not a TYP_STRUCT assert(targetType != TYP_STRUCT); + instruction ins = ins_Load(targetType); emitAttr attr = emitTypeSize(targetType); - emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + GetEmitter()->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); genProduceReg(tree); } } @@ -2013,25 +2010,18 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) // case is handled separately. 
if (data->gtSkipReloadOrCopy()->IsMultiRegNode()) { - genMultiRegCallStoreToLocal(lclNode); + genMultiRegStoreToLocal(lclNode); return; } - regNumber targetReg = lclNode->GetRegNum(); - emitter* emit = GetEmitter(); - - unsigned varNum = lclNode->GetLclNum(); - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - var_types targetType = varDsc->GetRegisterType(lclNode); - + LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); if (lclNode->IsMultiReg()) { + NYI_LOONGARCH64("genCodeForStoreLclVar : unimplemented on LoongArch64 yet"); regNumber operandReg = genConsumeReg(data); unsigned int regCount = varDsc->lvFieldCnt; for (unsigned i = 0; i < regCount; ++i) { - NYI("unimplemented on LOONGARCH64 yet"); regNumber varReg = lclNode->GetRegByIndex(i); assert(varReg != REG_NA); unsigned fieldLclNum = varDsc->lvFieldLclStart + i; @@ -2043,6 +2033,11 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) } else { + regNumber targetReg = lclNode->GetRegNum(); + emitter* emit = GetEmitter(); + unsigned varNum = lclNode->GetLclNum(); + var_types targetType = varDsc->GetRegisterType(lclNode); + #ifdef FEATURE_SIMD // storing of TYP_SIMD12 (i.e. Vector3) field if (lclNode->TypeGet() == TYP_SIMD12) @@ -2059,7 +2054,8 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { // This is only possible for a zero-init or bitcast. const bool zeroInit = data->IsIntegralConst(0); - // TODO-LOONGARCH64-CQ: not supporting SIMD. + + // TODO-LOONGARCH64-CQ: supporting the SIMD. 
assert(!varTypeIsSIMD(targetType)); if (zeroInit) @@ -2176,7 +2172,7 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) { if (attr == EA_4BYTE) { - if (treeNode->gtFlags & GTF_UNSIGNED) + if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) { GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0); } @@ -4104,18 +4100,44 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) switch (cmpSize) { case EA_4BYTE: - imm = static_cast<int32_t>(imm); + if (IsUnsigned) + { + imm = static_cast<uint32_t>(imm); + + regNumber tmpRegOp1 = rsGetRsvdReg(); + assert(regOp1 != tmpRegOp1); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + } + else + { + imm = static_cast<int32_t>(imm); + } break; case EA_8BYTE: break; case EA_1BYTE: - imm = static_cast<int8_t>(imm); + if (IsUnsigned) + { + imm = static_cast<uint8_t>(imm); + } + else + { + imm = static_cast<int8_t>(imm); + } break; // case EA_2BYTE: - // imm = static_cast<int16_t>(imm); + // if (IsUnsigned) + // { + // imm = static_cast<uint16_t>(imm); + // } + // else + // { + // imm = static_cast<int16_t>(imm); + // } // break; default: - assert(!"Unexpected type in jumpTrue(imm)."); + unreached(); } if (tree->OperIs(GT_LT)) @@ -4226,13 +4248,23 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { regNumber regOp2 = op2->GetRegNum(); - if ((cmpSize == EA_4BYTE) && IsUnsigned) + if (cmpSize == EA_4BYTE) { regNumber tmpRegOp1 = REG_RA; regNumber tmpRegOp2 = rsGetRsvdReg(); + assert(regOp1 != tmpRegOp2); + assert(regOp2 != tmpRegOp2); - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, regOp1, 0); - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, regOp2, 0); + if (IsUnsigned) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp2, regOp2, 31, 0); + } + else + { + emit->emitIns_R_R_I(INS_slli_w, EA_8BYTE, tmpRegOp1, regOp1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_8BYTE, tmpRegOp2, regOp2, 0); + } regOp1 = tmpRegOp1; regOp2 = tmpRegOp2; @@ 
-4299,6 +4331,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) var_types op1Type = genActualType(op1->TypeGet()); var_types op2Type = genActualType(op2->TypeGet()); + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); bool IsEq = tree == jtrue->gtPrev; @@ -4314,8 +4347,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) if (varTypeIsFloating(op1Type)) { - assert(genTypeSize(op1Type) == genTypeSize(op2Type)); - assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; @@ -4429,9 +4460,14 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) switch (cmpSize) { case EA_4BYTE: - if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) + if (IsUnsigned) { imm = static_cast<uint32_t>(imm); + + regNumber tmpRegOp1 = rsGetRsvdReg(); + assert(regOp1 != tmpRegOp1); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; } else { @@ -4441,11 +4477,27 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) case EA_8BYTE: break; case EA_1BYTE: - imm = static_cast<int8_t>(imm); + if (IsUnsigned) + { + imm = static_cast<uint8_t>(imm); + } + else + { + imm = static_cast<int8_t>(imm); + } break; - + // case EA_2BYTE: + // if (IsUnsigned) + // { + // imm = static_cast<uint16_t>(imm); + // } + // else + // { + // imm = static_cast<int16_t>(imm); + // } + // break; default: - assert(!"Unexpected type in jumpTrue(imm)."); + unreached(); } emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); @@ -4489,43 +4541,26 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) else { regNumber regOp2 = op2->GetRegNum(); - if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && - compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { - regNumber tmpRegOp1 = rsGetRsvdReg(); - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); - regOp1 = tmpRegOp1; - regOp2 = REG_RA; - } - else if (IsUnsigned && cmpSize == 
EA_4BYTE && op1->OperIs(GT_LCL_VAR) && - compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { - regNumber tmpRegOp1 = rsGetRsvdReg(); - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); - regOp1 = tmpRegOp1; - regOp2 = REG_RA; - } - else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && - compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); - regOp2 = REG_RA; - } - else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) + if (cmpSize == EA_4BYTE) { - if (!(op1->gtFlags & GTF_UNSIGNED)) + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = rsGetRsvdReg(); + assert(regOp1 != tmpRegOp2); + assert(regOp2 != tmpRegOp2); + + if (IsUnsigned) { - regNumber tmpRegOp1 = rsGetRsvdReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); - regOp1 = tmpRegOp1; + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp2, regOp2, 31, 0); } - if (!(op2->gtFlags & GTF_UNSIGNED)) + else { - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); - regOp2 = REG_RA; + emit->emitIns_R_R_I(INS_slli_w, EA_8BYTE, tmpRegOp1, regOp1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_8BYTE, tmpRegOp2, regOp2, 0); } + + regOp1 = tmpRegOp1; + regOp2 = tmpRegOp2; } if (tree->OperIs(GT_LT)) @@ -4621,17 +4656,19 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) instruction ins; int regs; - if (op2->AsIntCon()->gtIconVal) + ssize_t imm = op2->AsIntCon()->gtIconVal; + assert(reg != REG_R21); + assert(reg != REG_RA); + + if (attr == EA_4BYTE) + { + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; + } + + if (imm != 0) { - assert(reg != REG_R21); - ssize_t imm = op2->AsIntCon()->gtIconVal; - if (attr == EA_4BYTE) - { - assert(reg != REG_RA); - imm = 
(int32_t)imm; - GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); - reg = REG_RA; - } GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); regs = (int)reg << 5; regs |= (int)REG_R21; @@ -6262,127 +6299,6 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) } #endif // FEATURE_ARG_SPLIT -// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local -// -// Arguments: -// treeNode - Gentree of GT_STORE_LCL_VAR -// -// Return Value: -// None -// -// Assumption: -// The child of store is a multi-reg call node. -// genProduceReg() on treeNode is made by caller of this routine. -// -void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) -{ - assert(treeNode->OperGet() == GT_STORE_LCL_VAR); - - // Structs of size >=9 and <=16 are returned in two return registers on LOONGARCH64 and HFAs. - assert(varTypeIsStruct(treeNode)); - - // Assumption: current implementation requires that a multi-reg - // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from - // being promoted. - unsigned lclNum = treeNode->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); - noway_assert(varDsc->lvIsMultiRegRet); - - GenTree* op1 = treeNode->gtGetOp1(); - GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); - GenTreeCall* call = actualOp1->AsCall(); - assert(call->HasMultiRegRetVal()); - - genConsumeRegs(op1); - - const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); - - if (treeNode->GetRegNum() != REG_NA) - { - NYI("unimplemented on LOONGARCH64 yet"); - // Right now the only enregistrable multi-reg return types supported are SIMD types. 
- assert(varTypeIsSIMD(treeNode)); - assert(regCount != 0); - - regNumber dst = treeNode->GetRegNum(); - - // Treat dst register as a homogenous vector with element size equal to the src size - // Insert pieces in reverse order - for (int i = regCount - 1; i >= 0; --i) - { - var_types type = pRetTypeDesc->GetReturnRegType(i); - regNumber reg = call->GetRegNumByIdx(i); - if (op1->IsCopyOrReload()) - { - // GT_COPY/GT_RELOAD will have valid reg for those positions - // that need to be copied or reloaded. - regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i); - if (reloadReg != REG_NA) - { - reg = reloadReg; - } - } - - assert(reg != REG_NA); - if (varTypeIsFloating(type)) - { - // If the register piece was passed in a floating point register - // Use a vector mov element instruction - // src is not a vector, so it is in the first element reg[0] - // mov dst[i], reg[0] - // This effectively moves from `reg[0]` to `dst[i]`, leaving other dst bits unchanged till further - // iterations - // For the case where reg == dst, if we iterate so that we write dst[0] last, we eliminate the need for - // a temporary - GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), dst, reg, i, 0); - } - else - { - // If the register piece was passed in an integer register - // Use a vector mov from general purpose register instruction - // mov dst[i], reg - // This effectively moves from `reg` to `dst[i]` - GetEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), dst, reg, i); - } - } - - genProduceReg(treeNode); - } - else - { - // Stack store - int offset = 0; - var_types type = pRetTypeDesc->GetReturnRegType(0); - regNumber reg = call->GetRegNumByIdx(0); - if (op1->IsCopyOrReload()) - { - // GT_COPY/GT_RELOAD will have valid reg for those positions - // that need to be copied or reloaded. 
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0); - if (reloadReg != REG_NA) - { - reg = reloadReg; - } - } - - assert(reg != REG_NA); - GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); - - if (1 < regCount) - { - offset = genTypeSize(type); - type = pRetTypeDesc->GetReturnRegType(1); - reg = call->GetRegNumByIdx(1); - offset = (offset < (int)genTypeSize(type)) ? genTypeSize(type) : offset; - GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); - } - - genUpdateLife(treeNode); - varDsc->SetRegNum(REG_STK); - } -} - //------------------------------------------------------------------------ // genRangeCheck: generate code for GT_BOUNDS_CHECK node. // @@ -8983,8 +8899,11 @@ void CodeGen::genFnPrologCalleeRegArgs() noway_assert(regArgMaskLive != 0); unsigned varNum; - unsigned regArgsVars[MAX_REG_ARG * 2] = {0}; - unsigned regArgNum = 0; + unsigned regArgMaskIsInt = 0; + unsigned regArgNum = 0; + // Process any circular dependencies + unsigned regArg[MAX_REG_ARG * 2] = {0}; + unsigned regArgInit[MAX_REG_ARG * 2] = {0}; for (varNum = 0; varNum < compiler->lvaCount; ++varNum) { LclVarDsc* varDsc = compiler->lvaTable + varNum; @@ -9006,20 +8925,91 @@ void CodeGen::genFnPrologCalleeRegArgs() assert(!(genIsValidIntReg(varDsc->GetOtherArgReg()) || genIsValidFloatReg(varDsc->GetOtherArgReg()))); if (varDsc->GetArgInitReg() != varDsc->GetArgReg()) { - if (varDsc->GetArgInitReg() > REG_ARG_LAST) + if (genIsValidIntReg(varDsc->GetArgInitReg())) { - inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? 
TYP_DOUBLE : TYP_LONG, - varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); - regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + if (varDsc->GetArgInitReg() > REG_ARG_LAST) + { + bool isSkip; + instruction ins; + emitAttr size; + if (genIsValidIntReg(varDsc->GetArgReg())) + { + ins = INS_mov; + if (varDsc->TypeGet() == TYP_INT) + { + size = EA_4BYTE; + isSkip = false; + } + else + { + size = EA_PTRSIZE; + isSkip = true; + } + } + else + { + ins = INS_movfr2gr_d; + size = EA_PTRSIZE; + isSkip = true; + } + GetEmitter()->emitIns_Mov(ins, size, varDsc->GetArgInitReg(), varDsc->GetArgReg(), isSkip); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + if (genIsValidIntReg(varDsc->GetArgReg())) + { + assert(varDsc->GetArgReg() >= REG_ARG_FIRST); + regArg[varDsc->GetArgReg() - REG_ARG_FIRST] = varDsc->GetArgReg(); + regArgInit[varDsc->GetArgReg() - REG_ARG_FIRST] = varDsc->GetArgInitReg(); + if (varDsc->TypeGet() == TYP_INT) + { + regArgMaskIsInt = 1 << (unsigned)varDsc->GetArgReg(); + } + } + else + { + assert(genIsValidFloatReg(varDsc->GetArgReg())); + regArg[(varDsc->GetArgReg() & 7) | 0x8] = varDsc->GetArgReg(); + regArgInit[(varDsc->GetArgReg() & 7) | 0x8] = varDsc->GetArgInitReg(); + } + regArgNum++; + } } else { - regArgsVars[regArgNum] = varNum; - regArgNum++; + assert(genIsValidFloatReg(varDsc->GetArgInitReg())); + if (genIsValidIntReg(varDsc->GetArgReg())) + { + GetEmitter()->emitIns_Mov(INS_movgr2fr_d, EA_PTRSIZE, varDsc->GetArgInitReg(), + varDsc->GetArgReg(), false); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else if (varDsc->GetArgInitReg() > REG_ARG_FP_LAST) + { + GetEmitter()->emitIns_Mov(INS_fmov_d, EA_PTRSIZE, varDsc->GetArgInitReg(), varDsc->GetArgReg(), + true); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + assert(genIsValidFloatReg(varDsc->GetArgReg())); + regArg[(varDsc->GetArgReg() & 7) | 0x8] = varDsc->GetArgReg(); + regArgInit[(varDsc->GetArgReg() & 7) | 0x8] = 
varDsc->GetArgInitReg(); + regArgNum++; + } } } else + { + // TODO for LoongArch64: should delete this by optimization "struct {long a; int32_t b;};" + // like AMD64_ABI within morph. + if (genIsValidIntReg(varDsc->GetArgReg()) && (varDsc->TypeGet() == TYP_INT)) + { + GetEmitter()->emitIns_Mov(INS_mov, EA_4BYTE, varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + } regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } #ifdef USING_SCOPE_INFO psiMoveToReg(varNum); #endif // USING_SCOPE_INFO @@ -9036,7 +9026,33 @@ void CodeGen::genFnPrologCalleeRegArgs() // if (varDsc->lvPromoted || varDsc->lvIsStructField) { - assert(!"-------------Should confirm on Loongarch!"); + LclVarDsc* parentVarDsc = varDsc; + if (varDsc->lvIsStructField) + { + assert(!varDsc->lvPromoted); + parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl]; + } + + Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc); + + if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) + { + // For register arguments that are independent promoted structs we put the promoted field varNum + // in the regArgTab[] + if (varDsc->lvPromoted) + { + continue; + } + } + else + { + // For register arguments that are not independent promoted structs we put the parent struct varNum + // in the regArgTab[] + if (varDsc->lvIsStructField) + { + continue; + } + } } var_types storeType = TYP_UNDEF; @@ -9098,7 +9114,7 @@ void CodeGen::genFnPrologCalleeRegArgs() regNumber tmp_reg = REG_NA; bool FPbased; - int baseOffset = 0; //(regArgTab[argNum].slot - 1) * slotSize; + int baseOffset = 0; int base = compiler->lvaFrameAddress(varNum, &FPbased); base += baseOffset; @@ -9219,31 +9235,169 @@ void CodeGen::genFnPrologCalleeRegArgs() } } - while (regArgNum > 0) + if (regArgNum > 0) { - varNum = regArgsVars[regArgNum - 1]; - LclVarDsc* varDsc = compiler->lvaTable + varNum; - - if (varDsc->GetArgInitReg() > varDsc->GetArgReg()) + instruction ins; + for (int i = MAX_REG_ARG - 1; i >= 0; i--) { 
- var_types destMemType = varDsc->TypeGet(); - GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), - varDsc->GetArgReg()); - regArgNum--; - regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + if (regArg[i] > 0) + { + assert(genIsValidIntReg((regNumber)regArg[i])); + assert(genIsValidIntReg((regNumber)regArgInit[i])); + + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[i]); + if ((regArgMaskIsInt & (1 << regArg[i])) != 0) + { + ins = INS_slli_w; + } + else + { + ins = INS_ori; + } + + if (regArgNum == 0) + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); + break; + } + else if (regArgInit[i] > regArg[i]) + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); + } + else + { + assert(i > 0); + assert(regArgNum > 0); + + int j = regArgInit[i] - REG_ARG_FIRST; + assert((j >= 0) && (j < MAX_REG_ARG)); + if (regArg[j] == 0) + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], 0); + } + else + { + int k = regArgInit[j] - REG_ARG_FIRST; + assert((k >= 0) && (k < MAX_REG_ARG)); + instruction ins2 = (regArgMaskIsInt & (1 << regArg[j])) != 0 ? 
INS_slli_w : INS_ori; + if ((regArg[k] == 0) || (k > i)) + { + GetEmitter()->emitIns_R_R_I(ins2, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j], 0); + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], + 0); + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + if (regArgNum == 0) + { + break; + } + } + else if (k == i) + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, REG_R21, (regNumber)regArg[i], 0); + GetEmitter()->emitIns_R_R_I(ins2, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j], 0); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, (regNumber)regArgInit[i], REG_R21, 0); + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + regArg[j] = 0; + if (regArgNum == 0) + { + break; + } + } + else + { + NYI_LOONGARCH64("-----------CodeGen::genFnPrologCalleeRegArgs() error!--"); + } + } + } + } } - else + + if (regArgNum > 0) { - for (unsigned i = 0; i < regArgNum; i++) + for (int i = MAX_REG_ARG + MAX_FLOAT_REG_ARG - 1; i >= MAX_REG_ARG; i--) { - LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; - var_types destMemType = varDsc2->GetRegisterType(); - inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, - emitActualTypeSize(destMemType)); - regArgMaskLive &= ~genRegMask(varDsc2->GetArgReg()); + if (regArg[i] > 0) + { + assert(genIsValidFloatReg((regNumber)regArg[i])); + + instruction ins = genIsValidIntReg((regNumber)regArgInit[i]) ? 
INS_movfr2gr_d : INS_fmov_d; + + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[i]); + if (regArgNum == 0) + { + GetEmitter()->emitIns_Mov(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], + true); + break; + } + else if (regArgInit[i] > regArg[i]) + { + GetEmitter()->emitIns_R_R(INS_fmov_d, EA_PTRSIZE, (regNumber)regArgInit[i], + (regNumber)regArg[i]); + } + else + { + assert(i > MAX_REG_ARG); + assert(regArgNum > 0); + + int j = genIsValidIntReg((regNumber)regArgInit[i]) ? (regArgInit[i] - REG_ARG_FIRST) + : ((regArgInit[i] & 0x7) | 0x8); + if (regArg[j] == 0) + { + GetEmitter()->emitIns_Mov(ins, EA_PTRSIZE, (regNumber)regArgInit[i], (regNumber)regArg[i], + true); + } + else + { + // NOTE: Not support the int-register case. + assert(genIsValidFloatReg((regNumber)regArg[j])); + assert(genIsValidFloatReg((regNumber)regArgInit[j])); + + int k = (regArgInit[j] & 0x7) | 0x8; + if ((regArg[k] == 0) || (k > i)) + { + GetEmitter()->emitIns_R_R(INS_fmov_d, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j]); + GetEmitter()->emitIns_R_R(INS_fmov_d, EA_PTRSIZE, (regNumber)regArgInit[i], + (regNumber)regArg[i]); + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + if (regArgNum == 0) + { + break; + } + } + else if (k == i) + { + GetEmitter()->emitIns_R_R(INS_fmov_d, EA_PTRSIZE, REG_SCRATCH_FLT, + (regNumber)regArg[i]); + GetEmitter()->emitIns_R_R(INS_fmov_d, EA_PTRSIZE, (regNumber)regArgInit[j], + (regNumber)regArg[j]); + GetEmitter()->emitIns_R_R(INS_fmov_d, EA_PTRSIZE, (regNumber)regArgInit[i], + REG_SCRATCH_FLT); + regArgNum--; + regArgMaskLive &= ~genRegMask((regNumber)regArg[j]); + regArg[j] = 0; + if (regArgNum == 0) + { + break; + } + } + else + { + NYI_LOONGARCH64("-----------CodeGen::genFnPrologCalleeRegArgs() error!--"); + } + } + } + } } - break; } + assert(regArgNum == 0); } assert(!regArgMaskLive); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index b652a604a6725..977240b262d89 100644 
--- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6042,13 +6042,14 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, AllocMemArgs args; memset(&args, 0, sizeof(args)); -#ifdef TARGET_ARM64 - // For arm64, we want to allocate JIT data always adjacent to code similar to what native compiler does. +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + // For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does. // This way allows us to use a single `ldr` to access such data like float constant/jmp table. + // For LoongArch64 using `pcaddi + ld` to access such data. if (emitTotalColdCodeSize > 0) { // JIT data might be far away from the cold code. - NYI_ARM64("Need to handle fix-up to data from cold code."); + NYI("Need to handle fix-up to data from cold code."); } UNATIVE_OFFSET roDataAlignmentDelta = 0; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 0c9beefde2cc8..a103029ace203 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -2075,7 +2075,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum) { canPromote = false; } -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) else { for (unsigned i = 0; canPromote && (i < fieldCnt); i++) @@ -2208,6 +2208,17 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } +#if defined(TARGET_LOONGARCH64) + else if ((structPromotionInfo.fieldCnt == 2) && (varTypeIsFloating(structPromotionInfo.fields[0].fldType) || + varTypeIsFloating(structPromotionInfo.fields[1].fldType))) + { + // TODO-LoongArch64 - struct passed by float registers. 
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with " + "float field(s).\n", + lclNum, structPromotionInfo.fieldCnt); + shouldPromote = false; + } +#endif #endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 096b7eaeb54be..ab5d14166978e 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -1851,7 +1851,19 @@ class CSE_Heuristic // // Thus we might need to use large displacements when loading or storing // to CSE LclVars that are not enregistered - // On ARM64 this means using rsGetRsvdReg() to hold the large displacement + // On ARM64 this means using rsGetRsvdReg() or R21 to hold the large displacement + // + largeFrame = true; + break; // early out, we don't need to keep increasing frameSize + } +#elif defined(TARGET_LOONGARCH64) + if (frameSize > 0x7ff) + { + // We likely have a large stack frame. 
+ // + // Thus we might need to use large displacements when loading or storing + // to CSE LclVars that are not enregistered + // On LoongArch64 this means using rsGetRsvdReg() to hold the large displacement // largeFrame = true; break; // early out, we don't need to keep increasing frameSize diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 25355994d385b..f704b4b256afb 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -26,8 +26,8 @@ #endif // FEATURE_SIMD #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog - #define FEATURE_STRUCTPROMOTE 0 // JIT Optimization to promote fields of structs into registers - #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers + #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set