From 2c7d76e984472b8deec55d1505c396b365897716 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Sat, 24 Apr 2021 22:38:33 +0300 Subject: [PATCH 01/30] Fix bool-check call and add nullptr --- src/coreclr/jit/compiler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 19f374ffe27e7..87d0b9159f687 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -7451,8 +7451,8 @@ void Compiler::compJitStats() void Compiler::compCallArgStats() { - GenTree* args; - GenTree* argx; + GenTree* args = nullptr; + GenTree* argx = nullptr; unsigned argNum; @@ -7512,7 +7512,7 @@ void Compiler::compCallArgStats() regArgDeferred++; argTotalObjPtr++; - if (call->IsVirtual()) + if (call->AsCall()->IsVirtual()) { /* virtual function */ argVirtualCalls++; From 47517a722becdaa21e2f872f9d1cde8aa4377a1c Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Sat, 24 Apr 2021 22:43:32 +0300 Subject: [PATCH 02/30] Add lvar dataSize If def DISPLAY_SIZES = 1 in jit.h, then it gives an error --- src/coreclr/jit/codegen.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 1770947d64021..bdc217543eed8 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -208,6 +208,7 @@ class CodeGen final : public CodeGenInterface void** codePtr; uint32_t* nativeSizeOfCode; unsigned codeSize; + size_t dataSize; void* coldCodePtr; void* consPtr; From 7b2a14a1a27141d939e92120d1eedf0d611de47b Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Sat, 24 Apr 2021 22:50:18 +0300 Subject: [PATCH 03/30] Remove type def If def DISPLAY_SIZES = 1, gives an error --- src/coreclr/jit/codegencommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 6f9d5f1c5ed40..5e3da25eb9483 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2296,7 +2296,7 @@ void CodeGen::genEmitMachineCode() #if DISPLAY_SIZES - size_t dataSize = GetEmitter()->emitDataSize(); + dataSize = GetEmitter()->emitDataSize(); #endif // DISPLAY_SIZES From 0ee8295c2888ab25f068fac7318d6f04ea1687e2 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Sun, 25 Apr 2021 12:10:56 +0300 Subject: [PATCH 04/30] Move def dataSize --- src/coreclr/jit/compiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 87d0b9159f687..bb656084ab7af 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -342,6 +342,7 @@ size_t gcHeaderISize; // GC header size: interruptible methods size_t gcPtrMapISize; // GC pointer map size: interruptible methods size_t gcHeaderNSize; // GC header size: non-interruptible methods size_t gcPtrMapNSize; // GC pointer map size: non-interruptible methods +size_t dataSize; #endif // DISPLAY_SIZES From 49fd8376e7aa29befbaa829707c868807738b3ea Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Sun, 25 Apr 2021 12:11:51 +0300 Subject: [PATCH 05/30] Update codegen.h --- src/coreclr/jit/codegen.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index bdc217543eed8..1770947d64021 100644 --- a/src/coreclr/jit/codegen.h +++
b/src/coreclr/jit/codegen.h @@ -208,7 +208,6 @@ class CodeGen final : public CodeGenInterface void** codePtr; uint32_t* nativeSizeOfCode; unsigned codeSize; - size_t dataSize; void* coldCodePtr; void* consPtr; From 3a542a4c9c780d1767331d07b99becbc6ebbfc7c Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Sun, 25 Apr 2021 12:13:21 +0300 Subject: [PATCH 06/30] Update compiler.h --- src/coreclr/jit/compiler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 584e3568f7f51..117bdc13024c9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11199,6 +11199,7 @@ extern size_t gcHeaderISize; extern size_t gcPtrMapISize; extern size_t gcHeaderNSize; extern size_t gcPtrMapNSize; +extern size_t dataSize; #endif // DISPLAY_SIZES From 0d38ce3f107f6e3d11b0ea146cb48fc6bd21cb7e Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Sun, 25 Apr 2021 21:16:22 +0300 Subject: [PATCH 07/30] Rename "dataSize" to "eDataSize" (EmitDataSize) because of intersection of names "dataSize" between "compiler.h" and in "codegenarm64.cpp" --- src/coreclr/jit/codegenarm64.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 737db8dc424a6..993b93b26a0b2 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2805,7 +2805,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) genConsumeAddress(addr); genConsumeRegs(data); - emitAttr dataSize = emitActualTypeSize(data); + emitAttr eDataSize = emitActualTypeSize(data); if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { @@ -2814,23 +2814,23 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) switch (treeNode->gtOper) { case GT_XORR: - GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + GetEmitter()->emitIns_R_R_R(INS_ldsetal, eDataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, addrReg); break; case GT_XAND: { // Grab a temp reg to perform `MVN` for dataReg first. regNumber tempReg = treeNode->GetSingleTempReg(); - GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg); - GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + GetEmitter()->emitIns_R_R(INS_mvn, eDataSize, tempReg, dataReg); + GetEmitter()->emitIns_R_R_R(INS_ldclral, eDataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, addrReg); break; } case GT_XCHG: - GetEmitter()->emitIns_R_R_R(INS_swpal, dataSize, dataReg, targetReg, addrReg); + GetEmitter()->emitIns_R_R_R(INS_swpal, eDataSize, dataReg, targetReg, addrReg); break; case GT_XADD: - GetEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + GetEmitter()->emitIns_R_R_R(INS_ldaddal, eDataSize, dataReg, (targetReg == REG_NA) ? 
REG_ZR : targetReg, addrReg); break; default: @@ -2888,7 +2888,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) genDefineTempLabel(labelRetry); // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(INS_ldaxr, dataSize, loadReg, addrReg); + GetEmitter()->emitIns_R_R(INS_ldaxr, eDataSize, loadReg, addrReg); switch (treeNode->OperGet()) { @@ -2897,12 +2897,12 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { // Even though INS_add is specified here, the encoder will choose either // an INS_add or an INS_sub and encode the immediate as a positive value - genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), + genInstrWithConstant(INS_add, eDataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), REG_NA); } else { - GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg); + GetEmitter()->emitIns_R_R_R(INS_add, eDataSize, storeDataReg, loadReg, dataReg); } break; case GT_XCHG: @@ -2914,7 +2914,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) } // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(INS_stlxr, dataSize, exResultReg, storeDataReg, addrReg); + GetEmitter()->emitIns_R_R_R(INS_stlxr, eDataSize, exResultReg, storeDataReg, addrReg); GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); @@ -2954,18 +2954,18 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { - emitAttr dataSize = emitActualTypeSize(data); + emitAttr eDataSize = emitActualTypeSize(data); // casal use the comparand as the target reg if (targetReg != comparandReg) { - GetEmitter()->emitIns_R_R(INS_mov, dataSize, targetReg, comparandReg); + GetEmitter()->emitIns_R_R(INS_mov, eDataSize, targetReg, comparandReg); // Catch case we destroyed data or address before use noway_assert(addrReg != targetReg); noway_assert(dataReg != targetReg); } - GetEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg); + GetEmitter()->emitIns_R_R_R(INS_casal, eDataSize, targetReg, dataReg, addrReg); } else { From dd964874de161832307eda53a679f5d99053f309 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Mon, 26 Apr 2021 15:16:05 +0300 Subject: [PATCH 08/30] Grammar fix compiler.h --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 117bdc13024c9..0b17b1d2105fd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9457,7 +9457,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS #if defined(DEBUG) || defined(INLINE_DATA) - // Method hash is logcally const, but computed + // Method hash is logically const, but computed // on first demand. 
mutable unsigned compMethodHashPrivate; unsigned compMethodHash() const; From 52c08c63b8e2c9fa49bdefcc5972fac0f092443c Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Fri, 30 Apr 2021 00:40:59 +0300 Subject: [PATCH 09/30] Grammar fix --- src/coreclr/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d4c1a188570b2..75d47ba7aaccd 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16564,7 +16564,7 @@ bool GenTree::IsPhiDefn() // comp - the Compiler object. // // Return Value: -// Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type +// Returns "true" if 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type // is not the same size as the type of the GT_LCL_VAR bool GenTree::IsPartialLclFld(Compiler* comp) From 2da97356ad7e84a462fdf3429ff99490e1f7b0f9 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Tue, 4 May 2021 19:59:22 +0300 Subject: [PATCH 10/30] Add files via upload --- src/coreclr/jit/codegenxarch.cpp | 18027 ++++++------- src/coreclr/jit/morph.cpp | 38904 +++++++++++++++-------------- 2 files changed, 28508 insertions(+), 28423 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 756a2811a12e4..dcb60dc06c4ea 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1,9008 +1,9019 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XX XX -XX Amd64/x86 Code Generator XX -XX XX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -*/ -#include "jitpch.h" -#ifdef _MSC_VER -#pragma hdrstop -#pragma warning(disable : 4310) // cast truncates constant value - happens for (int8_t)0xb1 -#endif - -#ifdef TARGET_XARCH -#include "emit.h" -#include "codegen.h" -#include "lower.h" -#include "gcinfo.h" -#include "gcinfoencoder.h" -#include "patchpointinfo.h" - -/***************************************************************************** - * - * Generate code that will set the given register to the integer constant. - */ - -void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) -{ - // Reg cannot be a FP reg - assert(!genIsValidFloatReg(reg)); - - // The only TYP_REF constant that can come this path is a managed 'null' since it is not - // relocatable. Other ref type constants (e.g. string objects) go through a different - // code path. - noway_assert(type != TYP_REF || val == 0); - - if (val == 0) - { - instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags); - } - else - { - // TODO-XArch-CQ: needs all the optimized cases - GetEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val); - } -} - -//--------------------------------------------------------------------- -// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. -// -// Arguments: -// initReg - register to use as a scratch register -// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if -// this call sets 'initReg' to a non-zero value. 
-// -// Return Value: -// None -// -void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (!compiler->getNeedsGSSecurityCookie()) - { - return; - } - - if (compiler->opts.IsOSR() && compiler->info.compPatchpointInfo->HasSecurityCookie()) - { - // Security cookie is on original frame and was initialized there. - return; - } - - if (compiler->gsGlobalSecurityCookieAddr == nullptr) - { - noway_assert(compiler->gsGlobalSecurityCookieVal != 0); -#ifdef TARGET_AMD64 - if ((int)compiler->gsGlobalSecurityCookieVal != compiler->gsGlobalSecurityCookieVal) - { - // initReg = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = initReg - genSetRegToIcon(initReg, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); - GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); - *pInitRegZeroed = false; - } - else -#endif - { - // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal - GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, - (int)compiler->gsGlobalSecurityCookieVal); - } - } - else - { - // Always use EAX on x86 and x64 - // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it. - // mov eax, dword ptr [compiler->gsGlobalSecurityCookieAddr] - // mov dword ptr [frame.GSSecurityCookie], eax - GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_EAX, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - regSet.verifyRegUsed(REG_EAX); - GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, REG_EAX, compiler->lvaGSSecurityCookie, 0); - if (initReg == REG_EAX) - { - *pInitRegZeroed = false; - } - } -} - -/***************************************************************************** - * - * Generate code to check that the GS cookie wasn't thrashed by a buffer - * overrun. If pushReg is true, preserve all registers around code sequence. - * Otherwise ECX could be modified. - * - * Implementation Note: pushReg = true, in case of tail calls. - */ -void CodeGen::genEmitGSCookieCheck(bool pushReg) -{ - noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); - - // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while - // executing GS cookie check will not collect the object pointed to by EAX. - // - // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX - // In such case make sure that the correct GC-ness of RDX is reported as well, so - // a GC object pointed by RDX will not be collected. - if (!pushReg) - { - // Handle multi-reg return type values - if (compiler->compMethodReturnsMultiRegRetType()) - { - ReturnTypeDesc retTypeDesc; - if (varTypeIsLong(compiler->info.compRetNativeType)) - { - retTypeDesc.InitializeLongReturnType(); - } - else // we must have a struct return type - { - retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass, - compiler->info.compCallConv); - } - - const unsigned regCount = retTypeDesc.GetReturnRegCount(); - - // Only x86 and x64 Unix ABI allows multi-reg return and - // number of result regs should be equal to MAX_RET_REG_COUNT. - assert(regCount == MAX_RET_REG_COUNT); - - for (unsigned i = 0; i < regCount; ++i) - { - gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i)); - } - } - else if (compiler->compMethodReturnsRetBufAddr()) - { - // This is for returning in an implicit RetBuf. 
- // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef. - - // In case the return is in an implicit RetBuf, the native return type should be a struct - assert(varTypeIsStruct(compiler->info.compRetNativeType)); - - gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); - } - // ... all other cases. - else - { -#ifdef TARGET_AMD64 - // For x64, structs that are not returned in registers are always - // returned in implicit RetBuf. If we reached here, we should not have - // a RetBuf and the return type should not be a struct. - assert(compiler->info.compRetBuffArg == BAD_VAR_NUM); - assert(!varTypeIsStruct(compiler->info.compRetNativeType)); -#endif // TARGET_AMD64 - - // For x86 Windows we can't make such assertions since we generate code for returning of - // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise - // compRetNativeType could be TYP_STRUCT. - gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType); - } - } - - regNumber regGSCheck; - regMaskTP regMaskGSCheck = RBM_NONE; - - if (!pushReg) - { - // Non-tail call: we can use any callee trash register that is not - // a return register or contain 'this' pointer (keep alive this), since - // we are generating GS cookie check after a GT_RETURN block. - // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well - // as return register for two-register-returned structs. - if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg() && - (compiler->lvaTable[compiler->info.compThisArg].GetRegNum() == REG_ARG_0)) - { - regGSCheck = REG_ARG_1; - } - else - { - regGSCheck = REG_ARG_0; - } - } - else - { -#ifdef TARGET_X86 - // It doesn't matter which register we pick, since we're going to save and restore it - // around the check. - // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes? - regGSCheck = REG_EAX; - regMaskGSCheck = RBM_EAX; -#else // !TARGET_X86 - // Jmp calls: specify method handle using which JIT queries VM for its entry point - // address and hence it can neither be a VSD call nor PInvoke calli with cookie - // parameter. Therefore, in case of jmp calls it is safe to use R11. - regGSCheck = REG_R11; -#endif // !TARGET_X86 - } - - regMaskTP byrefPushedRegs = RBM_NONE; - regMaskTP norefPushedRegs = RBM_NONE; - regMaskTP pushedRegs = RBM_NONE; - - if (compiler->gsGlobalSecurityCookieAddr == nullptr) - { -#if defined(TARGET_AMD64) - // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'. - // Otherwise, load the value into a reg and use 'cmp mem64, reg64'. - if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal) - { - genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); - GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); - } - else -#endif // defined(TARGET_AMD64) - { - assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal); - GetEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, - (int)compiler->gsGlobalSecurityCookieVal); - } - } - else - { - // Ngen case - GS cookie value needs to be accessed through an indirection. 
- - pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs); - - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0); - GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); - } - - BasicBlock* gsCheckBlk = genCreateTempLabel(); - inst_JMP(EJ_je, gsCheckBlk); - genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN); - genDefineTempLabel(gsCheckBlk); - - genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs); -} - -BasicBlock* CodeGen::genCallFinally(BasicBlock* block) -{ -#if defined(FEATURE_EH_FUNCLETS) - // Generate a call to the finally, like this: - // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym - // call finally-funclet - // jmp finally-return // Only for non-retless finally calls - // The jmp can be a NOP if we're going to the next block. - // If we're generating code for the main function (not a funclet), and there is no localloc, - // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP - // instead of loading the PSPSym in this case, or if PSPSym is not used (CoreRT ABI). - - if ((compiler->lvaPSPSym == BAD_VAR_NUM) || - (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))) - { -#ifndef UNIX_X86_ABI - inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL); -#endif // !UNIX_X86_ABI - } - else - { - GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0); - } - GetEmitter()->emitIns_J(INS_call, block->bbJumpDest); - - if (block->bbFlags & BBF_RETLESS_CALL) - { - // We have a retless call, and the last instruction generated was a call. - // If the next block is in a different EH region (or is the end of the code - // block), then we need to generate a breakpoint here (since it will never - // get executed) to get proper unwind behavior. - - if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) - { - instGen(INS_BREAKPOINT); // This should never get executed - } - } - else - { -// TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other -// architectures? -#ifndef JIT32_GCENCODER - // Because of the way the flowgraph is connected, the liveness info for this one instruction - // after the call is not (can not be) correct in cases where a variable has a last use in the - // handler. So turn off GC reporting for this single instruction. - GetEmitter()->emitDisableGC(); -#endif // JIT32_GCENCODER - - // Now go to where the finally funclet needs to return to. - if (block->bbNext->bbJumpDest == block->bbNext->bbNext) - { - // Fall-through. - // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly - // to the next instruction? This would depend on stack walking from within the finally - // handler working without this instruction being in this special EH region. - instGen(INS_nop); - } - else - { - inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); - } - -#ifndef JIT32_GCENCODER - GetEmitter()->emitEnableGC(); -#endif // JIT32_GCENCODER - } - -#else // !FEATURE_EH_FUNCLETS - - // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot - // corresponding to the finally's nesting level. When invoked in response to an exception, the - // EE does this. - // - // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS. 
- // - // We will emit : - // mov [ebp - (n + 1)], 0 - // mov [ebp - n ], 0xFC - // push &step - // jmp finallyBlock - // ... - // step: - // mov [ebp - n ], 0 - // jmp leaveTarget - // ... - // leaveTarget: - - noway_assert(isFramePointerUsed()); - - // Get the nesting level which contains the finally - unsigned finallyNesting = 0; - compiler->fgGetNestingLevel(block, &finallyNesting); - - // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) - unsigned filterEndOffsetSlotOffs; - filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); - - unsigned curNestingSlotOffs; - curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE)); - - // Zero out the slot for the next nesting level - GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, - curNestingSlotOffs - TARGET_POINTER_SIZE, 0); - GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, curNestingSlotOffs, LCL_FINALLY_MARK); - - // Now push the address where the finally funclet should return to directly. - if (!(block->bbFlags & BBF_RETLESS_CALL)) - { - assert(block->isBBCallAlwaysPair()); - GetEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest); - } - else - { - // EE expects a DWORD, so we provide 0 - inst_IV(INS_push_hide, 0); - } - - // Jump to the finally BB - inst_JMP(EJ_jmp, block->bbJumpDest); - -#endif // !FEATURE_EH_FUNCLETS - - // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the - // jump target using bbJumpDest - that is already used to point - // to the finally block. So just skip past the BBJ_ALWAYS unless the - // block is RETLESS. - if (!(block->bbFlags & BBF_RETLESS_CALL)) - { - assert(block->isBBCallAlwaysPair()); - block = block->bbNext; - } - return block; -} - -#if defined(FEATURE_EH_FUNCLETS) -void CodeGen::genEHCatchRet(BasicBlock* block) -{ - // Set RAX to the address the VM should return to after the catch. - // Generate a RIP-relative - // lea reg, [rip + disp32] ; the RIP is implicit - // which will be position-independent. - GetEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET); -} - -#else // !FEATURE_EH_FUNCLETS - -void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block) -{ - // The last statement of the block must be a GT_RETFILT, which has already been generated. - assert(block->lastNode() != nullptr); - assert(block->lastNode()->OperGet() == GT_RETFILT); - - if (block->bbJumpKind == BBJ_EHFINALLYRET) - { - assert(block->lastNode()->AsOp()->gtOp1 == nullptr); // op1 == nullptr means endfinally - - // Return using a pop-jmp sequence. As the "try" block calls - // the finally with a jmp, this leaves the x86 call-ret stack - // balanced in the normal flow of path. - - noway_assert(isFramePointerRequired()); - inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); - inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); - } - else - { - assert(block->bbJumpKind == BBJ_EHFILTERRET); - - // The return value has already been computed. 
- instGen_Return(0); - } -} - -#endif // !FEATURE_EH_FUNCLETS - -// Move an immediate value into an integer register - -void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, - regNumber reg, - ssize_t imm, - insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(unsigned gtFlags)) -{ - // reg cannot be a FP register - assert(!genIsValidFloatReg(reg)); - - if (!compiler->opts.compReloc) - { - size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs - } - - if ((imm == 0) && !EA_IS_RELOC(size)) - { - instGen_Set_Reg_To_Zero(size, reg, flags); - } - else - { - if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm)) - { - emitAttr newSize = EA_PTR_DSP_RELOC; - if (EA_IS_BYREF(size)) - { - newSize = EA_SET_FLG(newSize, EA_BYREF_FLG); - } - - GetEmitter()->emitIns_R_AI(INS_lea, newSize, reg, imm); - } - else - { - GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm); - } - } - regSet.verifyRegUsed(reg); -} - -/*********************************************************************************** - * - * Generate code to set a register 'targetReg' of type 'targetType' to the constant - * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call - * genProduceReg() on the target register. - */ -void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) -{ - switch (tree->gtOper) - { - case GT_CNS_INT: - { - // relocatable values tend to come down as a CNS_INT of native int type - // so the line between these two opcodes is kind of blurry - GenTreeIntConCommon* con = tree->AsIntConCommon(); - ssize_t cnsVal = con->IconValue(); - - if (con->ImmedValNeedsReloc(compiler)) - { - emitAttr size = EA_HANDLE_CNS_RELOC; - - if (targetType == TYP_BYREF) - { - size = EA_SET_FLG(size, EA_BYREF_FLG); - } - - instGen_Set_Reg_To_Imm(size, targetReg, cnsVal); - regSet.verifyRegUsed(targetReg); - } - else - { - genSetRegToIcon(targetReg, cnsVal, targetType); - } - } - break; - - case GT_CNS_DBL: - { - emitter* emit = GetEmitter(); - emitAttr size = emitTypeSize(targetType); - double constValue = tree->AsDblCon()->gtDconVal; - - // Make sure we use "xorps reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0) - if (*(__int64*)&constValue == 0) - { - // A faster/smaller way to generate 0 - emit->emitIns_R_R(INS_xorps, size, targetReg, targetReg); - } - else - { - CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); - emit->emitIns_R_C(ins_Load(targetType), size, targetReg, hnd, 0); - } - } - break; - - default: - unreached(); - } -} - -//------------------------------------------------------------------------ -// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. -// -// Arguments: -// tree - the node -// -void CodeGen::genCodeForNegNot(GenTree* tree) -{ - assert(tree->OperIs(GT_NEG, GT_NOT)); - - regNumber targetReg = tree->GetRegNum(); - var_types targetType = tree->TypeGet(); - - if (varTypeIsFloating(targetType)) - { - assert(tree->gtOper == GT_NEG); - genSSE2BitwiseOp(tree); - } - else - { - GenTree* operand = tree->gtGetOp1(); - assert(operand->isUsedFromReg()); - regNumber operandReg = genConsumeReg(operand); - - if (operandReg != targetReg) - { - inst_RV_RV(INS_mov, targetReg, operandReg, targetType); - } - - instruction ins = genGetInsForOper(tree->OperGet(), targetType); - inst_RV(ins, targetReg, targetType); - } - - genProduceReg(tree); -} - -//------------------------------------------------------------------------ -// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node. 
-// -// Arguments: -// tree - the node -// -void CodeGen::genCodeForBswap(GenTree* tree) -{ - // TODO: If we're swapping immediately after a read from memory or immediately before - // a write to memory, use the MOVBE instruction instead of the BSWAP instruction if - // the platform supports it. - - assert(tree->OperIs(GT_BSWAP, GT_BSWAP16)); - - regNumber targetReg = tree->GetRegNum(); - var_types targetType = tree->TypeGet(); - - GenTree* operand = tree->gtGetOp1(); - assert(operand->isUsedFromReg()); - regNumber operandReg = genConsumeReg(operand); - - if (operandReg != targetReg) - { - inst_RV_RV(INS_mov, targetReg, operandReg, targetType); - } - - if (tree->OperIs(GT_BSWAP)) - { - // 32-bit and 64-bit byte swaps use "bswap reg" - inst_RV(INS_bswap, targetReg, targetType); - } - else - { - // 16-bit byte swaps use "ror reg.16, 8" - inst_RV_IV(INS_ror_N, targetReg, 8 /* val */, emitAttr::EA_2BYTE); - } - - genProduceReg(tree); -} - -// Generate code to get the high N bits of a N*N=2N bit multiplication result -void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) -{ - assert(!treeNode->gtOverflowEx()); - - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - emitter* emit = GetEmitter(); - emitAttr size = emitTypeSize(treeNode); - GenTree* op1 = treeNode->AsOp()->gtOp1; - GenTree* op2 = treeNode->AsOp()->gtOp2; - - // to get the high bits of the multiply, we are constrained to using the - // 1-op form: RDX:RAX = RAX * rm - // The 3-op form (Rx=Ry*Rz) does not support it. - - genConsumeOperands(treeNode->AsOp()); - - GenTree* regOp = op1; - GenTree* rmOp = op2; - - // Set rmOp to the memory operand (if any) - if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->GetRegNum() == REG_RAX))) - { - regOp = op2; - rmOp = op1; - } - assert(regOp->isUsedFromReg()); - - // Setup targetReg when neither of the source operands was a matching register - if (regOp->GetRegNum() != REG_RAX) - { - inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->GetRegNum(), targetType); - } - - instruction ins; - if ((treeNode->gtFlags & GTF_UNSIGNED) == 0) - { - ins = INS_imulEAX; - } - else - { - ins = INS_mulEAX; - } - emit->emitInsBinary(ins, size, treeNode, rmOp); - - // Move the result to the desired register, if necessary - if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX) - { - inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); - } - - genProduceReg(treeNode); -} - -#ifdef TARGET_X86 -//------------------------------------------------------------------------ -// genCodeForLongUMod: Generate code for a tree of the form -// `(umod (gt_long x y) (const int))` -// -// Arguments: -// node - the node for which to generate code -// -void CodeGen::genCodeForLongUMod(GenTreeOp* node) -{ - assert(node != nullptr); - assert(node->OperGet() == GT_UMOD); - assert(node->TypeGet() == TYP_INT); - - GenTreeOp* const dividend = node->gtOp1->AsOp(); - assert(dividend->OperGet() == GT_LONG); - assert(varTypeIsLong(dividend)); - - genConsumeOperands(node); - - GenTree* const dividendLo = dividend->gtOp1; - GenTree* const dividendHi = dividend->gtOp2; - assert(dividendLo->isUsedFromReg()); - assert(dividendHi->isUsedFromReg()); - - GenTree* const divisor = node->gtOp2; - assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); - assert(divisor->gtSkipReloadOrCopy()->isUsedFromReg()); - assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2); - assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff); - - // dividendLo must be in RAX; 
dividendHi must be in RDX - genCopyRegIfNeeded(dividendLo, REG_EAX); - genCopyRegIfNeeded(dividendHi, REG_EDX); - - // At this point, EAX:EDX contains the 64bit dividend and op2->GetRegNum() - // contains the 32bit divisor. We want to generate the following code: - // - // cmp edx, divisor->GetRegNum() - // jb noOverflow - // - // mov temp, eax - // mov eax, edx - // xor edx, edx - // div divisor->GetRegNum() - // mov eax, temp - // - // noOverflow: - // div divisor->GetRegNum() - // - // This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c. - - BasicBlock* const noOverflow = genCreateTempLabel(); - - // cmp edx, divisor->GetRegNum() - // jb noOverflow - inst_RV_RV(INS_cmp, REG_EDX, divisor->GetRegNum()); - inst_JMP(EJ_jb, noOverflow); - - // mov temp, eax - // mov eax, edx - // xor edx, edx - // div divisor->GetRegNum() - // mov eax, temp - const regNumber tempReg = node->GetSingleTempReg(); - inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT); - inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT); - instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); - inst_RV(INS_div, divisor->GetRegNum(), TYP_INT); - inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT); - - // noOverflow: - // div divisor->GetRegNum() - genDefineTempLabel(noOverflow); - inst_RV(INS_div, divisor->GetRegNum(), TYP_INT); - - const regNumber targetReg = node->GetRegNum(); - if (targetReg != REG_EDX) - { - inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT); - } - genProduceReg(node); -} -#endif // TARGET_X86 - -//------------------------------------------------------------------------ -// genCodeForDivMod: Generate code for a DIV or MOD operation. -// -// Arguments: -// treeNode - the node to generate the code for -// -void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) -{ - assert(treeNode->OperIs(GT_DIV, GT_UDIV, GT_MOD, GT_UMOD)); - - GenTree* dividend = treeNode->gtOp1; - -#ifdef TARGET_X86 - if (varTypeIsLong(dividend->TypeGet())) - { - genCodeForLongUMod(treeNode); - return; - } -#endif // TARGET_X86 - - GenTree* divisor = treeNode->gtOp2; - genTreeOps oper = treeNode->OperGet(); - emitAttr size = emitTypeSize(treeNode); - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - emitter* emit = GetEmitter(); - - // Node's type must be int/native int, small integer types are not - // supported and floating point types are handled by genCodeForBinary. - assert(varTypeIsIntOrI(targetType)); - // dividend is in a register. - assert(dividend->isUsedFromReg()); - - genConsumeOperands(treeNode->AsOp()); - // dividend must be in RAX - genCopyRegIfNeeded(dividend, REG_RAX); - - // zero or sign extend rax to rdx - if (oper == GT_UMOD || oper == GT_UDIV || - (dividend->IsIntegralConst() && (dividend->AsIntConCommon()->IconValue() > 0))) - { - instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); - } - else - { - emit->emitIns(INS_cdq, size); - // the cdq instruction writes RDX, So clear the gcInfo for RDX - gcInfo.gcMarkRegSetNpt(RBM_RDX); - } - - // Perform the 'targetType' (64-bit or 32-bit) divide instruction - instruction ins; - if (oper == GT_UMOD || oper == GT_UDIV) - { - ins = INS_div; - } - else - { - ins = INS_idiv; - } - - emit->emitInsBinary(ins, size, treeNode, divisor); - - // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX. 
- // Move the result to the desired register, if necessary - if (oper == GT_DIV || oper == GT_UDIV) - { - if (targetReg != REG_RAX) - { - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - } - } - else - { - assert((oper == GT_MOD) || (oper == GT_UMOD)); - if (targetReg != REG_RDX) - { - inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); - } - } - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genCodeForBinary: Generate code for many binary arithmetic operators -// -// Arguments: -// treeNode - The binary operation for which we are generating code. -// -// Return Value: -// None. -// -// Notes: -// Integer MUL and DIV variants have special constraints on x64 so are not handled here. -// See the assert below for the operators that are handled. - -void CodeGen::genCodeForBinary(GenTreeOp* treeNode) -{ -#ifdef DEBUG - bool isValidOper = treeNode->OperIs(GT_ADD, GT_SUB); - if (varTypeIsFloating(treeNode->TypeGet())) - { - isValidOper |= treeNode->OperIs(GT_MUL, GT_DIV); - } - else - { - isValidOper |= treeNode->OperIs(GT_AND, GT_OR, GT_XOR); -#ifndef TARGET_64BIT - isValidOper |= treeNode->OperIs(GT_ADD_LO, GT_ADD_HI, GT_SUB_LO, GT_SUB_HI); -#endif - } - assert(isValidOper); -#endif - - genConsumeOperands(treeNode); - - const genTreeOps oper = treeNode->OperGet(); - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - emitter* emit = GetEmitter(); - - GenTree* op1 = treeNode->gtGetOp1(); - GenTree* op2 = treeNode->gtGetOp2(); - - // Commutative operations can mark op1 as contained or reg-optional to generate "op reg, memop/immed" - if (!op1->isUsedFromReg()) - { - assert(treeNode->OperIsCommutative()); - assert(op1->isMemoryOp() || op1->IsLocal() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || - op1->IsRegOptional()); - - op1 = treeNode->gtGetOp2(); - op2 = treeNode->gtGetOp1(); - } - - instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); - - // The arithmetic node must be sitting in a register (since it's not contained) - noway_assert(targetReg != REG_NA); - - regNumber op1reg = op1->isUsedFromReg() ? op1->GetRegNum() : REG_NA; - regNumber op2reg = op2->isUsedFromReg() ? 
op2->GetRegNum() : REG_NA; - - if (varTypeIsFloating(treeNode->TypeGet())) - { - // floating-point addition, subtraction, multiplication, and division - // all have RMW semantics if VEX support is not available - - bool isRMW = !compiler->canUseVexEncoding(); - inst_RV_RV_TT(ins, emitTypeSize(treeNode), targetReg, op1reg, op2, isRMW); - - genProduceReg(treeNode); - return; - } - - GenTree* dst; - GenTree* src; - - // This is the case of reg1 = reg1 op reg2 - // We're ready to emit the instruction without any moves - if (op1reg == targetReg) - { - dst = op1; - src = op2; - } - // We have reg1 = reg2 op reg1 - // In order for this operation to be correct - // we need that op is a commutative operation so - // we can convert it into reg1 = reg1 op reg2 and emit - // the same code as above - else if (op2reg == targetReg) - { - noway_assert(GenTree::OperIsCommutative(oper)); - dst = op2; - src = op1; - } - // now we know there are 3 different operands so attempt to use LEA - else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags - && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->gtSetFlags()) - { - if (op2->isContainedIntOrIImmed()) - { - emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, - (int)op2->AsIntConCommon()->IconValue()); - } - else - { - assert(op2reg != REG_NA); - emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0); - } - genProduceReg(treeNode); - return; - } - // dest, op1 and op2 registers are different: - // reg3 = reg1 op reg2 - // We can implement this by issuing a mov: - // reg3 = reg1 - // reg3 = reg3 op reg2 - else - { - var_types op1Type = op1->TypeGet(); - inst_RV_RV(ins_Copy(op1Type), targetReg, op1reg, op1Type); - regSet.verifyRegUsed(targetReg); - gcInfo.gcMarkRegPtrVal(targetReg, op1Type); - dst = treeNode; - src = op2; - } - - // try to use an inc or dec - if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx()) - { - if (src->IsIntegralConst(1)) - { - emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg); - genProduceReg(treeNode); - return; - } - else if (src->IsIntegralConst(-1)) - { - emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg); - genProduceReg(treeNode); - return; - } - } - regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src); - noway_assert(r == targetReg); - - if (treeNode->gtOverflowEx()) - { -#if !defined(TARGET_64BIT) - assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI); -#else - assert(oper == GT_ADD || oper == GT_SUB); -#endif - genCheckOverflow(treeNode); - } - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genCodeForMul: Generate code for a MUL operation. -// -// Arguments: -// treeNode - the node to generate the code for -// -void CodeGen::genCodeForMul(GenTreeOp* treeNode) -{ - assert(treeNode->OperIs(GT_MUL)); - - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - emitter* emit = GetEmitter(); - - // Node's type must be int or long (only on x64), small integer types are not - // supported and floating point types are handled by genCodeForBinary. 
- assert(varTypeIsIntOrI(targetType)); - - instruction ins; - emitAttr size = emitTypeSize(treeNode); - bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0); - bool requiresOverflowCheck = treeNode->gtOverflowEx(); - - GenTree* op1 = treeNode->gtGetOp1(); - GenTree* op2 = treeNode->gtGetOp2(); - - // there are 3 forms of x64 multiply: - // 1-op form with 128 result: RDX:RAX = RAX * rm - // 2-op form: reg *= rm - // 3-op form: reg = rm * imm - - genConsumeOperands(treeNode); - - // This matches the 'mul' lowering in Lowering::SetMulOpCounts() - // - // immOp :: Only one operand can be an immediate - // rmOp :: Only one operand can be a memory op. - // regOp :: A register op (especially the operand that matches 'targetReg') - // (can be nullptr when we have both a memory op and an immediate op) - - GenTree* immOp = nullptr; - GenTree* rmOp = op1; - GenTree* regOp; - - if (op2->isContainedIntOrIImmed()) - { - immOp = op2; - } - else if (op1->isContainedIntOrIImmed()) - { - immOp = op1; - rmOp = op2; - } - - if (immOp != nullptr) - { - // CQ: When possible use LEA for mul by imm 3, 5 or 9 - ssize_t imm = immOp->AsIntConCommon()->IconValue(); - - if (!requiresOverflowCheck && rmOp->isUsedFromReg() && ((imm == 3) || (imm == 5) || (imm == 9))) - { - // We will use the LEA instruction to perform this multiply - // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9. - unsigned int scale = (unsigned int)(imm - 1); - GetEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->GetRegNum(), rmOp->GetRegNum(), scale, 0); - } - else if (!requiresOverflowCheck && rmOp->isUsedFromReg() && (imm == genFindLowestBit(imm)) && (imm != 0)) - { - // Use shift for constant multiply when legal - uint64_t zextImm = static_cast(static_cast(imm)); - unsigned int shiftAmount = genLog2(zextImm); - - if (targetReg != rmOp->GetRegNum()) - { - // Copy reg src to dest register - inst_RV_RV(INS_mov, targetReg, rmOp->GetRegNum(), targetType); - } - inst_RV_SH(INS_shl, size, targetReg, shiftAmount); - } - else - { - // use the 3-op form with immediate - ins = GetEmitter()->inst3opImulForReg(targetReg); - emit->emitInsBinary(ins, size, rmOp, immOp); - } - } - else // we have no contained immediate operand - { - regOp = op1; - rmOp = op2; - - regNumber mulTargetReg = targetReg; - if (isUnsignedMultiply && requiresOverflowCheck) - { - ins = INS_mulEAX; - mulTargetReg = REG_RAX; - } - else - { - ins = INS_imul; - } - - // Set rmOp to the memory operand (if any) - // or set regOp to the op2 when it has the matching target register for our multiply op - // - if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->GetRegNum() == mulTargetReg))) - { - regOp = op2; - rmOp = op1; - } - assert(regOp->isUsedFromReg()); - - // Setup targetReg when neither of the source operands was a matching register - if (regOp->GetRegNum() != mulTargetReg) - { - inst_RV_RV(INS_mov, mulTargetReg, regOp->GetRegNum(), targetType); - } - - emit->emitInsBinary(ins, size, treeNode, rmOp); - - // Move the result to the desired register, if necessary - if ((ins == INS_mulEAX) && (targetReg != REG_RAX)) - { - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - } - } - - if (requiresOverflowCheck) - { - // Overflow checking is only used for non-floating point types - noway_assert(!varTypeIsFloating(treeNode)); - - genCheckOverflow(treeNode); - } - - genProduceReg(treeNode); -} - -#ifdef FEATURE_SIMD - -//------------------------------------------------------------------------ -// genSIMDSplitReturn: 
Generates code for returning a fixed-size SIMD type that lives -// in a single register, but is returned in multiple registers. -// -// Arguments: -// src - The source of the return -// retTypeDesc - The return type descriptor. -// -void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) -{ - assert(varTypeIsSIMD(src)); - assert(src->isUsedFromReg()); - - // This is a case of operand is in a single reg and needs to be - // returned in multiple ABI return registers. - regNumber opReg = src->GetRegNum(); - regNumber reg0 = retTypeDesc->GetABIReturnReg(0); - regNumber reg1 = retTypeDesc->GetABIReturnReg(1); - - assert((reg0 != REG_NA) && (reg1 != REG_NA) && (opReg != REG_NA)); - - const bool srcIsFloatReg = genIsValidFloatReg(opReg); - const bool dstIsFloatReg = genIsValidFloatReg(reg0); - assert(srcIsFloatReg); - -#ifdef TARGET_AMD64 - assert(src->TypeIs(TYP_SIMD16)); - assert(srcIsFloatReg == dstIsFloatReg); - if (opReg != reg0 && opReg != reg1) - { - // Operand reg is different from return regs. - // Copy opReg to reg0 and let it to be handled by one of the - // two cases below. - inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); - opReg = reg0; - } - - if (opReg == reg0) - { - assert(opReg != reg1); - // reg1 = opReg. - inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg1, opReg, TYP_SIMD16); - } - else - { - assert(opReg == reg1); - - // reg0 = opReg. - - inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); - } - // reg0 - already has required 8-byte in bit position [63:0]. - // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0]. - inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01); - -#else // TARGET_X86 - assert(src->TypeIs(TYP_SIMD8)); - assert(srcIsFloatReg != dstIsFloatReg); - assert((reg0 == REG_EAX) && (reg1 == REG_EDX)); - // reg0 = opReg[31:0] - inst_RV_RV(ins_Copy(opReg, TYP_INT), reg0, opReg, TYP_INT); - // reg1 = opRef[61:32] - if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1); - } - else - { - int8_t shuffleMask = 1; // we only need [61:32]->[31:0], the rest is not read. - inst_RV_TT_IV(INS_pshufd, EA_8BYTE, opReg, src, shuffleMask); - inst_RV_RV(ins_Copy(opReg, TYP_INT), reg1, opReg, TYP_INT); - } -#endif // TARGET_X86 -} - -#endif // FEATURE_SIMD - -#if defined(TARGET_X86) - -//------------------------------------------------------------------------ -// genFloatReturn: Generates code for float return statement for x86. -// -// Note: treeNode's and op1's registers are already consumed. -// -// Arguments: -// treeNode - The GT_RETURN or GT_RETFILT tree node with float type. -// -// Return Value: -// None -// -void CodeGen::genFloatReturn(GenTree* treeNode) -{ - assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); - assert(varTypeIsFloating(treeNode)); - - GenTree* op1 = treeNode->gtGetOp1(); - // Spill the return value register from an XMM register to the stack, then load it on the x87 stack. - // If it already has a home location, use that. Otherwise, we need a temp. - if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->AsLclVarCommon()->GetLclNum()].lvOnFrame) - { - if (compiler->lvaTable[op1->AsLclVarCommon()->GetLclNum()].GetRegNum() != REG_STK) - { - op1->gtFlags |= GTF_SPILL; - inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->AsLclVarCommon()->GetLclNum())), - emitTypeSize(op1->TypeGet()), op1, op1->GetRegNum()); - } - // Now, load it to the fp stack. 
- GetEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->GetLclNum(), 0); - } - else - { - // Spill the value, which should be in a register, then load it to the fp stack. - // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet). - op1->gtFlags |= GTF_SPILL; - regSet.rsSpillTree(op1->GetRegNum(), op1); - op1->gtFlags |= GTF_SPILLED; - op1->gtFlags &= ~GTF_SPILL; - - TempDsc* t = regSet.rsUnspillInPlace(op1, op1->GetRegNum()); - inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); - op1->gtFlags &= ~GTF_SPILLED; - regSet.tmpRlsTemp(t); - } -} -#endif // TARGET_X86 - -//------------------------------------------------------------------------ -// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE/GT_CMP node. -// -// Arguments: -// tree - the node -// -void CodeGen::genCodeForCompare(GenTreeOp* tree) -{ - assert(tree->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT, GT_TEST_EQ, GT_TEST_NE, GT_CMP)); - - // TODO-XArch-CQ: Check if we can use the currently set flags. - // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register - // (signed < or >= where targetReg != REG_NA) - - GenTree* op1 = tree->gtOp1; - var_types op1Type = op1->TypeGet(); - - if (varTypeIsFloating(op1Type)) - { - genCompareFloat(tree); - } - else - { - genCompareInt(tree); - } -} - -//------------------------------------------------------------------------ -// genCodeForBT: Generates code for a GT_BT node. -// -// Arguments: -// tree - The node. -// -void CodeGen::genCodeForBT(GenTreeOp* bt) -{ - assert(bt->OperIs(GT_BT)); - - GenTree* op1 = bt->gtGetOp1(); - GenTree* op2 = bt->gtGetOp2(); - var_types type = genActualType(op1->TypeGet()); - - assert(op1->isUsedFromReg() && op2->isUsedFromReg()); - assert((genTypeSize(type) >= genTypeSize(TYP_INT)) && (genTypeSize(type) <= genTypeSize(TYP_I_IMPL))); - - genConsumeOperands(bt); - // Note that the emitter doesn't fully support INS_bt, it only supports the reg,reg - // form and encodes the registers in reverse order. To get the correct order we need - // to reverse the operands when calling emitIns_R_R. - GetEmitter()->emitIns_R_R(INS_bt, emitTypeSize(type), op2->GetRegNum(), op1->GetRegNum()); -} - -// clang-format off -const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] -{ - { }, // NONE - { }, // 1 - { EJ_jl }, // SLT - { EJ_jle }, // SLE - { EJ_jge }, // SGE - { EJ_jg }, // SGT - { EJ_js }, // S - { EJ_jns }, // NS - - { EJ_je }, // EQ - { EJ_jne }, // NE - { EJ_jb }, // ULT - { EJ_jbe }, // ULE - { EJ_jae }, // UGE - { EJ_ja }, // UGT - { EJ_jb }, // C - { EJ_jae }, // NC - - // Floating point compare instructions (UCOMISS, UCOMISD etc.) set the condition flags as follows: - // ZF PF CF Meaning - // --------------------- - // 1 1 1 Unordered - // 0 0 0 Greater - // 0 0 1 Less Than - // 1 0 0 Equal - // - // Since ZF and CF are also set when the result is unordered, in some cases we first need to check - // PF before checking ZF/CF. In general, ordered conditions will result in a jump only if PF is not - // set and unordered conditions will result in a jump only if PF is set. 
- - { EJ_jnp, GT_AND, EJ_je }, // FEQ - { EJ_jne }, // FNE - { EJ_jnp, GT_AND, EJ_jb }, // FLT - { EJ_jnp, GT_AND, EJ_jbe }, // FLE - { EJ_jae }, // FGE - { EJ_ja }, // FGT - { EJ_jo }, // O - { EJ_jno }, // NO - - { EJ_je }, // FEQU - { EJ_jp, GT_OR, EJ_jne }, // FNEU - { EJ_jb }, // FLTU - { EJ_jbe }, // FLEU - { EJ_jp, GT_OR, EJ_jae }, // FGEU - { EJ_jp, GT_OR, EJ_ja }, // FGTU - { EJ_jp }, // P - { EJ_jnp }, // NP -}; -// clang-format on - -//------------------------------------------------------------------------ -// inst_SETCC: Generate code to set a register to 0 or 1 based on a condition. -// -// Arguments: -// condition - The condition -// type - The type of the value to be produced -// dstReg - The destination register to be set to 1 or 0 -// -void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg) -{ - assert(varTypeIsIntegral(type)); - assert(genIsValidIntReg(dstReg) && isByteReg(dstReg)); - - const GenConditionDesc& desc = GenConditionDesc::Get(condition); - - inst_SET(desc.jumpKind1, dstReg); - - if (desc.oper != GT_NONE) - { - BasicBlock* labelNext = genCreateTempLabel(); - inst_JMP((desc.oper == GT_OR) ? desc.jumpKind1 : emitter::emitReverseJumpKind(desc.jumpKind1), labelNext); - inst_SET(desc.jumpKind2, dstReg); - genDefineTempLabel(labelNext); - } - - if (!varTypeIsByte(type)) - { - GetEmitter()->emitIns_R_R(INS_movzx, EA_1BYTE, dstReg, dstReg); - } -} - -//------------------------------------------------------------------------ -// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. -// -// Arguments: -// tree - the GT_RETURNTRAP node -// -void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) -{ - assert(tree->OperGet() == GT_RETURNTRAP); - - // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC - // based on the contents of 'data' - - GenTree* data = tree->gtOp1; - genConsumeRegs(data); - GenTreeIntCon cns = intForm(TYP_INT, 0); - cns.SetContained(); - GetEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns); - - BasicBlock* skipLabel = genCreateTempLabel(); - - inst_JMP(EJ_je, skipLabel); - - // emit the call to the EE-helper that stops for GC (or other reasons) - regNumber tmpReg = tree->GetSingleTempReg(RBM_ALLINT); - assert(genIsValidIntReg(tmpReg)); - - genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg); - genDefineTempLabel(skipLabel); -} - -/***************************************************************************** - * - * Generate code for a single node in the tree. - * Preconditions: All operands have been evaluated - * - */ -void CodeGen::genCodeForTreeNode(GenTree* treeNode) -{ - regNumber targetReg; -#if !defined(TARGET_64BIT) - if (treeNode->TypeGet() == TYP_LONG) - { - // All long enregistered nodes will have been decomposed into their - // constituent lo and hi nodes. - targetReg = REG_NA; - } - else -#endif // !defined(TARGET_64BIT) - { - targetReg = treeNode->GetRegNum(); - } - var_types targetType = treeNode->TypeGet(); - emitter* emit = GetEmitter(); - -#ifdef DEBUG - // Validate that all the operands for the current node are consumed in order. - // This is important because LSRA ensures that any necessary copies will be - // handled correctly. - lastConsumedNode = nullptr; - if (compiler->verbose) - { - unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio - compiler->gtDispLIRNode(treeNode, "Generating: "); - } -#endif // DEBUG - - // Is this a node whose value is already in a register? 
LSRA denotes this by - // setting the GTF_REUSE_REG_VAL flag. - if (treeNode->IsReuseRegVal()) - { - // For now, this is only used for constant nodes. - assert((treeNode->OperIsConst())); - JITDUMP(" TreeNode is marked ReuseReg\n"); - return; - } - - // contained nodes are part of their parents for codegen purposes - // ex : immediates, most LEAs - if (treeNode->isContained()) - { - return; - } - - switch (treeNode->gtOper) - { -#ifndef JIT32_GCENCODER - case GT_START_NONGC: - GetEmitter()->emitDisableGC(); - break; -#endif // !defined(JIT32_GCENCODER) - - case GT_START_PREEMPTGC: - // Kill callee saves GC registers, and create a label - // so that information gets propagated to the emitter. - gcInfo.gcMarkRegSetNpt(RBM_INT_CALLEE_SAVED); - genDefineTempLabel(genCreateTempLabel()); - break; - - case GT_PROF_HOOK: -#ifdef PROFILING_SUPPORTED - // We should be seeing this only if profiler hook is needed - noway_assert(compiler->compIsProfilerHookNeeded()); - - // Right now this node is used only for tail calls. In future if - // we intend to use it for Enter or Leave hooks, add a data member - // to this node indicating the kind of profiler hook. For example, - // helper number can be used. - genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); -#endif // PROFILING_SUPPORTED - break; - - case GT_LCLHEAP: - genLclHeap(treeNode); - break; - - case GT_CNS_INT: -#ifdef TARGET_X86 - assert(!treeNode->IsIconHandle(GTF_ICON_TLS_HDL)); -#endif // TARGET_X86 - FALLTHROUGH; - - case GT_CNS_DBL: - genSetRegToConst(targetReg, targetType, treeNode); - genProduceReg(treeNode); - break; - - case GT_NOT: - case GT_NEG: - genCodeForNegNot(treeNode); - break; - - case GT_BSWAP: - case GT_BSWAP16: - genCodeForBswap(treeNode); - break; - - case GT_DIV: - if (varTypeIsFloating(treeNode->TypeGet())) - { - genCodeForBinary(treeNode->AsOp()); - break; - } - FALLTHROUGH; - case GT_MOD: - case GT_UMOD: - case GT_UDIV: - genCodeForDivMod(treeNode->AsOp()); - break; - - case GT_OR: - case GT_XOR: - case GT_AND: - assert(varTypeIsIntegralOrI(treeNode)); - - FALLTHROUGH; - -#if !defined(TARGET_64BIT) - case GT_ADD_LO: - case GT_ADD_HI: - case GT_SUB_LO: - case GT_SUB_HI: -#endif // !defined(TARGET_64BIT) - - case GT_ADD: - case GT_SUB: - genCodeForBinary(treeNode->AsOp()); - break; - - case GT_MUL: - if (varTypeIsFloating(treeNode->TypeGet())) - { - genCodeForBinary(treeNode->AsOp()); - break; - } - genCodeForMul(treeNode->AsOp()); - break; - - case GT_LSH: - case GT_RSH: - case GT_RSZ: - case GT_ROL: - case GT_ROR: - genCodeForShift(treeNode); - break; - -#if !defined(TARGET_64BIT) - - case GT_LSH_HI: - case GT_RSH_LO: - genCodeForShiftLong(treeNode); - break; - -#endif // !defined(TARGET_64BIT) - - case GT_CAST: - genCodeForCast(treeNode->AsOp()); - break; - - case GT_BITCAST: - genCodeForBitCast(treeNode->AsOp()); - break; - - case GT_LCL_FLD_ADDR: - case GT_LCL_VAR_ADDR: - genCodeForLclAddr(treeNode); - break; - - case GT_LCL_FLD: - genCodeForLclFld(treeNode->AsLclFld()); - break; - - case GT_LCL_VAR: - genCodeForLclVar(treeNode->AsLclVar()); - break; - - case GT_STORE_LCL_FLD: - genCodeForStoreLclFld(treeNode->AsLclFld()); - break; - - case GT_STORE_LCL_VAR: - genCodeForStoreLclVar(treeNode->AsLclVar()); - break; - - case GT_RETFILT: - case GT_RETURN: - genReturn(treeNode); - break; - - case GT_LEA: - // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. 
- genLeaInstruction(treeNode->AsAddrMode()); - break; - - case GT_INDEX_ADDR: - genCodeForIndexAddr(treeNode->AsIndexAddr()); - break; - - case GT_IND: - genCodeForIndir(treeNode->AsIndir()); - break; - - case GT_MULHI: -#ifdef TARGET_X86 - case GT_MUL_LONG: -#endif - genCodeForMulHi(treeNode->AsOp()); - break; - - case GT_INTRINSIC: - genIntrinsic(treeNode); - break; - -#ifdef FEATURE_SIMD - case GT_SIMD: - genSIMDIntrinsic(treeNode->AsSIMD()); - break; -#endif // FEATURE_SIMD - -#ifdef FEATURE_HW_INTRINSICS - case GT_HWINTRINSIC: - genHWIntrinsic(treeNode->AsHWIntrinsic()); - break; -#endif // FEATURE_HW_INTRINSICS - - case GT_CKFINITE: - genCkfinite(treeNode); - break; - - case GT_EQ: - case GT_NE: - case GT_LT: - case GT_LE: - case GT_GE: - case GT_GT: - case GT_TEST_EQ: - case GT_TEST_NE: - case GT_CMP: - genCodeForCompare(treeNode->AsOp()); - break; - - case GT_JTRUE: - genCodeForJumpTrue(treeNode->AsOp()); - break; - - case GT_JCC: - genCodeForJcc(treeNode->AsCC()); - break; - - case GT_SETCC: - genCodeForSetcc(treeNode->AsCC()); - break; - - case GT_BT: - genCodeForBT(treeNode->AsOp()); - break; - - case GT_RETURNTRAP: - genCodeForReturnTrap(treeNode->AsOp()); - break; - - case GT_STOREIND: - genCodeForStoreInd(treeNode->AsStoreInd()); - break; - - case GT_COPY: - // This is handled at the time we call genConsumeReg() on the GT_COPY - break; - - case GT_LIST: - case GT_FIELD_LIST: - // Should always be marked contained. - assert(!"LIST, FIELD_LIST nodes should always be marked contained."); - break; - - case GT_SWAP: - genCodeForSwap(treeNode->AsOp()); - break; - - case GT_PUTARG_STK: - genPutArgStk(treeNode->AsPutArgStk()); - break; - - case GT_PUTARG_REG: - genPutArgReg(treeNode->AsOp()); - break; - - case GT_CALL: - genCallInstruction(treeNode->AsCall()); - break; - - case GT_JMP: - genJmpMethod(treeNode); - break; - - case GT_LOCKADD: - genCodeForLockAdd(treeNode->AsOp()); - break; - - case GT_XCHG: - case GT_XADD: - genLockedInstructions(treeNode->AsOp()); - break; - - case GT_XORR: - case GT_XAND: - NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet."); - break; - - case GT_MEMORYBARRIER: - { - CodeGen::BarrierKind barrierKind = - treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL; - - instGen_MemoryBarrier(barrierKind); - break; - } - - case GT_CMPXCHG: - genCodeForCmpXchg(treeNode->AsCmpXchg()); - break; - - case GT_RELOAD: - // do nothing - reload is just a marker. - // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child - // into the register specified in this node. - break; - - case GT_NOP: - break; - - case GT_KEEPALIVE: - genConsumeRegs(treeNode->AsOp()->gtOp1); - break; - - case GT_NO_OP: - GetEmitter()->emitIns_Nop(1); - break; - - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif // FEATURE_HW_INTRINSICS - genRangeCheck(treeNode); - break; - - case GT_PHYSREG: - genCodeForPhysReg(treeNode->AsPhysReg()); - break; - - case GT_NULLCHECK: - genCodeForNullCheck(treeNode->AsIndir()); - break; - - case GT_CATCH_ARG: - - noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); - - /* Catch arguments get passed in a register. genCodeForBBlist() - would have marked it as holding a GC object, but not used. 
*/ - - noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); - genConsumeReg(treeNode); - break; - -#if !defined(FEATURE_EH_FUNCLETS) - case GT_END_LFIN: - - // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates: - // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var - - size_t finallyNesting; - finallyNesting = treeNode->AsVal()->gtVal1; - noway_assert(treeNode->AsVal()->gtVal1 < compiler->compHndBBtabCount); - noway_assert(finallyNesting < compiler->compHndBBtabCount); - - // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) - unsigned filterEndOffsetSlotOffs; - PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > - TARGET_POINTER_SIZE); // below doesn't underflow. - filterEndOffsetSlotOffs = - (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); - - size_t curNestingSlotOffs; - curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE); - GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, (unsigned)curNestingSlotOffs, - 0); - break; -#endif // !FEATURE_EH_FUNCLETS - - case GT_PINVOKE_PROLOG: - noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); - -#ifdef PSEUDORANDOM_NOP_INSERTION - // the runtime side requires the codegen here to be consistent - emit->emitDisableRandomNops(); -#endif // PSEUDORANDOM_NOP_INSERTION - break; - - case GT_LABEL: - genPendingCallLabel = genCreateTempLabel(); - emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->GetRegNum()); - break; - - case GT_STORE_OBJ: - case GT_STORE_DYN_BLK: - case GT_STORE_BLK: - genCodeForStoreBlk(treeNode->AsBlk()); - break; - - case GT_JMPTABLE: - genJumpTable(treeNode); - break; - - case GT_SWITCH_TABLE: - genTableBasedSwitch(treeNode); - break; - - case GT_ARR_INDEX: - genCodeForArrIndex(treeNode->AsArrIndex()); - break; - - case GT_ARR_OFFSET: - genCodeForArrOffset(treeNode->AsArrOffs()); - break; - - case GT_CLS_VAR_ADDR: - emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->AsClsVar()->gtClsVarHnd, 0); - genProduceReg(treeNode); - break; - -#if !defined(TARGET_64BIT) - case GT_LONG: - assert(treeNode->isUsedFromReg()); - genConsumeRegs(treeNode); - break; -#endif - - case GT_IL_OFFSET: - // Do nothing; these nodes are simply markers for debug info. - break; - - default: - { -#ifdef DEBUG - char message[256]; - _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n", - GenTree::OpName(treeNode->OperGet())); - NYIRAW(message); -#endif - assert(!"Unknown node in codegen"); - } - break; - } -} - -#ifdef FEATURE_SIMD -//---------------------------------------------------------------------------------- -// genMultiRegStoreToSIMDLocal: store multi-reg value to a single-reg SIMD local -// -// Arguments: -// lclNode - GentreeLclVar of GT_STORE_LCL_VAR -// -// Return Value: -// None -// -void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) -{ -#ifdef UNIX_AMD64_ABI - regNumber dst = lclNode->GetRegNum(); - GenTree* op1 = lclNode->gtGetOp1(); - GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); - unsigned regCount = - actualOp1->IsMultiRegLclVar() ? actualOp1->AsLclVar()->GetFieldCount(compiler) : actualOp1->GetMultiRegCount(); - assert(op1->IsMultiRegNode()); - genConsumeRegs(op1); - - // Right now the only enregistrable structs supported are SIMD types. 
- // They are only returned in 1 or 2 registers - the 1 register case is - // handled as a regular STORE_LCL_VAR. - // This case is always a call (AsCall() will assert if it is not). - GenTreeCall* call = actualOp1->AsCall(); - const ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT); - - assert(regCount == 2); - assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0))); - assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1))); - - // This is a case where the two 8-bytes that comprise the operand are in - // two different xmm registers and need to be assembled into a single - // xmm register. - regNumber targetReg = lclNode->GetRegNum(); - regNumber reg0 = call->GetRegNumByIdx(0); - regNumber reg1 = call->GetRegNumByIdx(1); - - if (op1->IsCopyOrReload()) - { - // GT_COPY/GT_RELOAD will have valid reg for those positions - // that need to be copied or reloaded. - regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0); - if (reloadReg != REG_NA) - { - reg0 = reloadReg; - } - - reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1); - if (reloadReg != REG_NA) - { - reg1 = reloadReg; - } - } - - if (targetReg != reg0 && targetReg != reg1) - { - // targetReg = reg0; - // targetReg[127:64] = reg1[127:64] - inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE); - inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00); - } - else if (targetReg == reg0) - { - // (elided) targetReg = reg0 - // targetReg[127:64] = reg1[127:64] - inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00); - } - else - { - assert(targetReg == reg1); - // We need two shuffles to achieve this - // First: - // targetReg[63:0] = targetReg[63:0] - // targetReg[127:64] = reg0[63:0] - // - // Second: - // targetReg[63:0] = targetReg[127:64] - // targetReg[127:64] = targetReg[63:0] - // - // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg - // and next swap low and high 8-bytes of targetReg to have them - // rearranged in the right order. - inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00); - inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01); - } - genProduceReg(lclNode); -#else // !UNIX_AMD64_ABI - assert(!"Multireg store to SIMD reg not supported on X64 Windows"); -#endif // !UNIX_AMD64_ABI -} -#endif // FEATURE_SIMD - -//------------------------------------------------------------------------ -// genAllocLclFrame: Probe the stack and allocate the local stack frame - subtract from SP. -// -// Arguments: -// frameSize - the size of the stack frame being allocated. -// initReg - register to use as a scratch register. -// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if -// this call sets 'initReg' to a non-zero value. -// maskArgRegsLiveIn - incoming argument registers that are currently live. -// -// Return value: -// None -// -void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) -{ - assert(compiler->compGeneratingProlog); - - if (frameSize == 0) - { - return; - } - - const target_size_t pageSize = compiler->eeGetPageSize(); - - if (frameSize == REGSIZE_BYTES) - { - // Frame size is the same as register size. 
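// [Editor's note - illustrative sketch, not part of the original patch; the enum and
// function names are hypothetical.] genAllocLclFrame picks one of three strategies:
// a single push when the frame is exactly one register wide (the push both allocates
// and probes), a direct "sub esp" for sub-page frames (plus a probe at the very bottom
// when the frame ends close to a page boundary), and the stack-probe helper for frames
// of a page or more. Restated as plain C++:
#include <cstddef>
enum class FrameAllocKind
{
    SinglePush,           // push eax: allocates REGSIZE_BYTES and touches the new SP
    SubSp,                // sub esp, frameSize
    SubSpWithBottomProbe, // sub esp, frameSize; test [esp], eax
    ProbeHelper           // lea arg, [esp - frameSize]; call the stack-probe helper
};

static FrameAllocKind ChooseFrameAlloc(size_t frameSize, size_t pageSize, size_t regSize, size_t probeThreshold)
{
    if (frameSize == regSize)
    {
        return FrameAllocKind::SinglePush;
    }
    if (frameSize < pageSize)
    {
        return (frameSize + probeThreshold > pageSize) ? FrameAllocKind::SubSpWithBottomProbe
                                                       : FrameAllocKind::SubSp;
    }
    return FrameAllocKind::ProbeHelper;
}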
- GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_EAX); - compiler->unwindAllocStack(frameSize); - } - else if (frameSize < pageSize) - { - GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize); - compiler->unwindAllocStack(frameSize); - - const unsigned lastProbedLocToFinalSp = frameSize; - - if (lastProbedLocToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) - { - // We haven't probed almost a complete page. If the next action on the stack might subtract from SP - // first, before touching the current SP, then we need to probe at the very bottom. This can - // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" - // strategy. - GetEmitter()->emitIns_R_AR(INS_test, EA_4BYTE, REG_EAX, REG_SPBASE, 0); - } - } - else - { -#ifdef TARGET_X86 - int spOffset = -(int)frameSize; - - if (compiler->info.compPublishStubParam) - { - GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_SECRET_STUB_PARAM); - spOffset += REGSIZE_BYTES; - } - - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, spOffset); - regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); - - genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN); - - if (compiler->info.compPublishStubParam) - { - GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, REG_SECRET_STUB_PARAM); - GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize); - } - else - { - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG); - } -#else // !TARGET_X86 - static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == - RBM_NONE); - - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize); - regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); - - genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN); - - if (initReg == REG_DEFAULT_HELPER_CALL_TARGET) - { - *pInitRegZeroed = false; - } - - static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RBM_NONE); - - GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG); -#endif // !TARGET_X86 - - compiler->unwindAllocStack(frameSize); - - if (initReg == REG_STACK_PROBE_HELPER_ARG) - { - *pInitRegZeroed = false; - } - } - -#ifdef USING_SCOPE_INFO - if (!doubleAlignOrFramePointerUsed()) - { - psiAdjustStackLevel(frameSize); - } -#endif // USING_SCOPE_INFO -} - -//------------------------------------------------------------------------ -// genStackPointerConstantAdjustment: add a specified constant value to the stack pointer. -// No probe is done. -// -// Arguments: -// spDelta - the value to add to SP. Must be negative or zero. -// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP -// adjustment from the emitter, using this register. -// -// Return Value: -// None. -// -void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) -{ - assert(spDelta < 0); - - // We assert that the SP change is less than one page. If it's greater, you should have called a - // function that does a probe, which will in turn call this function. - assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); - -#ifdef TARGET_X86 - if (regTmp != REG_NA) - { - // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment - // to ESP. So do the work in the count register. 
- // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require - // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't - // track". - inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); - inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE); - inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); - } - else -#endif // TARGET_X86 - { - inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); - } -} - -//------------------------------------------------------------------------ -// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, -// and probe the stack as appropriate. Should only be called as a helper for -// genStackPointerConstantAdjustmentLoopWithProbe. -// -// Arguments: -// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, -// but the stack pointer doesn't move. -// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP -// adjustment from the emitter, using this register. -// -// Return Value: -// None. -// -void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) -{ - GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); - genStackPointerConstantAdjustment(spDelta, regTmp); -} - -//------------------------------------------------------------------------ -// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, -// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. -// This will generate a sequence of probes in-line. It is required for the case where we need to expose -// (not hide) the stack level adjustment. We can't use the dynamic loop in that case, because the total -// stack adjustment would not be visible to the emitter. It would be possible to use this version for -// multiple hidden constant stack level adjustments but we don't do that currently (we use the loop -// version in genStackPointerDynamicAdjustmentWithProbe instead). -// -// Arguments: -// spDelta - the value to add to SP. Must be negative. -// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP -// adjustment from the emitter, using this register. -// -// Return Value: -// Offset in bytes from SP to last probed address. -// -target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) -{ - assert(spDelta < 0); - - const target_size_t pageSize = compiler->eeGetPageSize(); - - ssize_t spRemainingDelta = spDelta; - do - { - ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); - genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); - spRemainingDelta -= spOneDelta; - } while (spRemainingDelta < 0); - - // What offset from the final SP was the last probe? This depends on the fact that - // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". - target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; - if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) - { - // We haven't probed almost a complete page. If lastTouchDelta==0, then spDelta was an exact - // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed - // the page, but very far from the end. 
If the next action on the stack might subtract from SP - // first, before touching the current SP, then we do one more probe at the very bottom. This can - // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" - // strategy. - - GetEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, 0); - lastTouchDelta = 0; - } - - return lastTouchDelta; -} - -//------------------------------------------------------------------------ -// genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer, -// and probe the stack as appropriate. -// -// Note that for x86, we hide the ESP adjustment from the emitter. To do that, currently, -// requires a temporary register and extra code. -// -// Arguments: -// regSpDelta - the register value to add to SP. The value in this register must be negative. -// This register might be trashed. -// regTmp - an available temporary register. Will be trashed. -// -// Return Value: -// None. -// -void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp) -{ - assert(regSpDelta != REG_NA); - assert(regTmp != REG_NA); - - // Tickle the pages to ensure that ESP is always valid and is - // in sync with the "stack guard page". Note that in the worst - // case ESP is on the last byte of the guard page. Thus you must - // touch ESP-0 first not ESP-0x1000. - // - // Another subtlety is that you don't want ESP to be exactly on the - // boundary of the guard page because PUSH is predecrement, thus - // call setup would not touch the guard page but just beyond it. - // - // Note that we go through a few hoops so that ESP never points to - // illegal pages at any time during the tickling process - // - // add regSpDelta, ESP // reg now holds ultimate ESP - // jb loop // result is smaller than original ESP (no wrap around) - // xor regSpDelta, regSpDelta // Overflow, pick lowest possible number - // loop: - // test ESP, [ESP+0] // tickle the page - // mov regTmp, ESP - // sub regTmp, eeGetPageSize() - // mov ESP, regTmp - // cmp ESP, regSpDelta - // jae loop - // mov ESP, regSpDelta - - BasicBlock* loop = genCreateTempLabel(); - - inst_RV_RV(INS_add, regSpDelta, REG_SPBASE, TYP_I_IMPL); - inst_JMP(EJ_jb, loop); - - instGen_Set_Reg_To_Zero(EA_PTRSIZE, regSpDelta); - - genDefineTempLabel(loop); - - // Tickle the decremented value. Note that it must be done BEFORE the update of ESP since ESP might already - // be on the guard page. It is OK to leave the final value of ESP on the guard page. - GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); - - // Subtract a page from ESP. This is a trick to avoid the emitter trying to track the - // decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly. - inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); - inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); - inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); - - inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL); - inst_JMP(EJ_jae, loop); - - // Move the final value to ESP - inst_RV_RV(INS_mov, REG_SPBASE, regSpDelta); -} - -//------------------------------------------------------------------------ -// genLclHeap: Generate code for localloc. -// -// Arguments: -// tree - the localloc tree to generate. -// -// Notes: -// Note that for x86, we don't track ESP movements while generating the localloc code. 
-// The ESP tracking is used to report stack pointer-relative GC info, which is not -// interesting while doing the localloc construction. Also, for functions with localloc, -// we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function -// call arguments. -// -// For x86, we store the ESP after the localloc is complete in the LocAllocSP -// variable. This variable is implicitly reported to the VM in the GC info (its position -// is defined by convention relative to other items), and is used by the GC to find the -// "base" stack pointer in functions with localloc. -// -void CodeGen::genLclHeap(GenTree* tree) -{ - assert(tree->OperGet() == GT_LCLHEAP); - assert(compiler->compLocallocUsed); - - GenTree* size = tree->AsOp()->gtOp1; - noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); - - regNumber targetReg = tree->GetRegNum(); - regNumber regCnt = REG_NA; - var_types type = genActualType(size->gtType); - emitAttr easz = emitTypeSize(type); - BasicBlock* endLabel = nullptr; - target_ssize_t lastTouchDelta = (target_ssize_t)-1; - -#ifdef DEBUG - genStackPointerCheck(compiler->opts.compStackCheckOnRet, compiler->lvaReturnSpCheck); -#endif - - noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes - noway_assert(genStackLevel == 0); // Can't have anything on the stack - - unsigned stackAdjustment = 0; - - // compute the amount of memory to allocate to properly STACK_ALIGN. - size_t amount = 0; - if (size->IsCnsIntOrI()) - { - // If size is a constant, then it must be contained. - assert(size->isContained()); - - // If amount is zero then return null in targetReg - amount = size->AsIntCon()->gtIconVal; - if (amount == 0) - { - instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); - goto BAILOUT; - } - - // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN - amount = AlignUp(amount, STACK_ALIGN); - } - else - { - // The localloc requested memory size is non-constant. - - // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg. - genConsumeRegAndCopy(size, targetReg); - endLabel = genCreateTempLabel(); - GetEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg); - inst_JMP(EJ_je, endLabel); - - // Compute the size of the block to allocate and perform alignment. - // If compInitMem=true, we can reuse targetReg as regcnt, - // since we don't need any internal registers. - if (compiler->info.compInitMem) - { - assert(tree->AvailableTempRegCount() == 0); - regCnt = targetReg; - } - else - { - regCnt = tree->ExtractTempReg(); - if (regCnt != targetReg) - { - // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. - inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); - } - } - - // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done - // by code like: - // add reg, 15 - // and reg, -16 - // However, in the initialized memory case, we need the count of STACK_ALIGN-sized - // elements, not a byte count, after the alignment. So instead of the "and", which - // becomes unnecessary, generate a shift, e.g.: - // add reg, 15 - // shr reg, 4 - - inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type)); - - if (compiler->info.compInitMem) - { - // Convert the count from a count of bytes to a loop count. We will loop once per - // stack alignment size, so each loop will zero 4 bytes on Windows/x86, and 16 bytes - // on x64 and Linux/x86. 
- // - // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size - // words per iteration on x64. We will shift off all the stack alignment bits - // added above, so there is no need for an 'and' instruction. - - // --- shr regCnt, 2 (or 4) --- - inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT); - } - else - { - // Otherwise, mask off the low bits to align the byte count. - inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); - } - } - -#if FEATURE_FIXED_OUT_ARGS - // If we have an outgoing arg area then we must adjust the SP by popping off the - // outgoing arg area. We will restore it right before we return from this method. - // - // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following - // are the cases that need to be handled: - // i) Method has out-going arg area. - // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs). - // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space. - // ii) Method has no out-going arg area. - // Nothing to pop off from the stack. - if (compiler->lvaOutgoingArgSpaceSize > 0) - { - assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain - // aligned - inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); - stackAdjustment += compiler->lvaOutgoingArgSpaceSize; - } -#endif - - if (size->IsCnsIntOrI()) - { - // We should reach here only for non-zero, constant size allocations. - assert(amount > 0); - assert((amount % STACK_ALIGN) == 0); - assert((amount % REGSIZE_BYTES) == 0); - - // For small allocations we will generate up to six push 0 inline - size_t cntRegSizedWords = amount / REGSIZE_BYTES; - if (cntRegSizedWords <= 6) - { - for (; cntRegSizedWords != 0; cntRegSizedWords--) - { - inst_IV(INS_push_hide, 0); // push_hide means don't track the stack - } - - lastTouchDelta = 0; - - goto ALLOC_DONE; - } - - bool initMemOrLargeAlloc = - compiler->info.compInitMem || (amount >= compiler->eeGetPageSize()); // must be >= not > - -#ifdef TARGET_X86 - bool needRegCntRegister = true; -#else // !TARGET_X86 - bool needRegCntRegister = initMemOrLargeAlloc; -#endif // !TARGET_X86 - - if (needRegCntRegister) - { - // If compInitMem=true, we can reuse targetReg as regcnt. - // Since size is a constant, regCnt is not yet initialized. - assert(regCnt == REG_NA); - if (compiler->info.compInitMem) - { - assert(tree->AvailableTempRegCount() == 0); - regCnt = targetReg; - } - else - { - regCnt = tree->ExtractTempReg(); - } - } - - if (!initMemOrLargeAlloc) - { - // Since the size is less than a page, and we don't need to zero init memory, simply adjust ESP. - // ESP might already be in the guard page, so we must touch it BEFORE - // the alloc, not after. - - assert(amount < compiler->eeGetPageSize()); // must be < not <= - lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, regCnt); - goto ALLOC_DONE; - } - - // else, "mov regCnt, amount" - - if (compiler->info.compInitMem) - { - // When initializing memory, we want 'amount' to be the loop count. - assert((amount % STACK_ALIGN) == 0); - amount /= STACK_ALIGN; - } - - genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? 
TYP_INT : TYP_LONG); - } - - if (compiler->info.compInitMem) - { - // At this point 'regCnt' is set to the number of loop iterations for this loop, if each - // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes. - // Since we have to zero out the allocated memory AND ensure that RSP is always valid - // by tickling the pages, we will just push 0's on the stack. - - assert(genIsValidIntReg(regCnt)); - - // Loop: - BasicBlock* loop = genCreateTempLabel(); - genDefineTempLabel(loop); - - static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0); - unsigned const count = (STACK_ALIGN / REGSIZE_BYTES); - - for (unsigned i = 0; i < count; i++) - { - inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0 - } - // Note that the stack must always be aligned to STACK_ALIGN bytes - - // Decrement the loop counter and loop if not done. - inst_RV(INS_dec, regCnt, TYP_I_IMPL); - inst_JMP(EJ_jne, loop); - - lastTouchDelta = 0; - } - else - { - // At this point 'regCnt' is set to the total number of bytes to localloc. - // Negate this value before calling the function to adjust the stack (which - // adds to ESP). - - inst_RV(INS_NEG, regCnt, TYP_I_IMPL); - regNumber regTmp = tree->GetSingleTempReg(); - genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp); - - // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, - // we're going to assume the worst and probe. - } - -ALLOC_DONE: - // Re-adjust SP to allocate out-going arg area. Note: this also requires probes, if we have - // a very large stack adjustment! For simplicity, we use the same function used elsewhere, - // which probes the current address before subtracting. We may end up probing multiple - // times relatively "nearby". - if (stackAdjustment > 0) - { - assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned - assert(lastTouchDelta >= -1); - - if ((lastTouchDelta == (target_ssize_t)-1) || - (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > - compiler->eeGetPageSize())) - { - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, REG_NA); - } - else - { - genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, REG_NA); - } - } - - // Return the stackalloc'ed address in result register. - // TargetReg = RSP + stackAdjustment. - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment); - - if (endLabel != nullptr) - { - genDefineTempLabel(endLabel); - } - -BAILOUT: - -#ifdef JIT32_GCENCODER - if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) - { - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); - } -#endif // JIT32_GCENCODER - -#ifdef DEBUG - // Update local variable to reflect the new stack pointer. 
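// [Editor's note - illustrative sketch, not part of the original patch; helper names
// are hypothetical.] The sizing logic used earlier in genLclHeap, restated: the
// requested byte count is rounded up to STACK_ALIGN ("add 15; and -16"), and when the
// block must be zero-initialized the rounded count is instead turned into a push-loop
// iteration count ("add 15; shr 4" on x64). Assumes stackAlign is a power of two.
#include <cstddef>
static size_t LocAllocAlignedBytes(size_t request, size_t stackAlign)
{
    return (request + stackAlign - 1) & ~(stackAlign - 1);
}

static size_t LocAllocZeroLoopCount(size_t request, size_t stackAlign)
{
    return LocAllocAlignedBytes(request, stackAlign) / stackAlign; // one iteration zeroes stackAlign bytes
}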
- if (compiler->opts.compStackCheckOnRet) - { - noway_assert(compiler->lvaReturnSpCheck != 0xCCCCCCCC && - compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister && - compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame); - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0); - } -#endif - - genProduceReg(tree); -} - -void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) -{ - assert(storeBlkNode->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK)); - - if (storeBlkNode->OperIs(GT_STORE_OBJ)) - { -#ifndef JIT32_GCENCODER - assert(!storeBlkNode->gtBlkOpGcUnsafe); -#endif - assert(storeBlkNode->OperIsCopyBlkOp()); - assert(storeBlkNode->AsObj()->GetLayout()->HasGCPtr()); - genCodeForCpObj(storeBlkNode->AsObj()); - return; - } - - bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp(); - - switch (storeBlkNode->gtBlkOpKind) - { -#ifdef TARGET_AMD64 - case GenTreeBlk::BlkOpKindHelper: - assert(!storeBlkNode->gtBlkOpGcUnsafe); - if (isCopyBlk) - { - genCodeForCpBlkHelper(storeBlkNode); - } - else - { - genCodeForInitBlkHelper(storeBlkNode); - } - break; -#endif // TARGET_AMD64 - case GenTreeBlk::BlkOpKindRepInstr: -#ifndef JIT32_GCENCODER - assert(!storeBlkNode->gtBlkOpGcUnsafe); -#endif - if (isCopyBlk) - { - genCodeForCpBlkRepMovs(storeBlkNode); - } - else - { - genCodeForInitBlkRepStos(storeBlkNode); - } - break; - case GenTreeBlk::BlkOpKindUnroll: - if (isCopyBlk) - { -#ifndef JIT32_GCENCODER - if (storeBlkNode->gtBlkOpGcUnsafe) - { - GetEmitter()->emitDisableGC(); - } -#endif - genCodeForCpBlkUnroll(storeBlkNode); -#ifndef JIT32_GCENCODER - if (storeBlkNode->gtBlkOpGcUnsafe) - { - GetEmitter()->emitEnableGC(); - } -#endif - } - else - { -#ifndef JIT32_GCENCODER - assert(!storeBlkNode->gtBlkOpGcUnsafe); -#endif - genCodeForInitBlkUnroll(storeBlkNode); - } - break; - default: - unreached(); - } -} - -// -//------------------------------------------------------------------------ -// genCodeForInitBlkRepStos: Generate code for InitBlk using rep stos. -// -// Arguments: -// initBlkNode - The Block store for which we are generating code. -// -void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode) -{ - genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX); - instGen(INS_r_stosb); -} - -//---------------------------------------------------------------------------------- -// genCodeForInitBlkUnroll: Generate unrolled block initialization code. 
-// -// Arguments: -// node - the GT_STORE_BLK node to generate code for -// -void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) -{ - assert(node->OperIs(GT_STORE_BLK)); - - unsigned dstLclNum = BAD_VAR_NUM; - regNumber dstAddrBaseReg = REG_NA; - regNumber dstAddrIndexReg = REG_NA; - unsigned dstAddrIndexScale = 1; - int dstOffset = 0; - GenTree* dstAddr = node->Addr(); - - if (!dstAddr->isContained()) - { - dstAddrBaseReg = genConsumeReg(dstAddr); - } - else if (dstAddr->OperIsAddrMode()) - { - GenTreeAddrMode* addrMode = dstAddr->AsAddrMode(); - - if (addrMode->HasBase()) - { - dstAddrBaseReg = genConsumeReg(addrMode->Base()); - } - - if (addrMode->HasIndex()) - { - dstAddrIndexReg = genConsumeReg(addrMode->Index()); - dstAddrIndexScale = addrMode->GetScale(); - } - - dstOffset = addrMode->Offset(); - } - else - { - assert(dstAddr->OperIsLocalAddr()); - dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); - dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs(); - } - - regNumber srcIntReg = REG_NA; - GenTree* src = node->Data(); - - if (src->OperIs(GT_INIT_VAL)) - { - assert(src->isContained()); - src = src->AsUnOp()->gtGetOp1(); - } - - if (!src->isContained()) - { - srcIntReg = genConsumeReg(src); - } - else - { - // If src is contained then it must be 0 and the size must be a multiple - // of XMM_REGSIZE_BYTES so initialization can use only SSE2 instructions. - assert(src->IsIntegralConst(0)); - assert((node->GetLayout()->GetSize() % XMM_REGSIZE_BYTES) == 0); - } - - emitter* emit = GetEmitter(); - unsigned size = node->GetLayout()->GetSize(); - - assert(size <= INT32_MAX); - assert(dstOffset < (INT32_MAX - static_cast(size))); - - // Fill as much as possible using SSE2 stores. - if (size >= XMM_REGSIZE_BYTES) - { - regNumber srcXmmReg = node->GetSingleTempReg(RBM_ALLFLOAT); - - if (src->gtSkipReloadOrCopy()->IsIntegralConst(0)) - { - // If the source is constant 0 then always use xorps, it's faster - // than copying the constant from a GPR to a XMM register. - emit->emitIns_R_R(INS_xorps, EA_16BYTE, srcXmmReg, srcXmmReg); - } - else - { - emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg); - emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); -#ifdef TARGET_X86 - // For x86, we need one more to convert it from 8 bytes to 16 bytes. - emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); -#endif - } - - instruction simdMov = simdUnalignedMovIns(); - for (unsigned regSize = XMM_REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize) - { - if (dstLclNum != BAD_VAR_NUM) - { - emit->emitIns_S_R(simdMov, EA_ATTR(regSize), srcXmmReg, dstLclNum, dstOffset); - } - else - { - emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), srcXmmReg, dstAddrBaseReg, dstAddrIndexReg, - dstAddrIndexScale, dstOffset); - } - } - - // TODO-CQ-XArch: On x86 we could initialize 8 byte at once by using MOVQ instead of two 4 byte MOV stores. - // On x64 it may also be worth zero initializing a 4/8 byte remainder using MOVD/MOVQ, that avoids the need - // to allocate a GPR just for the remainder. - } - - // Fill the remainder using normal stores. 
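// [Editor's note - illustrative sketch, not part of the original patch; the function
// name is hypothetical.] The overall shape of the unrolled init in portable C++:
// replicate the fill pattern to a 16-byte block (movd + punpckldq above, or xorps when
// the pattern is zero), store 16-byte chunks, then finish with the shrinking-width
// remainder loop that follows below.
#include <cstdint>
#include <cstring>
static void UnrolledInit(uint8_t* dst, uint64_t pattern, unsigned size)
{
    uint8_t block[16];
    std::memcpy(block, &pattern, 8);     // stands in for movd + punpckldq
    std::memcpy(block + 8, &pattern, 8);

    unsigned offset = 0;
    for (; size >= 16; size -= 16, offset += 16)
    {
        std::memcpy(dst + offset, block, 16); // stands in for the movdqu store
    }
    for (unsigned width = 8; size > 0; size -= width, offset += width)
    {
        while (width > size)
        {
            width /= 2; // 8 -> 4 -> 2 -> 1 until the store fits
        }
        std::memcpy(dst + offset, &pattern, width); // plain mov stores for the tail
    }
}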
- for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize) - { - while (regSize > size) - { - regSize /= 2; - } - - if (dstLclNum != BAD_VAR_NUM) - { - emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset); - } - else - { - emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg, - dstAddrIndexScale, dstOffset); - } - } -} - -#ifdef TARGET_AMD64 -//------------------------------------------------------------------------ -// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call -// -// Arguments: -// initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] -// -// Preconditions: -// The register assignments have been set appropriately. -// This is validated by genConsumeBlockOp(). -// -void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) -{ - // Destination address goes in arg0, source address goes in arg1, and size goes in arg2. - // genConsumeBlockOp takes care of this for us. - genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); - - genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); -} -#endif // TARGET_AMD64 - -#ifdef FEATURE_PUT_STRUCT_ARG_STK -// Generate code for a load from some address + offset -// baseNode: tree node which can be either a local address or arbitrary node -// offset: distance from the baseNode from which to load -void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset) -{ - emitter* emit = GetEmitter(); - - if (baseNode->OperIsLocalAddr()) - { - const GenTreeLclVarCommon* lclVar = baseNode->AsLclVarCommon(); - offset += lclVar->GetLclOffs(); - emit->emitIns_R_S(ins, size, dst, lclVar->GetLclNum(), offset); - } - else - { - emit->emitIns_R_AR(ins, size, dst, baseNode->GetRegNum(), offset); - } -} -#endif // FEATURE_PUT_STRUCT_ARG_STK - -//---------------------------------------------------------------------------------- -// genCodeForCpBlkUnroll - Generate unrolled block copy code. 
-// -// Arguments: -// node - the GT_STORE_BLK node to generate code for -// -void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) -{ - assert(node->OperIs(GT_STORE_BLK)); - - unsigned dstLclNum = BAD_VAR_NUM; - regNumber dstAddrBaseReg = REG_NA; - regNumber dstAddrIndexReg = REG_NA; - unsigned dstAddrIndexScale = 1; - int dstOffset = 0; - GenTree* dstAddr = node->Addr(); - - if (!dstAddr->isContained()) - { - dstAddrBaseReg = genConsumeReg(dstAddr); - } - else if (dstAddr->OperIsAddrMode()) - { - GenTreeAddrMode* addrMode = dstAddr->AsAddrMode(); - - if (addrMode->HasBase()) - { - dstAddrBaseReg = genConsumeReg(addrMode->Base()); - } - - if (addrMode->HasIndex()) - { - dstAddrIndexReg = genConsumeReg(addrMode->Index()); - dstAddrIndexScale = addrMode->GetScale(); - } - - dstOffset = addrMode->Offset(); - } - else - { - assert(dstAddr->OperIsLocalAddr()); - const GenTreeLclVarCommon* lclVar = dstAddr->AsLclVarCommon(); - dstLclNum = lclVar->GetLclNum(); - dstOffset = lclVar->GetLclOffs(); - } - - unsigned srcLclNum = BAD_VAR_NUM; - regNumber srcAddrBaseReg = REG_NA; - regNumber srcAddrIndexReg = REG_NA; - unsigned srcAddrIndexScale = 1; - int srcOffset = 0; - GenTree* src = node->Data(); - - assert(src->isContained()); - - if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - srcLclNum = src->AsLclVarCommon()->GetLclNum(); - srcOffset = src->AsLclVarCommon()->GetLclOffs(); - } - else - { - assert(src->OperIs(GT_IND)); - GenTree* srcAddr = src->AsIndir()->Addr(); - - if (!srcAddr->isContained()) - { - srcAddrBaseReg = genConsumeReg(srcAddr); - } - else if (srcAddr->OperIsAddrMode()) - { - GenTreeAddrMode* addrMode = srcAddr->AsAddrMode(); - - if (addrMode->HasBase()) - { - srcAddrBaseReg = genConsumeReg(addrMode->Base()); - } - - if (addrMode->HasIndex()) - { - srcAddrIndexReg = genConsumeReg(addrMode->Index()); - srcAddrIndexScale = addrMode->GetScale(); - } - - srcOffset = addrMode->Offset(); - } - else - { - assert(srcAddr->OperIsLocalAddr()); - srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); - srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs(); - } - } - - emitter* emit = GetEmitter(); - unsigned size = node->GetLayout()->GetSize(); - - assert(size <= INT32_MAX); - assert(srcOffset < (INT32_MAX - static_cast(size))); - assert(dstOffset < (INT32_MAX - static_cast(size))); - - if (size >= XMM_REGSIZE_BYTES) - { - regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT); - - instruction simdMov = simdUnalignedMovIns(); - for (unsigned regSize = XMM_REGSIZE_BYTES; size >= regSize; - size -= regSize, srcOffset += regSize, dstOffset += regSize) - { - if (srcLclNum != BAD_VAR_NUM) - { - emit->emitIns_R_S(simdMov, EA_ATTR(regSize), tempReg, srcLclNum, srcOffset); - } - else - { - emit->emitIns_R_ARX(simdMov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg, - srcAddrIndexScale, srcOffset); - } - - if (dstLclNum != BAD_VAR_NUM) - { - emit->emitIns_S_R(simdMov, EA_ATTR(regSize), tempReg, dstLclNum, dstOffset); - } - else - { - emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg, - dstAddrIndexScale, dstOffset); - } - } - - // TODO-CQ-XArch: On x86 we could copy 8 byte at once by using MOVQ instead of four 4 byte MOV stores. - // On x64 it may also be worth copying a 4/8 byte remainder using MOVD/MOVQ, that avoids the need to - // allocate a GPR just for the remainder. 
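// [Editor's note - illustrative sketch, not part of the original patch; the function
// name is hypothetical.] The same copy shape in portable C++: move 16-byte chunks
// through a single temporary, then finish with the shrinking-width remainder loop
// that follows below. Source and destination are assumed not to overlap.
#include <cstdint>
#include <cstring>
static void UnrolledCopy(uint8_t* dst, const uint8_t* src, unsigned size)
{
    unsigned offset = 0;
    for (; size >= 16; size -= 16, offset += 16)
    {
        uint8_t chunk[16];
        std::memcpy(chunk, src + offset, 16); // stands in for the movdqu load
        std::memcpy(dst + offset, chunk, 16); // stands in for the movdqu store
    }
    for (unsigned width = 8; size > 0; size -= width, offset += width)
    {
        while (width > size)
        {
            width /= 2;
        }
        std::memcpy(dst + offset, src + offset, width); // GPR-sized tail moves
    }
}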
- } - - if (size > 0) - { - regNumber tempReg = node->GetSingleTempReg(RBM_ALLINT); - - for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, srcOffset += regSize, dstOffset += regSize) - { - while (regSize > size) - { - regSize /= 2; - } - - if (srcLclNum != BAD_VAR_NUM) - { - emit->emitIns_R_S(INS_mov, EA_ATTR(regSize), tempReg, srcLclNum, srcOffset); - } - else - { - emit->emitIns_R_ARX(INS_mov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg, - srcAddrIndexScale, srcOffset); - } - - if (dstLclNum != BAD_VAR_NUM) - { - emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), tempReg, dstLclNum, dstOffset); - } - else - { - emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg, - dstAddrIndexScale, dstOffset); - } - } - } -} - -//---------------------------------------------------------------------------------- -// genCodeForCpBlkRepMovs - Generate code for CpBlk by using rep movs -// -// Arguments: -// cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] -// -// Preconditions: -// The register assignments have been set appropriately. -// This is validated by genConsumeBlockOp(). -// -void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode) -{ - // Destination address goes in RDI, source address goes in RSE, and size goes in RCX. - // genConsumeBlockOp takes care of this for us. - genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX); - instGen(INS_r_movsb); -} - -#ifdef FEATURE_PUT_STRUCT_ARG_STK -//------------------------------------------------------------------------ -// CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area -// -// Arguments: -// size - The size of bytes remaining to be moved -// longTmpReg - The tmp register to be used for the long value -// srcAddr - The address of the source struct -// offset - The current offset being copied -// -// Return Value: -// Returns the number of bytes moved (8 or 0). -// -// Notes: -// This is used in the PutArgStkKindUnroll case, to move any bytes that are -// not an even multiple of 16. -// On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register. -// This is checked by genStoreRegToStackArg. -// -unsigned CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset) -{ -#ifdef TARGET_X86 - instruction longMovIns = INS_movq; -#else // !TARGET_X86 - instruction longMovIns = INS_mov; -#endif // !TARGET_X86 - if ((size & 8) != 0) - { - genCodeForLoadOffset(longMovIns, EA_8BYTE, longTmpReg, srcAddr, offset); - genStoreRegToStackArg(TYP_LONG, longTmpReg, offset); - return 8; - } - return 0; -} - -//------------------------------------------------------------------------ -// CodeGen::genMove4IfNeeded: Conditionally move 4 bytes of a struct to the argument area -// -// Arguments: -// size - The size of bytes remaining to be moved -// intTmpReg - The tmp register to be used for the long value -// srcAddr - The address of the source struct -// offset - The current offset being copied -// -// Return Value: -// Returns the number of bytes moved (4 or 0). -// -// Notes: -// This is used in the PutArgStkKindUnroll case, to move any bytes that are -// not an even multiple of 16. -// intTmpReg must be an integer register. -// This is checked by genStoreRegToStackArg. 
-// -unsigned CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) -{ - if ((size & 4) != 0) - { - genCodeForLoadOffset(INS_mov, EA_4BYTE, intTmpReg, srcAddr, offset); - genStoreRegToStackArg(TYP_INT, intTmpReg, offset); - return 4; - } - return 0; -} - -//------------------------------------------------------------------------ -// CodeGen::genMove2IfNeeded: Conditionally move 2 bytes of a struct to the argument area -// -// Arguments: -// size - The size of bytes remaining to be moved -// intTmpReg - The tmp register to be used for the long value -// srcAddr - The address of the source struct -// offset - The current offset being copied -// -// Return Value: -// Returns the number of bytes moved (2 or 0). -// -// Notes: -// This is used in the PutArgStkKindUnroll case, to move any bytes that are -// not an even multiple of 16. -// intTmpReg must be an integer register. -// This is checked by genStoreRegToStackArg. -// -unsigned CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) -{ - if ((size & 2) != 0) - { - genCodeForLoadOffset(INS_mov, EA_2BYTE, intTmpReg, srcAddr, offset); - genStoreRegToStackArg(TYP_SHORT, intTmpReg, offset); - return 2; - } - return 0; -} - -//------------------------------------------------------------------------ -// CodeGen::genMove1IfNeeded: Conditionally move 1 byte of a struct to the argument area -// -// Arguments: -// size - The size of bytes remaining to be moved -// intTmpReg - The tmp register to be used for the long value -// srcAddr - The address of the source struct -// offset - The current offset being copied -// -// Return Value: -// Returns the number of bytes moved (1 or 0). -// -// Notes: -// This is used in the PutArgStkKindUnroll case, to move any bytes that are -// not an even multiple of 16. -// intTmpReg must be an integer register. -// This is checked by genStoreRegToStackArg. -// -unsigned CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) -{ - if ((size & 1) != 0) - { - genCodeForLoadOffset(INS_mov, EA_1BYTE, intTmpReg, srcAddr, offset); - genStoreRegToStackArg(TYP_BYTE, intTmpReg, offset); - return 1; - } - return 0; -} - -//---------------------------------------------------------------------------------------------------------------// -// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling. -// -// Arguments: -// putArgNode - the PutArgStk tree. -// -// Notes: -// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct will be copied to the -// stack. -// -// TODO-Amd64-Unix: Try to share code with copyblk. -// Need refactoring of copyblk before it could be used for putarg_stk. -// The difference for now is that a putarg_stk contains its children, while cpyblk does not. -// This creates differences in code. After some significant refactoring it could be reused. -// -void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) -{ - GenTree* src = putArgNode->AsOp()->gtOp1; - // We will never call this method for SIMD types, which are stored directly - // in genPutStructArgStk(). 
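// [Editor's note - illustrative sketch, not part of the original patch; the function
// name is hypothetical.] The genMove{8,4,2,1}IfNeeded helpers above combine into a
// tail copy that tests one size bit at a time; a 13-byte tail, for example, moves
// 8, then 4, then 1 bytes.
#include <cstdint>
#include <cstring>
static unsigned CopyTail(uint8_t* dst, const uint8_t* src, unsigned size)
{
    unsigned offset = 0;
    for (unsigned width = 8; width != 0; width /= 2)
    {
        if ((size & width) != 0)
        {
            std::memcpy(dst + offset, src + offset, width);
            offset += width;
        }
    }
    return offset; // bytes moved == size & 0xF
}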
- noway_assert(src->TypeGet() == TYP_STRUCT); - - unsigned size = putArgNode->GetStackByteSize(); - assert(size <= CPBLK_UNROLL_LIMIT); - - emitter* emit = GetEmitter(); - unsigned putArgOffset = putArgNode->getArgOffset(); - - assert(src->isContained()); - - assert(src->gtOper == GT_OBJ); - - if (src->AsOp()->gtOp1->isUsedFromReg()) - { - genConsumeReg(src->AsOp()->gtOp1); - } - - unsigned offset = 0; - - regNumber xmmTmpReg = REG_NA; - regNumber intTmpReg = REG_NA; - regNumber longTmpReg = REG_NA; -#ifdef TARGET_X86 - // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's - // less than 16 bytes, we will just be using pushes - if (size >= 8) - { - xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); - longTmpReg = xmmTmpReg; - } - if ((size & 0x7) != 0) - { - intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); - } -#else // !TARGET_X86 - // On x64 we use an XMM register only for 16-byte chunks. - if (size >= XMM_REGSIZE_BYTES) - { - xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); - } - if ((size & 0xf) != 0) - { - intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); - longTmpReg = intTmpReg; - } -#endif // !TARGET_X86 - - // If the size of this struct is larger than 16 bytes - // let's use SSE2 to be able to do 16 byte at a time - // loads and stores. - if (size >= XMM_REGSIZE_BYTES) - { -#ifdef TARGET_X86 - assert(!m_pushStkArg); -#endif // TARGET_X86 - size_t slots = size / XMM_REGSIZE_BYTES; - - assert(putArgNode->gtGetOp1()->isContained()); - assert(putArgNode->gtGetOp1()->AsOp()->gtOper == GT_OBJ); - - // TODO: In the below code the load and store instructions are for 16 bytes, but the - // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but - // this probably needs to be changed. - while (slots-- > 0) - { - // Load - genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src->gtGetOp1(), offset); - - // Store - genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset); - - offset += XMM_REGSIZE_BYTES; - } - } - - // Fill the remainder (15 bytes or less) if there's one. - if ((size & 0xf) != 0) - { -#ifdef TARGET_X86 - if (m_pushStkArg) - { - // This case is currently supported only for the case where the total size is - // less than XMM_REGSIZE_BYTES. We need to push the remaining chunks in reverse - // order. However, morph has ensured that we have a struct that is an even - // multiple of TARGET_POINTER_SIZE, so we don't need to worry about alignment. - assert(((size & 0xc) == size) && (offset == 0)); - // If we have a 4 byte chunk, load it from either offset 0 or 8, depending on - // whether we've got an 8 byte chunk, and then push it on the stack. - unsigned pushedBytes = genMove4IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, size & 0x8); - // Now if we have an 8 byte chunk, load it from offset 0 (it's the first chunk) - // and push it on the stack. - pushedBytes += genMove8IfNeeded(size, longTmpReg, src->AsOp()->gtOp1, 0); - } - else -#endif // TARGET_X86 - { - offset += genMove8IfNeeded(size, longTmpReg, src->AsOp()->gtOp1, offset); - offset += genMove4IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, offset); - offset += genMove2IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, offset); - offset += genMove1IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, offset); - assert(offset == size); - } - } -} - -//------------------------------------------------------------------------ -// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs. -// -// Arguments: -// putArgNode - the PutArgStk tree. 
-// -// Preconditions: -// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct bits will go. -// -void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode) -{ - GenTree* srcAddr = putArgNode->gtGetOp1(); - assert(srcAddr->TypeGet() == TYP_STRUCT); - - // Make sure we got the arguments of the cpblk operation in the right registers, and that - // 'srcAddr' is contained as expected. - assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI)); - assert(srcAddr->isContained()); - - genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX); - instGen(INS_r_movsb); -} - -//------------------------------------------------------------------------ -// If any Vector3 args are on stack and they are not pass-by-ref, the upper 32bits -// must be cleared to zeroes. The native compiler doesn't clear the upper bits -// and there is no way to know if the caller is native or not. So, the upper -// 32 bits of Vector argument on stack are always cleared to zero. -#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -void CodeGen::genClearStackVec3ArgUpperBits() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In genClearStackVec3ArgUpperBits()\n"); - } -#endif - - assert(compiler->compGeneratingProlog); - - unsigned varNum = 0; - - for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++) - { - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - assert(varDsc->lvIsParam); - - // Does var has simd12 type? - if (varDsc->lvType != TYP_SIMD12) - { - continue; - } - - if (!varDsc->lvIsRegArg) - { - // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0 - GetEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0); - } - else - { - // Assume that for x64 linux, an argument is fully in registers - // or fully on stack. - regNumber argReg = varDsc->GetOtherArgReg(); - - // Clear the upper 32 bits by two shift instructions. - // argReg = argReg << 96 - GetEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12); - // argReg = argReg >> 96 - GetEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12); - } - } -} -#endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -#endif // FEATURE_PUT_STRUCT_ARG_STK - -// -// genCodeForCpObj - Generate code for CpObj nodes to copy structs that have interleaved -// GC pointers. -// -// Arguments: -// cpObjNode - the GT_STORE_OBJ -// -// Notes: -// This will generate a sequence of movsp instructions for the cases of non-gc members. -// Note that movsp is an alias for movsd on x86 and movsq on x64. -// and calls to the BY_REF_ASSIGN helper otherwise. -// -// Preconditions: -// The register assignments have been set appropriately. -// This is validated by genConsumeBlockOp(). -// -void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) -{ - // Make sure we got the arguments of the cpobj operation in the right registers - GenTree* dstAddr = cpObjNode->Addr(); - GenTree* source = cpObjNode->Data(); - GenTree* srcAddr = nullptr; - var_types srcAddrType = TYP_BYREF; - bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr(); - -#ifdef DEBUG - // If the GenTree node has data about GC pointers, this means we're dealing - // with CpObj, so this requires special logic. - assert(cpObjNode->GetLayout()->HasGCPtr()); - - // MovSp (alias for movsq on x64 and movsd on x86) instruction is used for copying non-gcref fields - // and it needs src = RSI and dst = RDI. 
- // Either these registers must not contain lclVars, or they must be dying or marked for spill. - // This is because these registers are incremented as we go through the struct. - if (!source->IsLocal()) - { - assert(source->gtOper == GT_IND); - srcAddr = source->gtGetOp1(); - GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy(); - GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy(); - unsigned srcLclVarNum = BAD_VAR_NUM; - unsigned dstLclVarNum = BAD_VAR_NUM; - bool isSrcAddrLiveOut = false; - bool isDstAddrLiveOut = false; - if (genIsRegCandidateLocal(actualSrcAddr)) - { - srcLclVarNum = actualSrcAddr->AsLclVarCommon()->GetLclNum(); - isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); - } - if (genIsRegCandidateLocal(actualDstAddr)) - { - dstLclVarNum = actualDstAddr->AsLclVarCommon()->GetLclNum(); - isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); - } - assert((actualSrcAddr->GetRegNum() != REG_RSI) || !isSrcAddrLiveOut || - ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut)); - assert((actualDstAddr->GetRegNum() != REG_RDI) || !isDstAddrLiveOut || - ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut)); - srcAddrType = srcAddr->TypeGet(); - } -#endif // DEBUG - - // Consume the operands and get them into the right registers. - // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). - genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA); - gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType); - gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); - - unsigned slots = cpObjNode->GetLayout()->GetSlotCount(); - - // If we can prove it's on the stack we don't need to use the write barrier. - if (dstOnStack) - { - if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) - { - // If the destination of the CpObj is on the stack, make sure we allocated - // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively). - assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); - - GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); - instGen(INS_r_movsp); - } - else - { - // For small structs, it's better to emit a sequence of movsp than to - // emit a rep movsp instruction. - while (slots > 0) - { - instGen(INS_movsp); - slots--; - } - } - } - else - { - ClassLayout* layout = cpObjNode->GetLayout(); - unsigned gcPtrCount = layout->GetGCPtrCount(); - - unsigned i = 0; - while (i < slots) - { - if (!layout->IsGCPtr(i)) - { - // Let's see if we can use rep movsp instead of a sequence of movsp instructions - // to save cycles and code size. - unsigned nonGcSlotCount = 0; - - do - { - nonGcSlotCount++; - i++; - } while ((i < slots) && !layout->IsGCPtr(i)); - - // If we have a very small contiguous non-gc region, it's better just to - // emit a sequence of movsp instructions - if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) - { - while (nonGcSlotCount > 0) - { - instGen(INS_movsp); - nonGcSlotCount--; - } - } - else - { - // Otherwise, we can save code-size and improve CQ by emitting - // rep movsp (alias for movsd/movsq for x86/x64) - assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); - - GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); - instGen(INS_r_movsp); - } - } - else - { - genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); - gcPtrCount--; - i++; - } - } - - assert(gcPtrCount == 0); - } - - // Clear the gcInfo for RSI and RDI. 
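// [Editor's note - illustrative sketch, not part of the original patch; names are
// hypothetical and the non-GC slot limit is taken as a parameter.] The slot walk
// above, restated: runs of non-GC slots are copied with movsp (or rep movsp once the
// run reaches the limit), while each GC slot goes through the byref-assign helper.
#include <cstddef>
enum class CpObjStep { Movsp, RepMovsp, ByrefAssignHelper };

static CpObjStep ClassifySlotRun(bool isGcSlot, size_t nonGcRunLength, size_t nonGcSlotsLimit)
{
    if (isGcSlot)
    {
        return CpObjStep::ByrefAssignHelper; // the helper advances RSI/RDI itself
    }
    return (nonGcRunLength < nonGcSlotsLimit) ? CpObjStep::Movsp : CpObjStep::RepMovsp;
}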
- // While we normally update GC info prior to the last instruction that uses them, - // these actually live into the helper call. - gcInfo.gcMarkRegSetNpt(RBM_RSI); - gcInfo.gcMarkRegSetNpt(RBM_RDI); -} - -#ifdef TARGET_AMD64 -//---------------------------------------------------------------------------------- -// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call -// -// Arguments: -// cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] -// -// Preconditions: -// The register assignments have been set appropriately. -// This is validated by genConsumeBlockOp(). -// -void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode) -{ - // Destination address goes in arg0, source address goes in arg1, and size goes in arg2. - // genConsumeBlockOp takes care of this for us. - genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); - - genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); -} -#endif // TARGET_AMD64 - -// generate code do a switch statement based on a table of ip-relative offsets -void CodeGen::genTableBasedSwitch(GenTree* treeNode) -{ - genConsumeOperands(treeNode->AsOp()); - regNumber idxReg = treeNode->AsOp()->gtOp1->GetRegNum(); - regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum(); - - regNumber tmpReg = treeNode->GetSingleTempReg(); - - // load the ip-relative offset (which is relative to start of fgFirstBB) - GetEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0); - - // add it to the absolute address of fgFirstBB - compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; - GetEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg); - GetEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg); - // jmp baseReg - GetEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg); -} - -// emits the table and an instruction to get the address of the first element -void CodeGen::genJumpTable(GenTree* treeNode) -{ - noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); - assert(treeNode->OperGet() == GT_JMPTABLE); - - unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; - BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; - unsigned jmpTabOffs; - unsigned jmpTabBase; - - jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true); - - jmpTabOffs = 0; - - JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", compiler->compMethodID, jmpTabBase); - - for (unsigned i = 0; i < jumpCount; i++) - { - BasicBlock* target = *jumpTable++; - noway_assert(target->bbFlags & BBF_JMP_TARGET); - - JITDUMP(" DD L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum); - - GetEmitter()->emitDataGenData(i, target); - }; - - GetEmitter()->emitDataGenEnd(); - - // Access to inline data is 'abstracted' by a special type of static member - // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference - // to constant data, not a real static field. 
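// Rough, portable sketch (not the emitted code): genJumpTable above places a data table of
// 32-bit offsets relative to the start of the method, and genTableBasedSwitch loads
// table[index], adds the method's base address, and jumps. Standard C++ has no ip-relative
// labels, so a function-pointer table stands in for the base-plus-offset dispatch here.
#include <cstdio>

static void caseA() { printf("case A\n"); }
static void caseB() { printf("case B\n"); }
static void caseDefault() { printf("default\n"); }

int main()
{
    using Handler = void (*)();
    static const Handler jumpTable[] = {caseA, caseB, caseDefault};
    const unsigned count = sizeof(jumpTable) / sizeof(jumpTable[0]);

    unsigned index = 1;                             // the value being switched on
    jumpTable[index < count ? index : count - 1](); // bounds check, then indirect jump
    return 0;
}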
- GetEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), - compiler->eeFindJitDataOffs(jmpTabBase), 0); - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genCodeForLockAdd: Generate code for a GT_LOCKADD node -// -// Arguments: -// node - the GT_LOCKADD node -// -void CodeGen::genCodeForLockAdd(GenTreeOp* node) -{ - assert(node->OperIs(GT_LOCKADD)); - - GenTree* addr = node->gtGetOp1(); - GenTree* data = node->gtGetOp2(); - emitAttr size = emitActualTypeSize(data->TypeGet()); - - assert(addr->isUsedFromReg()); - assert(data->isUsedFromReg() || data->isContainedIntOrIImmed()); - assert((size == EA_4BYTE) || (size == EA_PTRSIZE)); - - genConsumeOperands(node); - instGen(INS_lock); - - if (data->isContainedIntOrIImmed()) - { - int imm = static_cast(data->AsIntCon()->IconValue()); - assert(imm == data->AsIntCon()->IconValue()); - GetEmitter()->emitIns_I_AR(INS_add, size, imm, addr->GetRegNum(), 0); - } - else - { - GetEmitter()->emitIns_AR_R(INS_add, size, data->GetRegNum(), addr->GetRegNum(), 0); - } -} - -//------------------------------------------------------------------------ -// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. -// -// Arguments: -// node - the GT_XADD/XCHG node -// -void CodeGen::genLockedInstructions(GenTreeOp* node) -{ - assert(node->OperIs(GT_XADD, GT_XCHG)); - - GenTree* addr = node->gtGetOp1(); - GenTree* data = node->gtGetOp2(); - emitAttr size = emitTypeSize(node->TypeGet()); - - assert(addr->isUsedFromReg()); - assert(data->isUsedFromReg()); - assert((size == EA_4BYTE) || (size == EA_PTRSIZE)); - - genConsumeOperands(node); - - if (node->GetRegNum() != data->GetRegNum()) - { - // If the destination register is different from the data register then we need - // to first move the data to the target register. Make sure we don't overwrite - // the address, the register allocator should have taken care of this. - assert(node->GetRegNum() != addr->GetRegNum()); - GetEmitter()->emitIns_R_R(INS_mov, size, node->GetRegNum(), data->GetRegNum()); - } - - instruction ins = node->OperIs(GT_XADD) ? INS_xadd : INS_xchg; - - // XCHG has an implied lock prefix when the first operand is a memory operand. - if (ins != INS_xchg) - { - instGen(INS_lock); - } - - GetEmitter()->emitIns_AR_R(ins, size, node->GetRegNum(), addr->GetRegNum(), 0); - genProduceReg(node); -} - -//------------------------------------------------------------------------ -// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. -// -// Arguments: -// tree - the GT_CMPXCHG node -// -void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* tree) -{ - assert(tree->OperIs(GT_CMPXCHG)); - - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); - - GenTree* location = tree->gtOpLocation; // arg1 - GenTree* value = tree->gtOpValue; // arg2 - GenTree* comparand = tree->gtOpComparand; // arg3 - - assert(location->GetRegNum() != REG_NA && location->GetRegNum() != REG_RAX); - assert(value->GetRegNum() != REG_NA && value->GetRegNum() != REG_RAX); - - genConsumeReg(location); - genConsumeReg(value); - genConsumeReg(comparand); - - // comparand goes to RAX; - // Note that we must issue this move after the genConsumeRegs(), in case any of the above - // have a GT_COPY from RAX. 
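// Not JIT code: the interlocked nodes handled above map onto the same x86 primitives that
// std::atomic uses, namely lock add / lock xadd for fetch_add, xchg (whose lock is implicit
// with a memory operand) for exchange, and lock cmpxchg, with the comparand in EAX/RAX, for
// compare-exchange. A minimal sketch:
#include <atomic>
#include <cstdio>

int main()
{
    std::atomic<int> cell{10};

    cell.fetch_add(5);           // lock add / lock xadd
    int old = cell.exchange(42); // xchg [mem], reg

    int expected = 42;           // the comparand the hardware keeps in EAX/RAX
    bool swapped = cell.compare_exchange_strong(expected, 7); // lock cmpxchg

    printf("old=%d swapped=%d final=%d\n", old, (int)swapped, cell.load());
    return 0;
}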
- if (comparand->GetRegNum() != REG_RAX) - { - inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->GetRegNum(), comparand->TypeGet()); - } - - // location is Rm - instGen(INS_lock); - - GetEmitter()->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->GetRegNum(), location->GetRegNum(), 0); - - // Result is in RAX - if (targetReg != REG_RAX) - { - inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType); - } - - genProduceReg(tree); -} - -// generate code for BoundsCheck nodes -void CodeGen::genRangeCheck(GenTree* oper) -{ - noway_assert(oper->OperIsBoundsCheck()); - GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); - - GenTree* arrIndex = bndsChk->gtIndex; - GenTree* arrLen = bndsChk->gtArrLen; - - GenTree * src1, *src2; - emitJumpKind jmpKind; - instruction cmpKind; - - genConsumeRegs(arrIndex); - genConsumeRegs(arrLen); - - if (arrIndex->IsIntegralConst(0) && arrLen->isUsedFromReg()) - { - // arrIndex is 0 and arrLen is in a reg. In this case - // we can generate - // test reg, reg - // since arrLen is non-negative - src1 = arrLen; - src2 = arrLen; - jmpKind = EJ_je; - cmpKind = INS_test; - } - else if (arrIndex->isContainedIntOrIImmed()) - { - // arrIndex is a contained constant. In this case - // we will generate one of the following - // cmp [mem], immed (if arrLen is a memory op) - // cmp reg, immed (if arrLen is in a reg) - // - // That is arrLen cannot be a contained immed. - assert(!arrLen->isContainedIntOrIImmed()); - - src1 = arrLen; - src2 = arrIndex; - jmpKind = EJ_jbe; - cmpKind = INS_cmp; - } - else - { - // arrIndex could either be a contained memory op or a reg - // In this case we will generate one of the following - // cmp [mem], immed (if arrLen is a constant) - // cmp [mem], reg (if arrLen is in a reg) - // cmp reg, immed (if arrIndex is in a reg) - // cmp reg1, reg2 (if arrIndex is in reg1) - // cmp reg, [mem] (if arrLen is a memory op) - // - // That is only one of arrIndex or arrLen can be a memory op. - assert(!arrIndex->isUsedFromMemory() || !arrLen->isUsedFromMemory()); - - src1 = arrIndex; - src2 = arrLen; - jmpKind = EJ_jae; - cmpKind = INS_cmp; - } - - var_types bndsChkType = src2->TypeGet(); -#if DEBUG - // Bounds checks can only be 32 or 64 bit sized comparisons. - assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG); - - // The type of the bounds check should always wide enough to compare against the index. 
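// Illustrative only: the single unsigned compare emitted above (cmp followed by jae/jbe) covers
// both "index < 0" and "index >= length" at once, because a negative two's-complement index
// reinterpreted as unsigned is larger than any valid non-negative length.
#include <cstdio>

bool IndexInRange(int index, int length)
{
    // length is known to be non-negative, so one unsigned compare is enough.
    return static_cast<unsigned>(index) < static_cast<unsigned>(length);
}

int main()
{
    printf("%d %d %d\n", IndexInRange(3, 10), IndexInRange(-1, 10), IndexInRange(10, 10)); // 1 0 0
    return 0;
}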
- assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet())); -#endif // DEBUG - - GetEmitter()->emitInsBinary(cmpKind, emitTypeSize(bndsChkType), src1, src2); - genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB); -} - -//--------------------------------------------------------------------- -// genCodeForPhysReg - generate code for a GT_PHYSREG node -// -// Arguments -// tree - the GT_PHYSREG node -// -// Return value: -// None -// -void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) -{ - assert(tree->OperIs(GT_PHYSREG)); - - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); - - if (targetReg != tree->gtSrcReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType); - genTransferRegGCState(targetReg, tree->gtSrcReg); - } - - genProduceReg(tree); -} - -//--------------------------------------------------------------------- -// genCodeForNullCheck - generate code for a GT_NULLCHECK node -// -// Arguments -// tree - the GT_NULLCHECK node -// -// Return value: -// None -// -void CodeGen::genCodeForNullCheck(GenTreeIndir* tree) -{ - assert(tree->OperIs(GT_NULLCHECK)); - - assert(tree->gtOp1->isUsedFromReg()); - regNumber reg = genConsumeReg(tree->gtOp1); - GetEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0); -} - -//------------------------------------------------------------------------ -// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the -// lower bound for the given dimension. -// -// Arguments: -// elemType - the element type of the array -// rank - the rank of the array -// dimension - the dimension for which the lower bound offset will be returned. -// -// Return Value: -// The offset. - -unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension) -{ - // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. - return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank); -} - -//------------------------------------------------------------------------ -// genOffsetOfMDArrayLength: Returns the offset from the Array object to the -// size for the given dimension. -// -// Arguments: -// elemType - the element type of the array -// rank - the rank of the array -// dimension - the dimension for which the lower bound offset will be returned. -// -// Return Value: -// The offset. - -unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension) -{ - // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. - return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension; -} - -//------------------------------------------------------------------------ -// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, -// producing the effective index by subtracting the lower bound. -// -// Arguments: -// arrIndex - the node for which we're generating code -// -// Return Value: -// None. 
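// Hypothetical sketch of the arithmetic in the two offset helpers above: per dimension there is
// one 32-bit length and one 32-bit lower bound, with the lengths block first and the lower
// bounds block after it, both starting at the data offset the VM reports. The concrete data
// offset used below is an invented stand-in for eeGetArrayDataOffset.
#include <cstdio>

unsigned OffsetOfDimensionLength(unsigned dataOffset, unsigned rank, unsigned dimension)
{
    (void)rank;
    return dataOffset + 4u * dimension;            // genTypeSize(TYP_INT) == 4
}

unsigned OffsetOfDimensionLowerBound(unsigned dataOffset, unsigned rank, unsigned dimension)
{
    return dataOffset + 4u * (rank + dimension);
}

int main()
{
    const unsigned dataOffset = 16; // made-up header size
    const unsigned rank = 3;
    for (unsigned d = 0; d < rank; d++)
        printf("dim %u: length at +%u, lower bound at +%u\n", d,
               OffsetOfDimensionLength(dataOffset, rank, d),
               OffsetOfDimensionLowerBound(dataOffset, rank, d));
    return 0;
}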
-// - -void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) -{ - GenTree* arrObj = arrIndex->ArrObj(); - GenTree* indexNode = arrIndex->IndexExpr(); - - regNumber arrReg = genConsumeReg(arrObj); - regNumber indexReg = genConsumeReg(indexNode); - regNumber tgtReg = arrIndex->GetRegNum(); - - unsigned dim = arrIndex->gtCurrDim; - unsigned rank = arrIndex->gtArrRank; - var_types elemType = arrIndex->gtArrElemType; - - noway_assert(tgtReg != REG_NA); - - // Subtract the lower bound for this dimension. - // TODO-XArch-CQ: make this contained if it's an immediate that fits. - if (tgtReg != indexReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet()); - } - GetEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg, - genOffsetOfMDArrayLowerBound(elemType, rank, dim)); - GetEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg, - genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); - genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL); - - genProduceReg(arrIndex); -} - -//------------------------------------------------------------------------ -// genCodeForArrOffset: Generates code to compute the flattened array offset for -// one dimension of an array reference: -// result = (prevDimOffset * dimSize) + effectiveIndex -// where dimSize is obtained from the arrObj operand -// -// Arguments: -// arrOffset - the node for which we're generating code -// -// Return Value: -// None. -// -// Notes: -// dimSize and effectiveIndex are always non-negative, the former by design, -// and the latter because it has been normalized to be zero-based. - -void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) -{ - GenTree* offsetNode = arrOffset->gtOffset; - GenTree* indexNode = arrOffset->gtIndex; - GenTree* arrObj = arrOffset->gtArrObj; - - regNumber tgtReg = arrOffset->GetRegNum(); - assert(tgtReg != REG_NA); - - unsigned dim = arrOffset->gtCurrDim; - unsigned rank = arrOffset->gtArrRank; - var_types elemType = arrOffset->gtArrElemType; - - // First, consume the operands in the correct order. - regNumber offsetReg = REG_NA; - regNumber tmpReg = REG_NA; - if (!offsetNode->IsIntegralConst(0)) - { - offsetReg = genConsumeReg(offsetNode); - - // We will use a temp register for the offset*scale+effectiveIndex computation. - tmpReg = arrOffset->GetSingleTempReg(); - } - else - { - assert(offsetNode->isContained()); - } - regNumber indexReg = genConsumeReg(indexNode); - // Although arrReg may not be used in the constant-index case, if we have generated - // the value into a register, we must consume it, otherwise we will fail to end the - // live range of the gc ptr. - // TODO-CQ: Currently arrObj will always have a register allocated to it. - // We could avoid allocating a register for it, which would be of value if the arrObj - // is an on-stack lclVar. - regNumber arrReg = REG_NA; - if (arrObj->gtHasReg()) - { - arrReg = genConsumeReg(arrObj); - } - - if (!offsetNode->IsIntegralConst(0)) - { - assert(tmpReg != REG_NA); - assert(arrReg != REG_NA); - - // Evaluate tgtReg = offsetReg*dim_size + indexReg. - // tmpReg is used to load dim_size and the result of the multiplication. - // Note that dim_size will never be negative. 
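// Standalone sketch of the recurrence documented above, offset = prevDimOffset * dimSize +
// effectiveIndex, applied across every dimension of a rank-3 array; the JIT materializes one
// step of this per GT_ARR_OFFSET node, loading dimSize from the array object.
#include <cstdio>

unsigned FlattenIndex(const unsigned (&dimSize)[3], const unsigned (&effectiveIndex)[3])
{
    unsigned offset = 0;
    for (unsigned d = 0; d < 3; d++)
        offset = offset * dimSize[d] + effectiveIndex[d]; // one GT_ARR_OFFSET step
    return offset;
}

int main()
{
    unsigned dims[3] = {4, 3, 5};
    unsigned idx[3] = {2, 1, 3};
    printf("flattened index = %u\n", FlattenIndex(dims, idx)); // (2*3 + 1)*5 + 3 = 38
    return 0;
}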
- - GetEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg, - genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); - inst_RV_RV(INS_imul, tmpReg, offsetReg); - - if (tmpReg == tgtReg) - { - inst_RV_RV(INS_add, tmpReg, indexReg); - } - else - { - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL); - } - inst_RV_RV(INS_add, tgtReg, tmpReg); - } - } - else - { - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); - } - } - genProduceReg(arrOffset); -} - -instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) -{ - instruction ins; - - // Operations on SIMD vectors shouldn't come this path - assert(!varTypeIsSIMD(type)); - if (varTypeIsFloating(type)) - { - return ins_MathOp(oper, type); - } - - switch (oper) - { - case GT_ADD: - ins = INS_add; - break; - case GT_AND: - ins = INS_and; - break; - case GT_LSH: - ins = INS_shl; - break; - case GT_MUL: - ins = INS_imul; - break; - case GT_NEG: - ins = INS_neg; - break; - case GT_NOT: - ins = INS_not; - break; - case GT_OR: - ins = INS_or; - break; - case GT_ROL: - ins = INS_rol; - break; - case GT_ROR: - ins = INS_ror; - break; - case GT_RSH: - ins = INS_sar; - break; - case GT_RSZ: - ins = INS_shr; - break; - case GT_SUB: - ins = INS_sub; - break; - case GT_XOR: - ins = INS_xor; - break; -#if !defined(TARGET_64BIT) - case GT_ADD_LO: - ins = INS_add; - break; - case GT_ADD_HI: - ins = INS_adc; - break; - case GT_SUB_LO: - ins = INS_sub; - break; - case GT_SUB_HI: - ins = INS_sbb; - break; - case GT_LSH_HI: - ins = INS_shld; - break; - case GT_RSH_LO: - ins = INS_shrd; - break; -#endif // !defined(TARGET_64BIT) - default: - unreached(); - break; - } - return ins; -} - -//------------------------------------------------------------------------ -// genCodeForShift: Generates the code sequence for a GenTree node that -// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). -// -// Arguments: -// tree - the bit shift node (that specifies the type of bit shift to perform). -// -// Assumptions: -// a) All GenTrees are register allocated. -// b) The shift-by-amount in tree->AsOp()->gtOp2 is either a contained constant or -// it's a register-allocated expression. If it is in a register that is -// not RCX, it will be moved to RCX (so RCX better not be in use!). -// -void CodeGen::genCodeForShift(GenTree* tree) -{ - // Only the non-RMW case here. 
-    assert(tree->OperIsShiftOrRotate());
-    assert(tree->AsOp()->gtOp1->isUsedFromReg());
-    assert(tree->GetRegNum() != REG_NA);
-
-    genConsumeOperands(tree->AsOp());
-
-    var_types targetType = tree->TypeGet();
-    instruction ins = genGetInsForOper(tree->OperGet(), targetType);
-
-    GenTree* operand = tree->gtGetOp1();
-    regNumber operandReg = operand->GetRegNum();
-
-    GenTree* shiftBy = tree->gtGetOp2();
-
-    if (shiftBy->isContainedIntOrIImmed())
-    {
-        emitAttr size = emitTypeSize(tree);
-
-        // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg"
-        if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && shiftBy->IsIntegralConst(1))
-        {
-            if (tree->GetRegNum() == operandReg)
-            {
-                GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg);
-            }
-            else
-            {
-                GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0);
-            }
-        }
-        else
-        {
-            int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
-
-#if defined(TARGET_64BIT)
-            // Try to emit rorx if BMI2 is available instead of mov+rol
-            // it makes sense only for 64bit integers
-            if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) &&
-                compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) &&
-                (shiftByValue > 0) && (shiftByValue < 64))
-            {
-                const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue;
-                GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value);
-                genProduceReg(tree);
-                return;
-            }
-#endif
-            // First, move the operand to the destination register and
-            // later on perform the shift in-place.
-            // (LSRA will try to avoid this situation through preferencing.)
-            if (tree->GetRegNum() != operandReg)
-            {
-                inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType);
-            }
-            inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue);
-        }
-    }
-    else
-    {
-        // We must have the number of bits to shift stored in ECX, since we constrained this node to
-        // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
-        // register destination requirement.
-        genCopyRegIfNeeded(shiftBy, REG_RCX);
-
-        // The operand to be shifted must not be in ECX
-        noway_assert(operandReg != REG_RCX);
-
-        if (tree->GetRegNum() != operandReg)
-        {
-            inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType);
-        }
-        inst_RV_CL(ins, tree->GetRegNum(), targetType);
-    }
-
-    genProduceReg(tree);
-}
-
-#ifdef TARGET_X86
-//------------------------------------------------------------------------
-// genCodeForShiftLong: Generates the code sequence for a GenTree node that
-// represents a three operand bit shift or rotate operation (<<Hi, >>Lo).
-//
-// Arguments:
-//    tree - the bit shift node (that specifies the type of bit shift to perform).
-//
-// Assumptions:
-//    a) All GenTrees are register allocated.
-//    b) The shift-by-amount in tree->AsOp()->gtOp2 is a contained constant
-//
-// TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
-// need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
-// targetReg if sourceHi is a memory operand). Similarly for GT_RSH_LO, sourceLo could be marked as
-// contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
-//
-void CodeGen::genCodeForShiftLong(GenTree* tree)
-{
-    // Only the non-RMW case here.
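// Illustrative check, not JIT code, of the identity the rorx path above relies on: a left
// rotate by n is the same as a right rotate by (width - n), so GT_ROL can be emitted as a
// single non-destructive rorx when BMI2 is available.
#include <cstdint>
#include <cstdio>

uint64_t RotateLeft(uint64_t x, unsigned n) { return (x << n) | (x >> (64 - n)); }   // 0 < n < 64
uint64_t RotateRight(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }  // 0 < n < 64

int main()
{
    const uint64_t x = 0x0123456789ABCDEFull;
    for (unsigned n = 1; n < 64; n++)
        if (RotateLeft(x, n) != RotateRight(x, 64 - n))
            printf("mismatch at n=%u\n", n);
    printf("rol(x, n) == ror(x, 64 - n) for n = 1..63\n");
    return 0;
}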
- genTreeOps oper = tree->OperGet(); - assert(oper == GT_LSH_HI || oper == GT_RSH_LO); - - GenTree* operand = tree->AsOp()->gtOp1; - assert(operand->OperGet() == GT_LONG); - assert(operand->AsOp()->gtOp1->isUsedFromReg()); - assert(operand->AsOp()->gtOp2->isUsedFromReg()); - - GenTree* operandLo = operand->gtGetOp1(); - GenTree* operandHi = operand->gtGetOp2(); - - regNumber regLo = operandLo->GetRegNum(); - regNumber regHi = operandHi->GetRegNum(); - - genConsumeOperands(tree->AsOp()); - - var_types targetType = tree->TypeGet(); - instruction ins = genGetInsForOper(oper, targetType); - - GenTree* shiftBy = tree->gtGetOp2(); - - assert(shiftBy->isContainedIntOrIImmed()); - - unsigned int count = (unsigned int)shiftBy->AsIntConCommon()->IconValue(); - - regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo; - - if (regResult != tree->GetRegNum()) - { - inst_RV_RV(INS_mov, tree->GetRegNum(), regResult, targetType); - } - - if (oper == GT_LSH_HI) - { - inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->GetRegNum(), regLo, count); - } - else - { - assert(oper == GT_RSH_LO); - inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->GetRegNum(), regHi, count); - } - - genProduceReg(tree); -} -#endif - -//------------------------------------------------------------------------ -// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that -// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example: -// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) ) -// -// Arguments: -// storeIndNode: the GT_STOREIND node. -// -void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd) -{ - GenTree* data = storeInd->Data(); - - assert(data->OperIsShift() || data->OperIsRotate()); - - // This function only handles the RMW case. - assert(data->AsOp()->gtOp1->isUsedFromMemory()); - assert(data->AsOp()->gtOp1->isIndir()); - assert(Lowering::IndirsAreEquivalent(data->AsOp()->gtOp1, storeInd)); - assert(data->GetRegNum() == REG_NA); - - var_types targetType = data->TypeGet(); - genTreeOps oper = data->OperGet(); - instruction ins = genGetInsForOper(oper, targetType); - emitAttr attr = EA_ATTR(genTypeSize(targetType)); - - GenTree* shiftBy = data->AsOp()->gtOp2; - if (shiftBy->isContainedIntOrIImmed()) - { - int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); - ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue); - if (shiftByValue == 1) - { - // There is no source in this case, as the shift by count is embedded in the instruction opcode itself. - GetEmitter()->emitInsRMW(ins, attr, storeInd); - } - else - { - GetEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy); - } - } - else - { - // We must have the number of bits to shift stored in ECX, since we constrained this node to - // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single - // register destination requirement. - genCopyRegIfNeeded(shiftBy, REG_RCX); - - // The shiftBy operand is implicit, so call the unary version of emitInsRMW. - GetEmitter()->emitInsRMW(ins, attr, storeInd); - } -} - -//------------------------------------------------------------------------ -// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. -// -// Arguments: -// tree - the node. -// -void CodeGen::genCodeForLclAddr(GenTree* tree) -{ - assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); - - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); - - // Address of a local var. 
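// Sketch of the shld/shrd semantics genCodeForShiftLong above depends on for 64-bit shifts on a
// 32-bit target: bits shifted out of one half are shifted into the other half. Assumes
// 0 < count < 32, matching the contained-constant case that function handles.
#include <cstdint>
#include <cstdio>

void ShiftLeftLongHi(uint32_t& hi, uint32_t lo, unsigned count)  // like shld hi, lo, count
{
    hi = (hi << count) | (lo >> (32 - count));
}

void ShiftRightLongLo(uint32_t& lo, uint32_t hi, unsigned count) // like shrd lo, hi, count
{
    lo = (lo >> count) | (hi << (32 - count));
}

int main()
{
    const uint64_t value = 0x11223344AABBCCDDull;
    uint32_t lo = (uint32_t)value;
    uint32_t hi = (uint32_t)(value >> 32);
    ShiftLeftLongHi(hi, lo, 8); // hi now holds the upper half of (value << 8)
    printf("hi=%08X expected=%08X\n", hi, (uint32_t)((value << 8) >> 32));
    return 0;
}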
- noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); - - emitAttr size = emitTypeSize(targetType); - - inst_RV_TT(INS_lea, targetReg, tree, 0, size); - genProduceReg(tree); -} - -//------------------------------------------------------------------------ -// genCodeForLclFld: Produce code for a GT_LCL_FLD node. -// -// Arguments: -// tree - the GT_LCL_FLD node -// -void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) -{ - assert(tree->OperIs(GT_LCL_FLD)); - - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); - - noway_assert(targetReg != REG_NA); - -#ifdef FEATURE_SIMD - // Loading of TYP_SIMD12 (i.e. Vector3) field - if (targetType == TYP_SIMD12) - { - genLoadLclTypeSIMD12(tree); - return; - } -#endif - - noway_assert(targetType != TYP_STRUCT); - - emitAttr size = emitTypeSize(targetType); - unsigned offs = tree->GetLclOffs(); - unsigned varNum = tree->GetLclNum(); - assert(varNum < compiler->lvaCount); - - GetEmitter()->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); - - genProduceReg(tree); -} - -//------------------------------------------------------------------------ -// genCodeForLclVar: Produce code for a GT_LCL_VAR node. -// -// Arguments: -// tree - the GT_LCL_VAR node -// -void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) -{ - assert(tree->OperIs(GT_LCL_VAR)); - - // lcl_vars are not defs - assert((tree->gtFlags & GTF_VAR_DEF) == 0); - - LclVarDsc* varDsc = compiler->lvaGetDesc(tree); - bool isRegCandidate = varDsc->lvIsRegCandidate(); - - // If this is a register candidate that has been spilled, genConsumeReg() will - // reload it at the point of use. Otherwise, if it's not in a register, we load it here. - - if (!isRegCandidate && !tree->IsMultiReg() && !(tree->gtFlags & GTF_SPILLED)) - { -#if defined(FEATURE_SIMD) && defined(TARGET_X86) - // Loading of TYP_SIMD12 (i.e. Vector3) variable - if (tree->TypeGet() == TYP_SIMD12) - { - genLoadLclTypeSIMD12(tree); - return; - } -#endif // defined(FEATURE_SIMD) && defined(TARGET_X86) - - var_types type = varDsc->GetRegisterType(tree); - GetEmitter()->emitIns_R_S(ins_Load(type, compiler->isSIMDTypeLocalAligned(tree->GetLclNum())), - emitTypeSize(type), tree->GetRegNum(), tree->GetLclNum(), 0); - genProduceReg(tree); - } -} - -//------------------------------------------------------------------------ -// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. -// -// Arguments: -// tree - the GT_STORE_LCL_FLD node -// -void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) -{ - assert(tree->OperIs(GT_STORE_LCL_FLD)); - - var_types targetType = tree->TypeGet(); - GenTree* op1 = tree->gtGetOp1(); - - noway_assert(targetType != TYP_STRUCT); - -#ifdef FEATURE_SIMD - // storing of TYP_SIMD12 (i.e. Vector3) field - if (tree->TypeGet() == TYP_SIMD12) - { - genStoreLclTypeSIMD12(tree); - return; - } -#endif // FEATURE_SIMD - - assert(varTypeUsesFloatReg(targetType) == varTypeUsesFloatReg(op1)); - assert(genTypeSize(genActualType(targetType)) == genTypeSize(genActualType(op1->TypeGet()))); - - genConsumeRegs(op1); - GetEmitter()->emitInsBinary(ins_Store(targetType), emitTypeSize(tree), tree, op1); - - // Updating variable liveness after instruction was emitted - genUpdateLife(tree); -} - -//------------------------------------------------------------------------ -// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. 
-// -// Arguments: -// lclNode - the GT_STORE_LCL_VAR node -// -void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) -{ - assert(lclNode->OperIs(GT_STORE_LCL_VAR)); - - regNumber targetReg = lclNode->GetRegNum(); - emitter* emit = GetEmitter(); - - GenTree* op1 = lclNode->gtGetOp1(); - - // Stores from a multi-reg source are handled separately. - if (op1->gtSkipReloadOrCopy()->IsMultiRegNode()) - { - genMultiRegStoreToLocal(lclNode); - } - else - { - unsigned lclNum = lclNode->GetLclNum(); - LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); - - var_types targetType = varDsc->GetRegisterType(lclNode); - -#ifdef DEBUG - var_types op1Type = op1->TypeGet(); - if (op1Type == TYP_STRUCT) - { - assert(op1->IsLocal()); - GenTreeLclVar* op1LclVar = op1->AsLclVar(); - unsigned op1lclNum = op1LclVar->GetLclNum(); - LclVarDsc* op1VarDsc = compiler->lvaGetDesc(op1lclNum); - op1Type = op1VarDsc->GetRegisterType(op1LclVar); - } - assert(varTypeUsesFloatReg(targetType) == varTypeUsesFloatReg(op1Type)); - assert(!varTypeUsesFloatReg(targetType) || (emitTypeSize(targetType) == emitTypeSize(op1Type))); -#endif - -#if !defined(TARGET_64BIT) - if (targetType == TYP_LONG) - { - genStoreLongLclVar(lclNode); - return; - } -#endif // !defined(TARGET_64BIT) - -#ifdef FEATURE_SIMD - // storing of TYP_SIMD12 (i.e. Vector3) field - if (targetType == TYP_SIMD12) - { - genStoreLclTypeSIMD12(lclNode); - return; - } -#endif // FEATURE_SIMD - - genConsumeRegs(op1); - - if (op1->OperIs(GT_BITCAST) && op1->isContained()) - { - GenTree* bitCastSrc = op1->gtGetOp1(); - var_types srcType = bitCastSrc->TypeGet(); - noway_assert(!bitCastSrc->isContained()); - if (targetReg == REG_NA) - { - emit->emitIns_S_R(ins_Store(srcType, compiler->isSIMDTypeLocalAligned(lclNum)), - emitTypeSize(targetType), bitCastSrc->GetRegNum(), lclNum, 0); - genUpdateLife(lclNode); - varDsc->SetRegNum(REG_STK); - } - else - { - genBitCast(targetType, targetReg, srcType, bitCastSrc->GetRegNum()); - } - } - else if (targetReg == REG_NA) - { - // stack store - emit->emitInsStoreLcl(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), - emitTypeSize(targetType), lclNode); - varDsc->SetRegNum(REG_STK); - } - else - { - // Look for the case where we have a constant zero which we've marked for reuse, - // but which isn't actually in the register we want. In that case, it's better to create - // zero in the target register, because an xor is smaller than a copy. Note that we could - // potentially handle this in the register allocator, but we can't always catch it there - // because the target may not have a register allocated for it yet. - if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) && (op1->IsIntegralConst(0) || op1->IsFPZero())) - { - op1->SetRegNum(REG_NA); - op1->ResetReuseRegVal(); - op1->SetContained(); - } - - if (!op1->isUsedFromReg()) - { - // Currently, we assume that the non-reg source of a GT_STORE_LCL_VAR writing to a register - // must be a constant. However, in the future we might want to support an operand used from - // memory. This is a bit tricky because we have to decide it can be used from memory before - // register allocation, - // and this would be a case where, once that's done, we need to mark that node as always - // requiring a register - which we always assume now anyway, but once we "optimize" that - // we'll have to take cases like this into account. 
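// Not JIT code: the contained GT_BITCAST handled above is a pure bit-pattern move between
// register files (for example a movd between an XMM register and a GPR), with no value
// conversion. The portable C++ equivalent is a memcpy-style bit cast like this one.
#include <cstdint>
#include <cstdio>
#include <cstring>

uint32_t BitCastFloatToUInt32(float value)
{
    uint32_t bits;
    static_assert(sizeof(bits) == sizeof(value), "same size required");
    std::memcpy(&bits, &value, sizeof(bits)); // reinterpret the bits, do not convert the value
    return bits;
}

int main()
{
    printf("bits of 1.0f = 0x%08X\n", BitCastFloatToUInt32(1.0f)); // 0x3F800000
    return 0;
}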
- assert((op1->GetRegNum() == REG_NA) && op1->OperIsConst()); - genSetRegToConst(targetReg, targetType, op1); - } - else if (op1->GetRegNum() != targetReg) - { - assert(op1->GetRegNum() != REG_NA); - emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(lclNode), lclNode, op1); - } - } - if (targetReg != REG_NA) - { - genProduceReg(lclNode); - } - } -} - -//------------------------------------------------------------------------ -// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node. -// -// Arguments: -// tree - the GT_INDEX_ADDR node -// -void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) -{ - GenTree* const base = node->Arr(); - GenTree* const index = node->Index(); - - const regNumber baseReg = genConsumeReg(base); - regNumber indexReg = genConsumeReg(index); - const regNumber dstReg = node->GetRegNum(); - - // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers - // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the - // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until - // we are finished generating the code for this node. - - gcInfo.gcMarkRegPtrVal(baseReg, base->TypeGet()); - assert(varTypeIsIntegral(index->TypeGet())); - - regNumber tmpReg = REG_NA; -#ifdef TARGET_64BIT - tmpReg = node->GetSingleTempReg(); -#endif - - // Generate the bounds check if necessary. - if ((node->gtFlags & GTF_INX_RNGCHK) != 0) - { -#ifdef TARGET_64BIT - // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index - // is a native int on a 64-bit platform, we will need to widen the array length and then compare. - if (index->TypeGet() == TYP_I_IMPL) - { - GetEmitter()->emitIns_R_AR(INS_mov, EA_4BYTE, tmpReg, baseReg, static_cast(node->gtLenOffset)); - GetEmitter()->emitIns_R_R(INS_cmp, EA_8BYTE, indexReg, tmpReg); - } - else -#endif // TARGET_64BIT - { - GetEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, indexReg, baseReg, static_cast(node->gtLenOffset)); - } - - genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL, node->gtIndRngFailBB); - } - -#ifdef TARGET_64BIT - if (index->TypeGet() != TYP_I_IMPL) - { - // LEA needs 64-bit operands so we need to widen the index if it's TYP_INT. - GetEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, tmpReg, indexReg); - indexReg = tmpReg; - } -#endif // TARGET_64BIT - - // Compute the address of the array element. - unsigned scale = node->gtElemSize; - - switch (scale) - { - case 1: - case 2: - case 4: - case 8: - tmpReg = indexReg; - break; - - default: -#ifdef TARGET_64BIT - // IMUL treats its immediate operand as signed so scale can't be larger than INT32_MAX. - // The VM doesn't allow such large array elements but let's be sure. - noway_assert(scale <= INT32_MAX); -#else // !TARGET_64BIT - tmpReg = node->GetSingleTempReg(); -#endif // !TARGET_64BIT - - GetEmitter()->emitIns_R_I(emitter::inst3opImulForReg(tmpReg), EA_PTRSIZE, indexReg, - static_cast(scale)); - scale = 1; - break; - } - - GetEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node->TypeGet()), dstReg, baseReg, tmpReg, scale, - static_cast(node->gtElemOffset)); - - gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); - - genProduceReg(node); -} - -//------------------------------------------------------------------------ -// genCodeForIndir: Produce code for a GT_IND node. 
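// Illustrative sketch of the address arithmetic genCodeForIndexAddr above produces: the element
// address is base + elemOffset + index * elemSize, where an element size of 1, 2, 4 or 8 folds
// into the lea's scale field and any other size needs an explicit imul of the index first. The
// constants below are invented, not real object-header offsets.
#include <cstddef>
#include <cstdint>
#include <cstdio>

uintptr_t ElementAddress(uintptr_t base, size_t index, size_t elemSize, size_t elemOffset)
{
    // 1/2/4/8 goes straight into lea's scale; other sizes become "imul index, elemSize" and
    // then a lea with scale 1, exactly as in the switch over 'scale' above.
    return base + elemOffset + index * elemSize;
}

int main()
{
    printf("0x%llx\n", (unsigned long long)ElementAddress(0x10000, 3, 12, 16)); // 12-byte elements
    return 0;
}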
-// -// Arguments: -// tree - the GT_IND node -// -void CodeGen::genCodeForIndir(GenTreeIndir* tree) -{ - assert(tree->OperIs(GT_IND)); - -#ifdef FEATURE_SIMD - // Handling of Vector3 type values loaded through indirection. - if (tree->TypeGet() == TYP_SIMD12) - { - genLoadIndTypeSIMD12(tree); - return; - } -#endif // FEATURE_SIMD - - var_types targetType = tree->TypeGet(); - emitter* emit = GetEmitter(); - - GenTree* addr = tree->Addr(); - if (addr->IsCnsIntOrI() && addr->IsIconHandle(GTF_ICON_TLS_HDL)) - { - noway_assert(EA_ATTR(genTypeSize(targetType)) == EA_PTRSIZE); - emit->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tree->GetRegNum(), FLD_GLOBAL_FS, - (int)addr->AsIntCon()->gtIconVal); - } - else - { - genConsumeAddress(addr); - emit->emitInsLoadInd(ins_Load(targetType), emitTypeSize(tree), tree->GetRegNum(), tree); - } - - genProduceReg(tree); -} - -//------------------------------------------------------------------------ -// genCodeForStoreInd: Produce code for a GT_STOREIND node. -// -// Arguments: -// tree - the GT_STOREIND node -// -void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) -{ - assert(tree->OperIs(GT_STOREIND)); - -#ifdef FEATURE_SIMD - // Storing Vector3 of size 12 bytes through indirection - if (tree->TypeGet() == TYP_SIMD12) - { - genStoreIndTypeSIMD12(tree); - return; - } -#endif // FEATURE_SIMD - - GenTree* data = tree->Data(); - GenTree* addr = tree->Addr(); - var_types targetType = tree->TypeGet(); - - assert(!varTypeIsFloating(targetType) || (genTypeSize(targetType) == genTypeSize(data->TypeGet()))); - - GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); - if (writeBarrierForm != GCInfo::WBF_NoBarrier) - { - // data and addr must be in registers. - // Consume both registers so that any copies of interfering registers are taken care of. - genConsumeOperands(tree); - - if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data)) - { - return; - } - - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go. - noway_assert(data->GetRegNum() != REG_ARG_0); - - // addr goes in REG_ARG_0 - genCopyRegIfNeeded(addr, REG_ARG_0); - - // data goes in REG_ARG_1 - genCopyRegIfNeeded(data, REG_ARG_1); - - genGCWriteBarrier(tree, writeBarrierForm); - } - else - { - bool dataIsUnary = false; - bool isRMWMemoryOp = tree->IsRMWMemoryOp(); - GenTree* rmwSrc = nullptr; - - // We must consume the operands in the proper execution order, so that liveness is - // updated appropriately. - genConsumeAddress(addr); - - // If tree represents a RMW memory op then its data is a non-leaf node marked as contained - // and non-indir operand of data is the source of RMW memory op. - if (isRMWMemoryOp) - { - assert(data->isContained() && !data->OperIsLeaf()); - - GenTree* rmwDst = nullptr; - - dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); - if (!dataIsUnary) - { - if (tree->IsRMWDstOp1()) - { - rmwDst = data->gtGetOp1(); - rmwSrc = data->gtGetOp2(); - } - else - { - assert(tree->IsRMWDstOp2()); - rmwDst = data->gtGetOp2(); - rmwSrc = data->gtGetOp1(); - } - - genConsumeRegs(rmwSrc); - } - else - { - // *(p) = oper *(p): Here addr = p, rmwsrc=rmwDst = *(p) i.e. GT_IND(p) - // For unary RMW ops, src and dst of RMW memory op is the same. Lower - // clears operand counts on rmwSrc and we don't need to perform a - // genConsumeReg() on it. 
-                assert(tree->IsRMWDstOp1());
-                rmwSrc = data->gtGetOp1();
-                rmwDst = data->gtGetOp1();
-                assert(rmwSrc->isUsedFromMemory());
-            }
-
-            assert(rmwSrc != nullptr);
-            assert(rmwDst != nullptr);
-            assert(Lowering::IndirsAreEquivalent(rmwDst, tree));
-        }
-        else
-        {
-            genConsumeRegs(data);
-        }
-
-        if (isRMWMemoryOp)
-        {
-            if (dataIsUnary)
-            {
-                // generate code for unary RMW memory ops like neg/not
-                GetEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(tree), tree);
-            }
-            else
-            {
-                if (data->OperIsShiftOrRotate())
-                {
-                    // Generate code for shift RMW memory ops.
-                    // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
-                    // <amount> <shift> [addr]).
-                    assert(tree->IsRMWDstOp1());
-                    assert(rmwSrc == data->gtGetOp2());
-                    genCodeForShiftRMW(tree);
-                }
-                else if (data->OperGet() == GT_ADD && (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
-                {
-                    // Generate "inc/dec [mem]" instead of "add/sub [mem], 1".
-                    //
-                    // Notes:
-                    //  1) Global morph transforms GT_SUB(x, +/-1) into GT_ADD(x, -/+1).
-                    //  2) TODO-AMD64: Debugger routine NativeWalker::Decode() runs into
-                    //     an assert while decoding ModR/M byte of "inc dword ptr [rax]".
-                    //     It is not clear whether Decode() can handle all possible
-                    //     addr modes with inc/dec. For this reason, inc/dec [mem]
-                    //     is not generated while generating debuggable code. Update
-                    //     the above if condition once Decode() routine is fixed.
-                    assert(rmwSrc->isContainedIntOrIImmed());
-                    instruction ins = rmwSrc->IsIntegralConst(1) ? INS_inc : INS_dec;
-                    GetEmitter()->emitInsRMW(ins, emitTypeSize(tree), tree);
-                }
-                else
-                {
-                    // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
-                    GetEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(tree),
-                                             tree, rmwSrc);
-                }
-            }
-        }
-        else
-        {
-            GetEmitter()->emitInsStoreInd(ins_Store(data->TypeGet()), emitTypeSize(tree), tree);
-        }
-    }
-}
-
-//------------------------------------------------------------------------
-// genCodeForSwap: Produce code for a GT_SWAP node.
-//
-// Arguments:
-//    tree - the GT_SWAP node
-//
-void CodeGen::genCodeForSwap(GenTreeOp* tree)
-{
-    assert(tree->OperIs(GT_SWAP));
-
-    // Swap is only supported for lclVar operands that are enregistered
-    // We do not consume or produce any registers. Both operands remain enregistered.
-    // However, the gc-ness may change.
-    assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2));
-
-    GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon();
-    LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->GetLclNum()]);
-    var_types type1 = varDsc1->TypeGet();
-    GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon();
-    LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->GetLclNum()]);
-    var_types type2 = varDsc2->TypeGet();
-
-    // We must have both int or both fp regs
-    assert(!varTypeUsesFloatReg(type1) || varTypeUsesFloatReg(type2));
-
-    // FP swap is not yet implemented (and should have NYI'd in LSRA)
-    assert(!varTypeUsesFloatReg(type1));
-
-    regNumber oldOp1Reg = lcl1->GetRegNum();
-    regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
-    regNumber oldOp2Reg = lcl2->GetRegNum();
-    regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
-
-    // We don't call genUpdateVarReg because we don't have a tree node with the new register.
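// Hypothetical helper, not JIT code, mirroring the decision above: a read-modify-write add of
// +1 or -1 against memory can be emitted as "inc [mem]" / "dec [mem]", while any other
// contained immediate keeps the "add [mem], imm" form (and the remaining binary RMW ops use
// sub/and/or/xor with a memory destination).
#include <cstdio>

const char* PickRmwAddForm(long long immediate)
{
    if (immediate == 1)
        return "inc dword ptr [mem]";
    if (immediate == -1)
        return "dec dword ptr [mem]";
    return "add dword ptr [mem], imm";
}

int main()
{
    printf("%s\n%s\n%s\n", PickRmwAddForm(1), PickRmwAddForm(-1), PickRmwAddForm(8));
    return 0;
}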
- varDsc1->SetRegNum(oldOp2Reg); - varDsc2->SetRegNum(oldOp1Reg); - - // Do the xchg - emitAttr size = EA_PTRSIZE; - if (varTypeGCtype(type1) != varTypeGCtype(type2)) - { - // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. - // Otherwise it will leave them alone, which is correct if they have the same GC-ness. - size = EA_GCREF; - } - inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); - - // Update the gcInfo. - // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) - gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); - gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); - - // gcMarkRegPtrVal will do the appropriate thing for non-gc types. - // It will also dump the updates. - gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); - gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); -} - -//------------------------------------------------------------------------ -// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized -// helper functions. -// -// Arguments: -// writeBarrierForm - the write barrier form to use -// addr - the address at which to do the store -// data - the data to store -// -// Return Value: -// true if an optimized write barrier form was used, false if not. If this -// function returns false, the caller must emit a "standard" write barrier. - -bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data) -{ - assert(writeBarrierForm != GCInfo::WBF_NoBarrier); - -#if defined(TARGET_X86) && NOGC_WRITE_BARRIERS - if (!genUseOptimizedWriteBarriers(writeBarrierForm)) - { - return false; - } - - const static int regToHelper[2][8] = { - // If the target is known to be in managed memory - { - CORINFO_HELP_ASSIGN_REF_EAX, // EAX - CORINFO_HELP_ASSIGN_REF_ECX, // ECX - -1, // EDX (always the target address) - CORINFO_HELP_ASSIGN_REF_EBX, // EBX - -1, // ESP - CORINFO_HELP_ASSIGN_REF_EBP, // EBP - CORINFO_HELP_ASSIGN_REF_ESI, // ESI - CORINFO_HELP_ASSIGN_REF_EDI, // EDI - }, - - // Don't know if the target is in managed memory - { - CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, // EAX - CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, // ECX - -1, // EDX (always the target address) - CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, // EBX - -1, // ESP - CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, // EBP - CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, // ESI - CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, // EDI - }, - }; - - noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX); - noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX); - noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX); - noway_assert(regToHelper[0][REG_ESP] == -1); - noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP); - noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI); - noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI); - - noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX); - noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX); - noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX); - noway_assert(regToHelper[1][REG_ESP] == -1); - noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP); - noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI); - noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI); - - regNumber reg = data->GetRegNum(); - 
noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER)); - - // Generate the following code: - // lea edx, addr - // call write_barrier_helper_reg - - // addr goes in REG_ARG_0 - genCopyRegIfNeeded(addr, REG_WRITE_BARRIER); - - unsigned tgtAnywhere = 0; - if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked) - { - tgtAnywhere = 1; - } - - // We might want to call a modified version of genGCWriteBarrier() to get the benefit of - // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works - // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here. - - genEmitHelperCall(regToHelper[tgtAnywhere][reg], - 0, // argSize - EA_PTRSIZE); // retSize - - return true; -#else // !defined(TARGET_X86) || !NOGC_WRITE_BARRIERS - return false; -#endif // !defined(TARGET_X86) || !NOGC_WRITE_BARRIERS -} - -// Produce code for a GT_CALL node -void CodeGen::genCallInstruction(GenTreeCall* call) -{ - genAlignStackBeforeCall(call); - - gtCallTypes callType = (gtCallTypes)call->gtCallType; - - IL_OFFSETX ilOffset = BAD_IL_OFFSET; - - // all virtuals should have been expanded into a control expression - assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); - - // Insert a GS check if necessary - if (call->IsTailCallViaJitHelper()) - { - if (compiler->getNeedsGSSecurityCookie()) - { -#if FEATURE_FIXED_OUT_ARGS - // If either of the conditions below is true, we will need a temporary register in order to perform the GS - // cookie check. When FEATURE_FIXED_OUT_ARGS is disabled, we save and restore the temporary register using - // push/pop. When FEATURE_FIXED_OUT_ARGS is enabled, however, we need an alternative solution. For now, - // though, the tail prefix is ignored on all platforms that use fixed out args, so we should never hit this - // case. - assert(compiler->gsGlobalSecurityCookieAddr == nullptr); - assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal); -#endif - genEmitGSCookieCheck(true); - } - } - - // Consume all the arg regs - for (GenTreeCall::Use& use : call->LateArgs()) - { - GenTree* argNode = use.GetNode(); - - fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); - assert(curArgTabEntry); - - if (curArgTabEntry->GetRegNum() == REG_STK) - { - continue; - } - -#ifdef UNIX_AMD64_ABI - // Deal with multi register passed struct args. - if (argNode->OperGet() == GT_FIELD_LIST) - { - unsigned regIndex = 0; - for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) - { - GenTree* putArgRegNode = use.GetNode(); - assert(putArgRegNode->gtOper == GT_PUTARG_REG); - regNumber argReg = curArgTabEntry->GetRegNum(regIndex++); - - genConsumeReg(putArgRegNode); - - // Validate the putArgRegNode has the right type. - assert(varTypeUsesFloatReg(putArgRegNode->TypeGet()) == genIsValidFloatReg(argReg)); - if (putArgRegNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), false), argReg, putArgRegNode->GetRegNum()); - } - } - } - else -#endif // UNIX_AMD64_ABI - { - regNumber argReg = curArgTabEntry->GetRegNum(); - genConsumeReg(argNode); - if (argNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), false), argReg, argNode->GetRegNum()); - } - } - -#if FEATURE_VARARG - // In the case of a varargs call, - // the ABI dictates that if we have floating point args, - // we must pass the enregistered arguments in both the - // integer and floating point registers so, let's do that. 
- if (call->IsVarargs() && varTypeIsFloating(argNode)) - { - regNumber srcReg = argNode->GetRegNum(); - regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum()); - inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg); - } -#endif // FEATURE_VARARG - } - -#if defined(TARGET_X86) || defined(UNIX_AMD64_ABI) - // The call will pop its arguments. - // for each putarg_stk: - target_ssize_t stackArgBytes = 0; - for (GenTreeCall::Use& use : call->Args()) - { - GenTree* arg = use.GetNode(); - if (arg->OperIs(GT_PUTARG_STK) && ((arg->gtFlags & GTF_LATE_ARG) == 0)) - { - GenTree* source = arg->AsPutArgStk()->gtGetOp1(); - unsigned size = arg->AsPutArgStk()->GetStackByteSize(); - stackArgBytes += size; -#ifdef DEBUG - fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); - assert(curArgTabEntry != nullptr); - assert(size == (curArgTabEntry->numSlots * TARGET_POINTER_SIZE)); -#ifdef FEATURE_PUT_STRUCT_ARG_STK - if (!source->OperIs(GT_FIELD_LIST) && (source->TypeGet() == TYP_STRUCT)) - { - GenTreeObj* obj = source->AsObj(); - unsigned argBytes = roundUp(obj->GetLayout()->GetSize(), TARGET_POINTER_SIZE); -#ifdef TARGET_X86 - // If we have an OBJ, we must have created a copy if the original arg was not a - // local and was not a multiple of TARGET_POINTER_SIZE. - // Note that on x64/ux this will be handled by unrolling in genStructPutArgUnroll. - assert((argBytes == obj->GetLayout()->GetSize()) || obj->Addr()->IsLocalAddrExpr()); -#endif // TARGET_X86 - assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes); - } -#endif // FEATURE_PUT_STRUCT_ARG_STK -#endif // DEBUG - } - } -#endif // defined(TARGET_X86) || defined(UNIX_AMD64_ABI) - - // Insert a null check on "this" pointer if asked. - if (call->NeedsNullCheck()) - { - const regNumber regThis = genGetThisArgReg(call); - GetEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0); - } - - // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = call->gtControlExpr; - if (callType == CT_INDIRECT) - { - assert(target == nullptr); - target = call->gtCallAddr; - methHnd = nullptr; - } - else - { - methHnd = call->gtCallMethHnd; - } - - CORINFO_SIG_INFO* sigInfo = nullptr; -#ifdef DEBUG - // Pass the call signature information down into the emitter so the emitter can associate - // native call sites with the signatures they were generated from. - if (callType != CT_HELPER) - { - sigInfo = call->callSig; - } -#endif // DEBUG - - // If fast tail call, then we are done. In this case we setup the args (both reg args - // and stack args in incoming arg area) and call target in rax. Epilog sequence would - // generate "jmp rax". - if (call->IsFastTailCall()) - { - // Don't support fast tail calling JIT helpers - assert(callType != CT_HELPER); - - // If this is indirect then we go through RAX with epilog sequence - // generating "jmp rax". Otherwise epilog will try to generate a - // rip-relative jump. - if (target != nullptr) - { - genConsumeReg(target); - genCopyRegIfNeeded(target, REG_RAX); - } - - return; - } - - // For a pinvoke to unmanged code we emit a label to clear - // the GC pointer state before the callsite. - // We can't utilize the typical lazy killing of GC pointers - // at (or inside) the callsite. - if (compiler->killGCRefs(call)) - { - genDefineTempLabel(genCreateTempLabel()); - } - - // Determine return value size(s). 
- const ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - emitAttr retSize = EA_PTRSIZE; - emitAttr secondRetSize = EA_UNKNOWN; - - if (call->HasMultiRegRetVal()) - { - retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0)); - secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1)); - } - else - { - assert(!varTypeIsStruct(call)); - - if (call->gtType == TYP_REF) - { - retSize = EA_GCREF; - } - else if (call->gtType == TYP_BYREF) - { - retSize = EA_BYREF; - } - } - -#if defined(DEBUG) && defined(TARGET_X86) - // Store the stack pointer so we can check it after the call. - if (compiler->opts.compStackCheckOnCall && call->gtCallType == CT_USER_FUNC) - { - noway_assert(compiler->lvaCallSpCheck != 0xCCCCCCCC && - compiler->lvaTable[compiler->lvaCallSpCheck].lvDoNotEnregister && - compiler->lvaTable[compiler->lvaCallSpCheck].lvOnFrame); - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallSpCheck, 0); - } -#endif // defined(DEBUG) && defined(TARGET_X86) - - bool fPossibleSyncHelperCall = false; - CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; - - // We need to propagate the IL offset information to the call instruction, so we can emit - // an IL to native mapping record for the call, to support managed return value debugging. - // We don't want tail call helper calls that were converted from normal calls to get a record, - // so we skip this hash table lookup logic in that case. - if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) - { - (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); - } - -#if defined(TARGET_X86) - bool fCallerPop = call->CallerPop(); - - // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will - // adjust its stack level accordingly. - // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the - // pop when we're done. - target_ssize_t argSizeForEmitter = stackArgBytes; - if (fCallerPop) - { - argSizeForEmitter = -stackArgBytes; - } -#endif // defined(TARGET_X86) - - // When it's a PInvoke call and the call type is USER function, we issue VZEROUPPER here - // if the function contains 256bit AVX instructions, this is to avoid AVX-256 to Legacy SSE - // transition penalty, assuming the user function contains legacy SSE instruction. - // To limit code size increase impact: we only issue VZEROUPPER before PInvoke call, not issue - // VZEROUPPER after PInvoke call because transition penalty from legacy SSE to AVX only happens - // when there's preceding 256-bit AVX to legacy SSE transition penalty. - if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && GetEmitter()->Contains256bitAVX()) - { - assert(compiler->canUseVexEncoding()); - instGen(INS_vzeroupper); - } - - if (callType == CT_HELPER && compiler->info.compFlags & CORINFO_FLG_SYNCH) - { - fPossibleSyncHelperCall = true; - helperNum = compiler->eeGetHelperNum(methHnd); - noway_assert(helperNum != CORINFO_HELP_UNDEF); - } - - if (target != nullptr) - { -#ifdef TARGET_X86 - if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) - { - // On x86, we need to generate a very specific pattern for indirect VSD calls: - // - // 3-byte nop - // call dword ptr [eax] - // - // Where EAX is also used as an argument to the stub dispatch helper. Make - // sure that the call target address is computed into EAX in this case. 
- - assert(compiler->virtualStubParamInfo->GetReg() == REG_VIRTUAL_STUB_TARGET); - - assert(target->isContainedIndir()); - assert(target->OperGet() == GT_IND); - - GenTree* addr = target->AsIndir()->Addr(); - assert(addr->isUsedFromReg()); - - genConsumeReg(addr); - genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET); - - GetEmitter()->emitIns_Nop(3); - - // clang-format off - GetEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD), - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, - argSizeForEmitter, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0); - // clang-format on - } - else -#endif - if (target->isContainedIndir()) - { - if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()) - { - // Note that if gtControlExpr is an indir of an absolute address, we mark it as - // contained only if it can be encoded as PC-relative offset. - assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler)); - - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue() - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - ilOffset); - // clang-format on - } - else - { - // clang-format off - genEmitCall(emitter::EC_INDIR_ARD, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - target->AsIndir() - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - ilOffset); - // clang-format on - } - } - else - { - // We have already generated code for gtControlExpr evaluating it into a register. - // We just need to emit "call reg" in this case. - assert(genIsValidIntReg(target->GetRegNum())); - - // clang-format off - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr // addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - ilOffset, - genConsumeReg(target)); - // clang-format on - } - } -#ifdef FEATURE_READYTORUN_COMPILER - else if (call->gtEntryPoint.addr != nullptr) - { - // clang-format off - genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN - : emitter::EC_FUNC_TOKEN_INDIR, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - (void*) call->gtEntryPoint.addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - ilOffset); - // clang-format on - } -#endif - else - { - // Generate a direct call to a non-virtual user defined or helper method - assert(callType == CT_HELPER || callType == CT_USER_FUNC); - - void* addr = nullptr; - if (callType == CT_HELPER) - { - // Direct call to a helper method. - helperNum = compiler->eeGetHelperNum(methHnd); - noway_assert(helperNum != CORINFO_HELP_UNDEF); - - void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); - assert(pAddr == nullptr); - } - else - { - // Direct call to a non-virtual user function. 
- addr = call->gtDirectCallAddress; - } - - assert(addr != nullptr); - - // Non-virtual direct calls to known addresses - - // clang-format off - genEmitCall(emitter::EC_FUNC_TOKEN, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - ilOffset); - // clang-format on - } - - // if it was a pinvoke or intrinsic we may have needed to get the address of a label - if (genPendingCallLabel) - { - genDefineInlineTempLabel(genPendingCallLabel); - genPendingCallLabel = nullptr; - } - - // Update GC info: - // All Callee arg registers are trashed and no longer contain any GC pointers. - // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? - // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other - // registers from RBM_CALLEE_TRASH. - assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; - gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; - - var_types returnType = call->TypeGet(); - if (returnType != TYP_VOID) - { -#ifdef TARGET_X86 - if (varTypeIsFloating(returnType)) - { - // Spill the value from the fp stack. - // Then, load it into the target register. - call->gtFlags |= GTF_SPILL; - regSet.rsSpillFPStack(call); - call->gtFlags |= GTF_SPILLED; - call->gtFlags &= ~GTF_SPILL; - } - else -#endif // TARGET_X86 - { - regNumber returnReg; - - if (call->HasMultiRegRetVal()) - { - assert(retTypeDesc != nullptr); - const unsigned regCount = retTypeDesc->GetReturnRegCount(); - - // If regs allocated to call node are different from ABI return - // regs in which the call has returned its result, move the result - // to regs allocated to call node. - for (unsigned i = 0; i < regCount; ++i) - { - var_types regType = retTypeDesc->GetReturnRegType(i); - returnReg = retTypeDesc->GetABIReturnReg(i); - regNumber allocatedReg = call->GetRegNumByIdx(i); - if (returnReg != allocatedReg) - { - inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); - } - } - -#ifdef FEATURE_SIMD - // A Vector3 return value is stored in xmm0 and xmm1. - // RyuJIT assumes that the upper unused bits of xmm1 are cleared but - // the native compiler doesn't guarantee it. - if (returnType == TYP_SIMD12) - { - returnReg = retTypeDesc->GetABIReturnReg(1); - // Clear the upper 32 bits by two shift instructions. - // retReg = retReg << 96 - // retReg = retReg >> 96 - GetEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12); - GetEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12); - } -#endif // FEATURE_SIMD - } - else - { -#ifdef TARGET_X86 - if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) - { - // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with - // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the - // correct argument registers. 
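// [Editor's note] A minimal standalone sketch (not part of this patch) of the byte-shift trick
// used above for a Vector3 (TYP_SIMD12) return: shifting the second return register left and
// then right by 12 bytes (pslldq/psrldq) keeps only its low 32 bits and zeroes everything above.
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

static __m128i KeepLow32BitsOnly(__m128i v)
{
    v = _mm_slli_si128(v, 12); // retReg = retReg << 96
    v = _mm_srli_si128(v, 12); // retReg = retReg >> 96
    return v;
}

int main()
{
    __m128i v = KeepLow32BitsOnly(_mm_set_epi32(0x11111111, 0x22222222, 0x33333333, 0x44444444));
    printf("elem0=0x%08x elem1=0x%08x\n",
           static_cast<unsigned>(_mm_cvtsi128_si32(v)),                     // 0x44444444 is kept
           static_cast<unsigned>(_mm_cvtsi128_si32(_mm_srli_si128(v, 4)))); // upper lanes are zero
    return 0;
}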
- returnReg = REG_PINVOKE_TCB; - } - else -#endif // TARGET_X86 - if (varTypeIsFloating(returnType)) - { - returnReg = REG_FLOATRET; - } - else - { - returnReg = REG_INTRET; - } - - if (call->GetRegNum() != returnReg) - { - inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, returnType); - } - } - - genProduceReg(call); - } - } - - // If there is nothing next, that means the result is thrown away, so this value is not live. - // However, for minopts or debuggable code, we keep it live to support managed return value debugging. - if ((call->gtNext == nullptr) && compiler->opts.OptimizationEnabled()) - { - gcInfo.gcMarkRegSetNpt(RBM_INTRET); - } - -#if defined(DEBUG) && defined(TARGET_X86) - if (compiler->opts.compStackCheckOnCall && call->gtCallType == CT_USER_FUNC) - { - noway_assert(compiler->lvaCallSpCheck != 0xCCCCCCCC && - compiler->lvaTable[compiler->lvaCallSpCheck].lvDoNotEnregister && - compiler->lvaTable[compiler->lvaCallSpCheck].lvOnFrame); - if (!fCallerPop && (stackArgBytes != 0)) - { - // ECX is trashed, so can be used to compute the expected SP. We saved the value of SP - // after pushing all the stack arguments, but the caller popped the arguments, so we need - // to do some math to figure a good comparison. - GetEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE); - GetEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, stackArgBytes); - GetEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallSpCheck, 0); - } - else - { - GetEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallSpCheck, 0); - } - - BasicBlock* sp_check = genCreateTempLabel(); - GetEmitter()->emitIns_J(INS_je, sp_check); - instGen(INS_BREAKPOINT); - genDefineTempLabel(sp_check); - } -#endif // defined(DEBUG) && defined(TARGET_X86) - -#if !defined(FEATURE_EH_FUNCLETS) - //------------------------------------------------------------------------- - // Create a label for tracking of region protected by the monitor in synchronized methods. - // This needs to be here, rather than above where fPossibleSyncHelperCall is set, - // so the GC state vars have been updated before creating the label. - - if (fPossibleSyncHelperCall) - { - switch (helperNum) - { - case CORINFO_HELP_MON_ENTER: - case CORINFO_HELP_MON_ENTER_STATIC: - noway_assert(compiler->syncStartEmitCookie == NULL); - compiler->syncStartEmitCookie = - GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); - noway_assert(compiler->syncStartEmitCookie != NULL); - break; - case CORINFO_HELP_MON_EXIT: - case CORINFO_HELP_MON_EXIT_STATIC: - noway_assert(compiler->syncEndEmitCookie == NULL); - compiler->syncEndEmitCookie = - GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); - noway_assert(compiler->syncEndEmitCookie != NULL); - break; - default: - break; - } - } -#endif // !FEATURE_EH_FUNCLETS - - unsigned stackAdjustBias = 0; - -#if defined(TARGET_X86) - // Is the caller supposed to pop the arguments? - if (fCallerPop && (stackArgBytes != 0)) - { - stackAdjustBias = stackArgBytes; - } - - SubtractStackLevel(stackArgBytes); -#endif // TARGET_X86 - - genRemoveAlignmentAfterCall(call, stackAdjustBias); -} - -// Produce code for a GT_JMP node. -// The arguments of the caller needs to be transferred to the callee before exiting caller. -// The actual jump to callee is generated as part of caller epilog sequence. -// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. 
-void CodeGen::genJmpMethod(GenTree* jmp) -{ - assert(jmp->OperGet() == GT_JMP); - assert(compiler->compJmpOpUsed); - - // If no arguments, nothing to do - if (compiler->info.compArgsCount == 0) - { - return; - } - - // Make sure register arguments are in their initial registers - // and stack arguments are put back as well. - unsigned varNum; - LclVarDsc* varDsc; - - // First move any en-registered stack arguments back to the stack. - // At the same time any reg arg not in correct reg is moved back to its stack location. - // - // We are not strictly required to spill reg args that are not in the desired reg for a jmp call - // But that would require us to deal with circularity while moving values around. Spilling - // to stack makes the implementation simple, which is not a bad trade off given Jmp calls - // are not frequent. - for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) - { - varDsc = compiler->lvaTable + varNum; - - if (varDsc->lvPromoted) - { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = varDsc->lvFieldLclStart; - varDsc = compiler->lvaTable + fieldVarNum; - } - noway_assert(varDsc->lvIsParam); - - if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK)) - { - // Skip reg args which are already in its right register for jmp call. - // If not, we will spill such args to their stack locations. - // - // If we need to generate a tail call profiler hook, then spill all - // arg regs to free them up for the callback. - if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg())) - { - continue; - } - } - else if (varDsc->GetRegNum() == REG_STK) - { - // Skip args which are currently living in stack. - continue; - } - - // If we came here it means either a reg argument not in the right register or - // a stack argument currently living in a register. In either case the following - // assert should hold. - assert(varDsc->GetRegNum() != REG_STK); - - assert(!varDsc->lvIsStructField || (compiler->lvaTable[varDsc->lvParentLcl].lvFieldCnt == 1)); - var_types storeType = genActualType(varDsc->lvaArgType()); // We own the memory and can use the full move. - GetEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), varDsc->GetRegNum(), varNum, 0); - - // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. - // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. - // Therefore manually update life of varDsc->GetRegNum(). - regMaskTP tempMask = varDsc->lvRegMask(); - regSet.RemoveMaskVars(tempMask); - gcInfo.gcMarkRegSetNpt(tempMask); - if (compiler->lvaIsGCTracked(varDsc)) - { -#ifdef DEBUG - if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); - } - else - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); - } -#endif // DEBUG - - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - } - } - -#ifdef PROFILING_SUPPORTED - // At this point all arg regs are free. - // Emit tail call profiler callback. - genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); -#endif - - // Next move any un-enregistered register arguments back to their register. - regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. 
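// [Editor's note] A minimal standalone illustration (not part of this patch) of the circularity
// concern mentioned in the comment above: when two argument registers need each other's value,
// direct register-to-register copies clobber one of them in either order, whereas spilling both
// to their stack homes first and reloading afterwards works without any move-ordering analysis.
#include <cassert>

int main()
{
    int regArg0 = 10, regArg1 = 20; // pretend arg registers whose values must end up swapped
    int home0, home1;               // pretend stack homes of the two arguments

    // Spill-first approach used for GT_JMP: every destination is read only after all spills.
    home0   = regArg0;
    home1   = regArg1;
    regArg0 = home1;
    regArg1 = home0;

    assert(regArg0 == 20 && regArg1 == 10);
    return 0;
}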
- unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. - for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) - { - varDsc = compiler->lvaTable + varNum; - if (varDsc->lvPromoted) - { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = varDsc->lvFieldLclStart; - varDsc = compiler->lvaTable + fieldVarNum; - } - noway_assert(varDsc->lvIsParam); - - // Skip if arg not passed in a register. - if (!varDsc->lvIsRegArg) - { - continue; - } - -#if defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(varDsc)) - { - CORINFO_CLASS_HANDLE typeHnd = varDsc->GetStructHnd(); - assert(typeHnd != nullptr); - - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); - assert(structDesc.passedInRegisters); - - unsigned __int8 offset0 = 0; - unsigned __int8 offset1 = 0; - var_types type0 = TYP_UNKNOWN; - var_types type1 = TYP_UNKNOWN; - - // Get the eightbyte data - compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1); - - // Move the values into the right registers. - // - - // Update varDsc->GetArgReg() and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and - // argReg is going live. Note that we cannot modify varDsc->GetRegNum() and lvOtherArgReg here - // because another basic block may not be expecting it. - // Therefore manually update life of argReg. Note that GT_JMP marks - // the end of the basic block and after which reg life and gc info will be recomputed for the new block in - // genCodeForBBList(). - if (type0 != TYP_UNKNOWN) - { - GetEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->GetArgReg(), varNum, offset0); - regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetArgReg())); - gcInfo.gcMarkRegPtrVal(varDsc->GetArgReg(), type0); - } - - if (type1 != TYP_UNKNOWN) - { - GetEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->GetOtherArgReg(), varNum, - offset1); - regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetOtherArgReg())); - gcInfo.gcMarkRegPtrVal(varDsc->GetOtherArgReg(), type1); - } - - if (varDsc->lvTracked) - { - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - } - } - else -#endif // !defined(UNIX_AMD64_ABI) - { - // Register argument - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_X86 - noway_assert( - isRegParamType(genActualType(varDsc->TypeGet())) || - (varTypeIsStruct(varDsc->TypeGet()) && compiler->isTrivialPointerSizedStruct(varDsc->GetStructHnd()))); -#else - noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); -#endif // TARGET_X86 - - // Is register argument already in the right register? - // If not load it from its stack location. - var_types loadType = varDsc->lvaArgType(); - -#ifdef TARGET_X86 - if (varTypeIsStruct(varDsc->TypeGet())) - { - // Treat trivial pointer-sized structs as a pointer sized primitive - // for the purposes of registers. - loadType = TYP_I_IMPL; - } -#endif - - regNumber argReg = varDsc->GetArgReg(); // incoming arg register - - if (varDsc->GetRegNum() != argReg) - { - assert(genIsValidReg(argReg)); - GetEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); - - // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. - // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be - // expecting it. 
Therefore manually update life of argReg. Note that GT_JMP marks the end of the - // basic block and after which reg life and gc info will be recomputed for the new block in - // genCodeForBBList(). - regSet.AddMaskVars(genRegMask(argReg)); - gcInfo.gcMarkRegPtrVal(argReg, loadType); - if (compiler->lvaIsGCTracked(varDsc)) - { -#ifdef DEBUG - if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum); - } - else - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum); - } -#endif // DEBUG - - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - } - } - } - -#if FEATURE_VARARG && defined(TARGET_AMD64) - // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg - // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to - // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point - // values on the stack. - if (compiler->info.compIsVarArgs) - { - regNumber intArgReg; - var_types loadType = varDsc->lvaArgType(); - regNumber argReg = varDsc->GetArgReg(); // incoming arg register - - if (varTypeIsFloating(loadType)) - { - intArgReg = compiler->getCallArgIntRegister(argReg); - inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); - } - else - { - intArgReg = argReg; - } - - fixedIntArgMask |= genRegMask(intArgReg); - - if (intArgReg == REG_ARG_0) - { - assert(firstArgVarNum == BAD_VAR_NUM); - firstArgVarNum = varNum; - } - } -#endif // FEATURE_VARARG - } - -#if FEATURE_VARARG && defined(TARGET_AMD64) - // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments, - // load the remaining arg registers (both int and float) from the corresponding - // shadow stack slots. This is for the reason that we don't know the number and type - // of non-fixed params passed by the caller, therefore we have to assume the worst case - // of caller passing float/double args both in int and float arg regs. - // - // This doesn't apply to x86, which doesn't pass floating point values in floating - // point registers. - // - // The caller could have passed gc-ref/byref type var args. Since these are var args - // the callee no way of knowing their gc-ness. Therefore, mark the region that loads - // remaining arg registers from shadow stack slots as non-gc interruptible. 
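// [Editor's note] A minimal standalone sketch (not part of this patch) of the Windows x64
// vararg convention referenced above: a floating-point argument is duplicated in both the XMM
// argument register and the integer argument register for that position, so the callee can pick
// it up either way. The enum and helper below are purely illustrative, not JIT types.
enum class ArgSlot { Slot0, Slot1, Slot2, Slot3 };

struct ArgRegPair
{
    const char* intReg;   // RCX, RDX, R8, R9
    const char* floatReg; // XMM0, XMM1, XMM2, XMM3
};

static ArgRegPair GetArgRegPair(ArgSlot slot)
{
    switch (slot)
    {
        case ArgSlot::Slot0: return {"RCX", "XMM0"};
        case ArgSlot::Slot1: return {"RDX", "XMM1"};
        case ArgSlot::Slot2: return {"R8", "XMM2"};
        default:             return {"R9", "XMM3"};
    }
}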
- if (fixedIntArgMask != RBM_NONE) - { - assert(compiler->info.compIsVarArgs); - assert(firstArgVarNum != BAD_VAR_NUM); - - regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; - if (remainingIntArgMask != RBM_NONE) - { - GetEmitter()->emitDisableGC(); - for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) - { - regNumber argReg = intArgRegs[argNum]; - regMaskTP argRegMask = genRegMask(argReg); - - if ((remainingIntArgMask & argRegMask) != 0) - { - remainingIntArgMask &= ~argRegMask; - GetEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset); - - // also load it in corresponding float arg reg - regNumber floatReg = compiler->getCallArgFloatRegister(argReg); - inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg); - } - - argOffset += REGSIZE_BYTES; - } - GetEmitter()->emitEnableGC(); - } - } -#endif // FEATURE_VARARG -} - -// produce code for a GT_LEA subnode -void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) -{ - emitAttr size = emitTypeSize(lea); - genConsumeOperands(lea); - - if (lea->Base() && lea->Index()) - { - regNumber baseReg = lea->Base()->GetRegNum(); - regNumber indexReg = lea->Index()->GetRegNum(); - GetEmitter()->emitIns_R_ARX(INS_lea, size, lea->GetRegNum(), baseReg, indexReg, lea->gtScale, lea->Offset()); - } - else if (lea->Base()) - { - GetEmitter()->emitIns_R_AR(INS_lea, size, lea->GetRegNum(), lea->Base()->GetRegNum(), lea->Offset()); - } - else if (lea->Index()) - { - GetEmitter()->emitIns_R_ARX(INS_lea, size, lea->GetRegNum(), REG_NA, lea->Index()->GetRegNum(), lea->gtScale, - lea->Offset()); - } - - genProduceReg(lea); -} - -//------------------------------------------------------------------------ -// genCompareFloat: Generate code for comparing two floating point values -// -// Arguments: -// treeNode - the compare tree -// -void CodeGen::genCompareFloat(GenTree* treeNode) -{ - assert(treeNode->OperIsCompare()); - - GenTreeOp* tree = treeNode->AsOp(); - GenTree* op1 = tree->gtOp1; - GenTree* op2 = tree->gtOp2; - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - - genConsumeOperands(tree); - - assert(varTypeIsFloating(op1Type)); - assert(op1Type == op2Type); - - regNumber targetReg = treeNode->GetRegNum(); - instruction ins; - emitAttr cmpAttr; - - GenCondition condition = GenCondition::FromFloatRelop(treeNode); - - if (condition.PreferSwap()) - { - condition = GenCondition::Swap(condition); - std::swap(op1, op2); - } - - ins = ins_FloatCompare(op1Type); - cmpAttr = emitTypeSize(op1Type); - - GetEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); - - // Are we evaluating this into a register? - if (targetReg != REG_NA) - { - if ((condition.GetCode() == GenCondition::FNEU) && (op1->GetRegNum() == op2->GetRegNum())) - { - // For floating point, `x != x` is a common way of - // checking for NaN. So, in the case where both - // operands are the same, we can optimize codegen - // to only do a single check. - - condition = GenCondition(GenCondition::P); - } - - inst_SETCC(condition, treeNode->TypeGet(), targetReg); - genProduceReg(tree); - } -} - -//------------------------------------------------------------------------ -// genCompareInt: Generate code for comparing ints or, on amd64, longs. -// -// Arguments: -// treeNode - the compare tree -// -// Return Value: -// None. 
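// [Editor's note] A tiny standalone illustration (not part of this patch) of the NaN idiom
// referenced in genCompareFloat above: "x != x" holds exactly when x is NaN, which is why a
// same-register floating point compare only needs the parity flag to be checked.
#include <cassert>
#include <cmath>
#include <limits>

int main()
{
    double nan = std::numeric_limits<double>::quiet_NaN();
    double one = 1.0;

    assert((nan != nan) && std::isnan(nan)); // NaN compares unequal to itself
    assert(!(one != one));                   // every non-NaN value compares equal to itself
    return 0;
}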
-void CodeGen::genCompareInt(GenTree* treeNode) -{ - assert(treeNode->OperIsCompare() || treeNode->OperIs(GT_CMP)); - - GenTreeOp* tree = treeNode->AsOp(); - GenTree* op1 = tree->gtOp1; - GenTree* op2 = tree->gtOp2; - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - regNumber targetReg = tree->GetRegNum(); - emitter* emit = GetEmitter(); - bool canReuseFlags = false; - - genConsumeOperands(tree); - - assert(!op1->isContainedIntOrIImmed()); - assert(!varTypeIsFloating(op2Type)); - - instruction ins; - var_types type = TYP_UNKNOWN; - - if (tree->OperIs(GT_TEST_EQ, GT_TEST_NE)) - { - ins = INS_test; - - // Unlike many xarch instructions TEST doesn't have a form with a 16/32/64 bit first operand and - // an 8 bit immediate second operand. But if the immediate value fits in 8 bits then we can simply - // emit a 8 bit TEST instruction, unless we're targeting x86 and the first operand is a non-byteable - // register. - // Note that lowering does something similar but its main purpose is to allow memory operands to be - // contained so it doesn't handle other kind of operands. It could do more but on x86 that results - // in additional register constrains and that may be worse than wasting 3 bytes on an immediate. - if ( -#ifdef TARGET_X86 - (!op1->isUsedFromReg() || isByteReg(op1->GetRegNum())) && -#endif - (op2->IsCnsIntOrI() && genSmallTypeCanRepresentValue(TYP_UBYTE, op2->AsIntCon()->IconValue()))) - { - type = TYP_UBYTE; - } - } - else if (op1->isUsedFromReg() && op2->IsIntegralConst(0)) - { - if (compiler->opts.OptimizationEnabled()) - { - emitAttr op1Size = emitActualTypeSize(op1->TypeGet()); - assert((int)op1Size >= 4); - - // Optimize "x<0" and "x>=0" to "x>>31" if "x" is not a jump condition and in a reg. - // Morph/Lowering are responsible to rotate "00" so we won't handle it here. - if ((targetReg != REG_NA) && tree->OperIs(GT_LT, GT_GE) && !tree->IsUnsigned()) - { - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(INS_mov, targetReg, op1->GetRegNum(), op1->TypeGet()); - } - if (tree->OperIs(GT_GE)) - { - // emit "not" for "x>=0" case - inst_RV(INS_not, targetReg, op1->TypeGet()); - } - inst_RV_IV(INS_shr_N, targetReg, (int)op1Size * 8 - 1, op1Size); - genProduceReg(tree); - return; - } - canReuseFlags = true; - } - - // We're comparing a register to 0 so we can generate "test reg1, reg1" - // instead of the longer "cmp reg1, 0" - ins = INS_test; - op2 = op1; - } - else - { - ins = INS_cmp; - } - - if (type == TYP_UNKNOWN) - { - if (op1Type == op2Type) - { - type = op1Type; - } - else if (genTypeSize(op1Type) == genTypeSize(op2Type)) - { - // If the types are different but have the same size then we'll use TYP_INT or TYP_LONG. - // This primarily deals with small type mixes (e.g. byte/ubyte) that need to be widened - // and compared as int. We should not get long type mixes here but handle that as well - // just in case. - type = genTypeSize(op1Type) == 8 ? TYP_LONG : TYP_INT; - } - else - { - // In the types are different simply use TYP_INT. This deals with small type/int type - // mixes (e.g. byte/short ubyte/int) that need to be widened and compared as int. - // Lowering is expected to handle any mixes that involve long types (e.g. int/long). 
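// [Editor's note] A standalone sketch (not part of this patch) of the branch-free relops used
// earlier in genCompareInt above: for a 32-bit signed x, "x < 0" is just the sign bit, so it can
// be materialized as a logical shift right by 31, and "x >= 0" is the same after a bitwise NOT.
#include <cassert>
#include <cstdint>

static int32_t IsNegative(int32_t x) // x < 0
{
    return static_cast<int32_t>(static_cast<uint32_t>(x) >> 31);
}

static int32_t IsNonNegative(int32_t x) // x >= 0, via "not" then shift
{
    return static_cast<int32_t>(~static_cast<uint32_t>(x) >> 31);
}

int main()
{
    assert(IsNegative(-5) == 1 && IsNegative(0) == 0 && IsNegative(7) == 0);
    assert(IsNonNegative(-5) == 0 && IsNonNegative(0) == 1 && IsNonNegative(7) == 1);
    return 0;
}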
- type = TYP_INT; - } - - // The common type cannot be smaller than any of the operand types, we're probably mixing int/long - assert(genTypeSize(type) >= max(genTypeSize(op1Type), genTypeSize(op2Type))); - // Small unsigned int types (TYP_BOOL can use anything) should use unsigned comparisons - assert(!(varTypeIsSmallInt(type) && varTypeIsUnsigned(type)) || ((tree->gtFlags & GTF_UNSIGNED) != 0)); - // If op1 is smaller then it cannot be in memory, we're probably missing a cast - assert((genTypeSize(op1Type) >= genTypeSize(type)) || !op1->isUsedFromMemory()); - // If op2 is smaller then it cannot be in memory, we're probably missing a cast - assert((genTypeSize(op2Type) >= genTypeSize(type)) || !op2->isUsedFromMemory()); - // If we ended up with a small type and op2 is a constant then make sure we don't lose constant bits - assert(!op2->IsCnsIntOrI() || !varTypeIsSmall(type) || - genSmallTypeCanRepresentValue(type, op2->AsIntCon()->IconValue())); - } - - // The type cannot be larger than the machine word size - assert(genTypeSize(type) <= genTypeSize(TYP_I_IMPL)); - // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned - assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type)); - - bool needsOCFlags = !tree->OperIs(GT_EQ, GT_NE); - if (canReuseFlags && emit->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(type), needsOCFlags)) - { - JITDUMP("Not emitting compare due to flags being already set\n"); - } - else - { - emit->emitInsBinary(ins, emitTypeSize(type), op1, op2); - } - - // Are we evaluating this into a register? - if (targetReg != REG_NA) - { - inst_SETCC(GenCondition::FromIntegralRelop(tree), tree->TypeGet(), targetReg); - genProduceReg(tree); - } -} - -#if !defined(TARGET_64BIT) -//------------------------------------------------------------------------ -// genLongToIntCast: Generate code for long to int casts on x86. -// -// Arguments: -// cast - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// The cast node and its sources (via GT_LONG) must have been assigned registers. -// The destination cannot be a floating point type or a small integer type. -// -void CodeGen::genLongToIntCast(GenTree* cast) -{ - assert(cast->OperGet() == GT_CAST); - - GenTree* src = cast->gtGetOp1(); - noway_assert(src->OperGet() == GT_LONG); - - genConsumeRegs(src); - - var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? 
TYP_ULONG : TYP_LONG; - var_types dstType = cast->CastToType(); - regNumber loSrcReg = src->gtGetOp1()->GetRegNum(); - regNumber hiSrcReg = src->gtGetOp2()->GetRegNum(); - regNumber dstReg = cast->GetRegNum(); - - assert((dstType == TYP_INT) || (dstType == TYP_UINT)); - assert(genIsValidIntReg(loSrcReg)); - assert(genIsValidIntReg(hiSrcReg)); - assert(genIsValidIntReg(dstReg)); - - if (cast->gtOverflow()) - { - // - // Generate an overflow check for [u]long to [u]int casts: - // - // long -> int - check if the upper 33 bits are all 0 or all 1 - // - // ulong -> int - check if the upper 33 bits are all 0 - // - // long -> uint - check if the upper 32 bits are all 0 - // ulong -> uint - check if the upper 32 bits are all 0 - // - - if ((srcType == TYP_LONG) && (dstType == TYP_INT)) - { - BasicBlock* allOne = genCreateTempLabel(); - BasicBlock* success = genCreateTempLabel(); - - inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE); - inst_JMP(EJ_js, allOne); - - inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE); - genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); - inst_JMP(EJ_jmp, success); - - genDefineTempLabel(allOne); - inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE); - genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); - - genDefineTempLabel(success); - } - else - { - if ((srcType == TYP_ULONG) && (dstType == TYP_INT)) - { - inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE); - genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW); - } - - inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE); - genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); - } - } - - if (dstReg != loSrcReg) - { - inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE); - } - - genProduceReg(cast); -} -#endif - -//------------------------------------------------------------------------ -// genIntCastOverflowCheck: Generate overflow checking code for an integer cast. -// -// Arguments: -// cast - The GT_CAST node -// desc - The cast description -// reg - The register containing the value to check -// -void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg) -{ - switch (desc.CheckKind()) - { - case GenIntCastDesc::CHECK_POSITIVE: - GetEmitter()->emitIns_R_R(INS_test, EA_SIZE(desc.CheckSrcSize()), reg, reg); - genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); - break; - -#ifdef TARGET_64BIT - case GenIntCastDesc::CHECK_UINT_RANGE: - { - // We need to check if the value is not greater than 0xFFFFFFFF but this value - // cannot be encoded in an immediate operand. Use a right shift to test if the - // upper 32 bits are zero. This requires a temporary register. 
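// [Editor's note] A standalone sketch (not part of this patch) of the CHECK_UINT_RANGE test
// described in the comment above: "value fits in 32 unsigned bits" is checked as
// "(value >> 32) == 0", since 0xFFFFFFFF cannot be encoded as the sign-extended 32-bit
// immediate of a 64-bit compare.
#include <cassert>
#include <cstdint>

static bool FitsInUInt32(uint64_t value)
{
    return (value >> 32) == 0;
}

int main()
{
    assert(FitsInUInt32(0));
    assert(FitsInUInt32(0xFFFFFFFFull));
    assert(!FitsInUInt32(0x100000000ull));
    return 0;
}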
- const regNumber tempReg = cast->GetSingleTempReg(); - assert(tempReg != reg); - GetEmitter()->emitIns_R_R(INS_mov, EA_8BYTE, tempReg, reg); - GetEmitter()->emitIns_R_I(INS_shr_N, EA_8BYTE, tempReg, 32); - genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); - } - break; - - case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE: - GetEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MAX); - genJumpToThrowHlpBlk(EJ_ja, SCK_OVERFLOW); - break; - - case GenIntCastDesc::CHECK_INT_RANGE: - GetEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MAX); - genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW); - GetEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MIN); - genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); - break; -#endif - - default: - { - assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); - const int castMaxValue = desc.CheckSmallIntMax(); - const int castMinValue = desc.CheckSmallIntMin(); - - GetEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(desc.CheckSrcSize()), reg, castMaxValue); - genJumpToThrowHlpBlk((castMinValue == 0) ? EJ_ja : EJ_jg, SCK_OVERFLOW); - - if (castMinValue != 0) - { - GetEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(desc.CheckSrcSize()), reg, castMinValue); - genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); - } - } - break; - } -} - -//------------------------------------------------------------------------ -// genIntToIntCast: Generate code for an integer cast, with or without overflow check. -// -// Arguments: -// cast - The GT_CAST node -// -// Assumptions: -// The cast node is not a contained node and must have an assigned register. -// Neither the source nor target type can be a floating point type. -// On x86 casts to (U)BYTE require that the source be in a byte register. -// -// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register. 
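// [Editor's note] A standalone sketch (not part of this patch) of why the small-int range check
// above can use a single unsigned branch (ja) when the minimum is zero: for a non-negative
// maximum, an unsigned "value > max" is true both for values above max and for negative values,
// whose unsigned representation is huge.
#include <cassert>
#include <cstdint>

static bool OutOfUByteRange(int32_t value) // target range [0, 255], so castMinValue == 0
{
    return static_cast<uint32_t>(value) > 255u; // one unsigned compare covers both failure cases
}

int main()
{
    assert(!OutOfUByteRange(0) && !OutOfUByteRange(255));
    assert(OutOfUByteRange(256));
    assert(OutOfUByteRange(-1)); // negative -> 0xFFFFFFFF unsigned -> greater than 255
    return 0;
}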
-// -void CodeGen::genIntToIntCast(GenTreeCast* cast) -{ - genConsumeRegs(cast->gtGetOp1()); - - const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); - const regNumber dstReg = cast->GetRegNum(); - emitter* emit = GetEmitter(); - - assert(genIsValidIntReg(srcReg)); - assert(genIsValidIntReg(dstReg)); - - GenIntCastDesc desc(cast); - - if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) - { - genIntCastOverflowCheck(cast, desc, srcReg); - } - - if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) - { - instruction ins; - unsigned insSize; - bool canSkip = false; - - switch (desc.ExtendKind()) - { - case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: - ins = INS_movzx; - insSize = desc.ExtendSrcSize(); - break; - case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: - ins = INS_movsx; - insSize = desc.ExtendSrcSize(); - break; -#ifdef TARGET_64BIT - case GenIntCastDesc::ZERO_EXTEND_INT: - // We can skip emitting this zero extending move if the previous instruction zero extended implicitly - if ((srcReg == dstReg) && compiler->opts.OptimizationEnabled()) - { - canSkip = emit->AreUpper32BitsZero(srcReg); - } - - ins = INS_mov; - insSize = 4; - break; - case GenIntCastDesc::SIGN_EXTEND_INT: - ins = INS_movsxd; - insSize = 4; - break; -#endif - default: - assert(desc.ExtendKind() == GenIntCastDesc::COPY); - assert(srcReg != dstReg); - ins = INS_mov; - insSize = desc.ExtendSrcSize(); - break; - } - - if (canSkip) - { - JITDUMP("\n -- suppressing emission as previous instruction already properly extends.\n"); - } - else - { - emit->emitIns_R_R(ins, EA_ATTR(insSize), dstReg, srcReg); - } - } - - genProduceReg(cast); -} - -//------------------------------------------------------------------------ -// genFloatToFloatCast: Generate code for a cast between float and double -// -// Arguments: -// treeNode - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// Cast is a non-overflow conversion. -// The treeNode must have an assigned register. -// The cast is between float and double or vice versa. -// -void CodeGen::genFloatToFloatCast(GenTree* treeNode) -{ - // float <--> double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); - assert(!treeNode->gtOverflow()); - - regNumber targetReg = treeNode->GetRegNum(); - assert(genIsValidFloatReg(targetReg)); - - GenTree* op1 = treeNode->AsOp()->gtOp1; -#ifdef DEBUG - // If not contained, must be a valid float reg. - if (op1->isUsedFromReg()) - { - assert(genIsValidFloatReg(op1->GetRegNum())); - } -#endif - - var_types dstType = treeNode->CastToType(); - var_types srcType = op1->TypeGet(); - assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); - - genConsumeOperands(treeNode->AsOp()); - if (srcType == dstType && (op1->isUsedFromReg() && (targetReg == op1->GetRegNum()))) - { - // source and destinations types are the same and also reside in the same register. - // we just need to consume and produce the reg in this case. - ; - } - else - { - instruction ins = ins_FloatConv(dstType, srcType); - GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); - } - - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genIntToFloatCast: Generate code to cast an int/long to float/double -// -// Arguments: -// treeNode - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// Cast is a non-overflow conversion. -// The treeNode must have an assigned register. 
-// SrcType= int32/uint32/int64/uint64 and DstType=float/double. -// -void CodeGen::genIntToFloatCast(GenTree* treeNode) -{ - // int type --> float/double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); - assert(!treeNode->gtOverflow()); - - regNumber targetReg = treeNode->GetRegNum(); - assert(genIsValidFloatReg(targetReg)); - - GenTree* op1 = treeNode->AsOp()->gtOp1; -#ifdef DEBUG - if (op1->isUsedFromReg()) - { - assert(genIsValidIntReg(op1->GetRegNum())); - } -#endif - - var_types dstType = treeNode->CastToType(); - var_types srcType = op1->TypeGet(); - assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); - -#if !defined(TARGET_64BIT) - // We expect morph to replace long to float/double casts with helper calls - noway_assert(!varTypeIsLong(srcType)); -#endif // !defined(TARGET_64BIT) - - // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we - // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except - // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered - // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack, - // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type - // temp and using temp as operand of cast operation. - if (srcType == TYP_BYREF) - { - noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR); - srcType = TYP_I_IMPL; - } - - // force the srcType to unsigned if GT_UNSIGNED flag is set - if (treeNode->gtFlags & GTF_UNSIGNED) - { - srcType = genUnsignedType(srcType); - } - - noway_assert(!varTypeIsGC(srcType)); - - // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long). - // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect - // either the front-end or lowering phase to have generated two levels of cast. - // The first one is for widening smaller int type to int32 and the second one is - // to the float/double. - emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); - noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG)))); - - // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions - // here since they should have been lowered apropriately. - noway_assert(srcType != TYP_UINT); - noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); - - // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used - // which does a partial write to lower 4/8 bytes of xmm register keeping the other - // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop, - // the partial write could introduce a false dependency and could cause a stall - // if there are further uses of xmmReg. We have such a case occurring with a - // customer reported version of SpectralNorm benchmark, resulting in 2x perf - // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before - // cvtsi2ss/sd instruction. - - genConsumeOperands(treeNode->AsOp()); - GetEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->GetRegNum(), treeNode->GetRegNum()); - - // Note that here we need to specify srcType that will determine - // the size of source reg/mem operand and rex.w prefix. - instruction ins = ins_FloatConv(dstType, TYP_INT); - GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); - - // Handle the case of srcType = TYP_ULONG. 
SSE2 conversion instruction - // will interpret ULONG value as LONG. Hence we need to adjust the - // result if sign-bit of srcType is set. - if (srcType == TYP_ULONG) - { - // The instruction sequence below is less accurate than what clang - // and gcc generate. However, we keep the current sequence for backward compatibility. - // If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule - // should be also updated for consistent conversion result. - assert(dstType == TYP_DOUBLE); - assert(op1->isUsedFromReg()); - - // Set the flags without modifying op1. - // test op1Reg, op1Reg - inst_RV_RV(INS_test, op1->GetRegNum(), op1->GetRegNum(), srcType); - - // No need to adjust result if op1 >= 0 i.e. positive - // Jge label - BasicBlock* label = genCreateTempLabel(); - inst_JMP(EJ_jge, label); - - // Adjust the result - // result = result + 0x43f00000 00000000 - // addsd resultReg, 0x43f00000 00000000 - CORINFO_FIELD_HANDLE* cns = &u8ToDblBitmask; - if (*cns == nullptr) - { - double d; - static_assert_no_msg(sizeof(double) == sizeof(__int64)); - *((__int64*)&d) = 0x43f0000000000000LL; - - *cns = GetEmitter()->emitFltOrDblConst(d, EA_8BYTE); - } - GetEmitter()->emitIns_R_C(INS_addsd, EA_8BYTE, treeNode->GetRegNum(), *cns, 0); - - genDefineTempLabel(label); - } - - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genFloatToIntCast: Generate code to cast float/double to int/long -// -// Arguments: -// treeNode - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// Cast is a non-overflow conversion. -// The treeNode must have an assigned register. -// SrcType=float/double and DstType= int32/uint32/int64/uint64 -// -// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64 -// -void CodeGen::genFloatToIntCast(GenTree* treeNode) -{ - // we don't expect to see overflow detecting float/double --> int type conversions here - // as they should have been converted into helper calls by front-end. - assert(treeNode->OperGet() == GT_CAST); - assert(!treeNode->gtOverflow()); - - regNumber targetReg = treeNode->GetRegNum(); - assert(genIsValidIntReg(targetReg)); - - GenTree* op1 = treeNode->AsOp()->gtOp1; -#ifdef DEBUG - if (op1->isUsedFromReg()) - { - assert(genIsValidFloatReg(op1->GetRegNum())); - } -#endif - - var_types dstType = treeNode->CastToType(); - var_types srcType = op1->TypeGet(); - assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); - - // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG). - // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the - // front-end or lowering phase to have generated two levels of cast. The first one is - // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to - // the required smaller int type. - emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); - noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); - - // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase. - noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); - - // If the dstType is TYP_UINT, we have 32-bits to encode the - // float number. Any of 33rd or above bits can be the sign bit. - // To achieve it we pretend as if we are converting it to a long. 
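// [Editor's note] A standalone sketch (not part of this patch) of the trick described in the
// comment above for float/double -> uint32: convert to a signed 64-bit integer (which cvttsd2si
// can produce directly) and truncate; this is exact for every value in the uint32 range.
#include <cassert>
#include <cstdint>

static uint32_t DoubleToUInt32(double d)
{
    return static_cast<uint32_t>(static_cast<int64_t>(d)); // "pretend we are converting to a long"
}

int main()
{
    assert(DoubleToUInt32(0.0) == 0u);
    assert(DoubleToUInt32(3000000000.0) == 3000000000u); // above INT32_MAX, still exact
    assert(DoubleToUInt32(4294967295.0) == 4294967295u);
    return 0;
}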
- if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT)))) - { - dstType = TYP_LONG; - } - - // Note that we need to specify dstType here so that it will determine - // the size of destination integer register and also the rex.w prefix. - genConsumeOperands(treeNode->AsOp()); - instruction ins = ins_FloatConv(TYP_INT, srcType); - GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genCkfinite: Generate code for ckfinite opcode. -// -// Arguments: -// treeNode - The GT_CKFINITE node -// -// Return Value: -// None. -// -// Assumptions: -// GT_CKFINITE node has reserved an internal register. -// -// TODO-XArch-CQ - mark the operand as contained if known to be in -// memory (e.g. field or an array element). -// -void CodeGen::genCkfinite(GenTree* treeNode) -{ - assert(treeNode->OperGet() == GT_CKFINITE); - - GenTree* op1 = treeNode->AsOp()->gtOp1; - var_types targetType = treeNode->TypeGet(); - int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent. - regNumber targetReg = treeNode->GetRegNum(); - - // Extract exponent into a register. - regNumber tmpReg = treeNode->GetSingleTempReg(); - - genConsumeReg(op1); - -#ifdef TARGET_64BIT - - // Copy the floating-point value to an integer register. If we copied a float to a long, then - // right-shift the value so the high 32 bits of the floating-point value sit in the low 32 - // bits of the integer register. - regNumber srcReg = op1->GetRegNum(); - var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG); - inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType); - if (targetType == TYP_DOUBLE) - { - // right shift by 32 bits to get to exponent. - inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32); - } - - // Mask exponent with all 1's and check if the exponent is all 1's - inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); - inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); - - // If exponent is all 1's, throw ArithmeticException - genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); - - // if it is a finite value copy it to targetReg - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } - -#else // !TARGET_64BIT - - // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register. - // There is no easy way to do this. To not require an extra register, we'll use shuffles - // to move the high 32 bits into the low 32 bits, then shuffle it back, since we - // need to produce the value into the target register. 
- // - // For TYP_DOUBLE, we'll generate (for targetReg != op1->GetRegNum()): - // movaps targetReg, op1->GetRegNum() - // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY - // mov_xmm2i tmpReg, targetReg // tmpReg <= Y - // and tmpReg, - // cmp tmpReg, - // je - // movaps targetReg, op1->GetRegNum() // copy the value again, instead of un-shuffling it - // - // For TYP_DOUBLE with (targetReg == op1->GetRegNum()): - // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY - // mov_xmm2i tmpReg, targetReg // tmpReg <= Y - // and tmpReg, - // cmp tmpReg, - // je - // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX - // - // For TYP_FLOAT, it's the same as TARGET_64BIT: - // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits - // and tmpReg, - // cmp tmpReg, - // je - // movaps targetReg, op1->GetRegNum() // only if targetReg != op1->GetRegNum() - - regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp. - - if (targetType == TYP_DOUBLE) - { - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } - inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); - copyToTmpSrcReg = targetReg; - } - else - { - copyToTmpSrcReg = op1->GetRegNum(); - } - - // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point - // value, no matter the floating-point type. - inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT); - - // Mask exponent with all 1's and check if the exponent is all 1's - inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); - inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); - - // If exponent is all 1's, throw ArithmeticException - genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); - - if (targetReg != op1->GetRegNum()) - { - // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched, - // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE - // than re-shuffling the targetReg. - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } - else if (targetType == TYP_DOUBLE) - { - // We need to re-shuffle the targetReg to get the correct result. - inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); - } - -#endif // !TARGET_64BIT - - genProduceReg(treeNode); -} - -#ifdef TARGET_AMD64 -int CodeGenInterface::genSPtoFPdelta() const -{ - int delta; - -#ifdef UNIX_AMD64_ABI - - // We require frame chaining on Unix to support native tool unwinding (such as - // unwinding by the native debugger). We have a CLR-only extension to the - // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240. - // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated. - delta = genTotalFrameSize(); - -#else // !UNIX_AMD64_ABI - - // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if - // RBP needs to be reported in unwind codes. This case would arise for methods - // with localloc. - if (compiler->compLocallocUsed) - { - // We cannot base delta computation on compLclFrameSize since it changes from - // tentative to final frame layout and hence there is a possibility of - // under-estimating offset of vars from FP, which in turn results in under- - // estimating instruction size. - // - // To be predictive and so as never to under-estimate offset of vars from FP - // we will always position FP at min(240, outgoing arg area size). 
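// [Editor's note] A standalone sketch (not part of this patch) of the exponent test used by
// genCkfinite above: a double is Infinity or NaN exactly when the 11 exponent bits in its high
// 32 bits are all ones (mask 0x7FF00000), so masking and comparing detects non-finite values.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

static bool IsFiniteDouble(double d)
{
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits));
    uint32_t hi = static_cast<uint32_t>(bits >> 32);
    return (hi & 0x7FF00000u) != 0x7FF00000u; // exponent all ones => Inf or NaN
}

int main()
{
    assert(IsFiniteDouble(1.5) && IsFiniteDouble(0.0));
    assert(!IsFiniteDouble(std::numeric_limits<double>::infinity()));
    assert(!IsFiniteDouble(std::numeric_limits<double>::quiet_NaN()));
    return 0;
}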
- delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize); - } - else if (compiler->opts.compDbgEnC) - { - // vm assumption on EnC methods is that rsp and rbp are equal - delta = 0; - } - else - { - delta = genTotalFrameSize(); - } - -#endif // !UNIX_AMD64_ABI - - return delta; -} - -//--------------------------------------------------------------------- -// genTotalFrameSize - return the total size of the stack frame, including local size, -// callee-saved register size, etc. For AMD64, this does not include the caller-pushed -// return address. -// -// Return value: -// Total frame size -// - -int CodeGenInterface::genTotalFrameSize() const -{ - assert(!IsUninitialized(compiler->compCalleeRegsPushed)); - - int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; - - assert(totalFrameSize >= 0); - return totalFrameSize; -} - -//--------------------------------------------------------------------- -// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. -// This number is going to be negative, since the Caller-SP is at a higher -// address than the frame pointer. -// -// There must be a frame pointer to call this function! -// -// We can't compute this directly from the Caller-SP, since the frame pointer -// is based on a maximum delta from Initial-SP, so first we find SP, then -// compute the FP offset. - -int CodeGenInterface::genCallerSPtoFPdelta() const -{ - assert(isFramePointerUsed()); - int callerSPtoFPdelta; - - callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); - - assert(callerSPtoFPdelta <= 0); - return callerSPtoFPdelta; -} - -//--------------------------------------------------------------------- -// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. -// -// This number will be negative. - -int CodeGenInterface::genCallerSPtoInitialSPdelta() const -{ - int callerSPtoSPdelta = 0; - - callerSPtoSPdelta -= genTotalFrameSize(); - callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address - - // compCalleeRegsPushed does not account for the frame pointer - // TODO-Cleanup: shouldn't this be part of genTotalFrameSize? - if (isFramePointerUsed()) - { - callerSPtoSPdelta -= REGSIZE_BYTES; - } - - assert(callerSPtoSPdelta <= 0); - return callerSPtoSPdelta; -} -#endif // TARGET_AMD64 - -//----------------------------------------------------------------------------------------- -// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask" -// -// Arguments: -// treeNode - tree node -// -// Return value: -// None -// -// Assumptions: -// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs() -// ii) tree type is floating point type. -// iii) caller of this routine needs to call genProduceReg() -void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) -{ - regNumber targetReg = treeNode->GetRegNum(); - regNumber operandReg = genConsumeReg(treeNode->gtGetOp1()); - emitAttr size = emitTypeSize(treeNode); - - assert(varTypeIsFloating(treeNode->TypeGet())); - assert(treeNode->gtGetOp1()->isUsedFromReg()); - - CORINFO_FIELD_HANDLE* maskFld = nullptr; - UINT64 mask = 0; - instruction ins = INS_invalid; - - if (treeNode->OperIs(GT_NEG)) - { - // Neg(x) = flip the sign bit. - // Neg(f) = f ^ 0x80000000 x4 (packed) - // Neg(d) = d ^ 0x8000000000000000 x2 (packed) - ins = INS_xorps; - mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL; - maskFld = treeNode->TypeIs(TYP_FLOAT) ? 
&negBitmaskFlt : &negBitmaskDbl; - } - else if (treeNode->OperIs(GT_INTRINSIC)) - { - assert(treeNode->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs); - // Abs(x) = set sign-bit to zero - // Abs(f) = f & 0x7fffffff x4 (packed) - // Abs(d) = d & 0x7fffffffffffffff x2 (packed) - ins = INS_andps; - mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7fffffff7fffffffUL : 0x7fffffffffffffffUL; - maskFld = treeNode->TypeIs(TYP_FLOAT) ? &absBitmaskFlt : &absBitmaskDbl; - } - else - { - assert(!"genSSE2BitwiseOp: unsupported oper"); - } - - if (*maskFld == nullptr) - { - UINT64 maskPack[] = {mask, mask}; - *maskFld = GetEmitter()->emitBlkConst(&maskPack, 16, 16, treeNode->TypeGet()); - } - - GetEmitter()->emitIns_SIMD_R_R_C(ins, size, targetReg, operandReg, *maskFld, 0); -} - -//----------------------------------------------------------------------------------------- -// genSSE41RoundOp - generate SSE41 code for the given tree as a round operation -// -// Arguments: -// treeNode - tree node -// -// Return value: -// None -// -// Assumptions: -// i) SSE4.1 is supported by the underlying hardware -// ii) treeNode oper is a GT_INTRINSIC -// iii) treeNode type is a floating point type -// iv) treeNode is not used from memory -// v) tree oper is NI_System_Math{F}_Round, _Ceiling, or _Floor -// vi) caller of this routine needs to call genProduceReg() -void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) -{ - // i) SSE4.1 is supported by the underlying hardware - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE41)); - - // ii) treeNode oper is a GT_INTRINSIC - assert(treeNode->OperGet() == GT_INTRINSIC); - - GenTree* srcNode = treeNode->gtGetOp1(); - - // iii) treeNode type is floating point type - assert(varTypeIsFloating(srcNode)); - assert(srcNode->TypeGet() == treeNode->TypeGet()); - - // iv) treeNode is not used from memory - assert(!treeNode->isUsedFromMemory()); - - genConsumeOperands(treeNode); - - instruction ins = (treeNode->TypeGet() == TYP_FLOAT) ? INS_roundss : INS_roundsd; - emitAttr size = emitTypeSize(treeNode); - - regNumber dstReg = treeNode->GetRegNum(); - - unsigned ival = 0; - - // v) tree oper is NI_System_Math{F}_Round, _Ceiling, or _Floor - switch (treeNode->AsIntrinsic()->gtIntrinsicName) - { - case NI_System_Math_Round: - ival = 4; - break; - - case NI_System_Math_Ceiling: - ival = 10; - break; - - case NI_System_Math_Floor: - ival = 9; - break; - - default: - ins = INS_invalid; - assert(!"genSSE41RoundOp: unsupported intrinsic"); - unreached(); - } - - if (srcNode->isContained() || srcNode->isUsedFromSpillTemp()) - { - emitter* emit = GetEmitter(); - - TempDsc* tmpDsc = nullptr; - unsigned varNum = BAD_VAR_NUM; - unsigned offset = (unsigned)-1; - - if (srcNode->isUsedFromSpillTemp()) - { - assert(srcNode->IsRegOptional()); - - tmpDsc = getSpillTempDsc(srcNode); - varNum = tmpDsc->tdTempNum(); - offset = 0; - - regSet.tmpRlsTemp(tmpDsc); - } - else if (srcNode->isIndir()) - { - GenTreeIndir* memIndir = srcNode->AsIndir(); - GenTree* memBase = memIndir->gtOp1; - - switch (memBase->OperGet()) - { - case GT_LCL_VAR_ADDR: - case GT_LCL_FLD_ADDR: - { - assert(memBase->isContained()); - varNum = memBase->AsLclVarCommon()->GetLclNum(); - offset = memBase->AsLclVarCommon()->GetLclOffs(); - - // Ensure that all the GenTreeIndir values are set to their defaults. 
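// [Editor's note] A standalone sketch (not part of this patch) of the bit manipulation that
// genSSE2BitwiseOp above emits with xorps/andps: negation flips the IEEE sign bit and Abs
// clears it, using the 0x80000000 / 0x7fffffff masks quoted in the comments.
#include <cassert>
#include <cstdint>
#include <cstring>

static float NegBits(float f)
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    bits ^= 0x80000000u; // Neg(f) = f ^ 0x80000000
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

static float AbsBits(float f)
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    bits &= 0x7FFFFFFFu; // Abs(f) = f & 0x7fffffff
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

int main()
{
    assert(NegBits(1.5f) == -1.5f && NegBits(-2.0f) == 2.0f);
    assert(AbsBits(-3.25f) == 3.25f && AbsBits(3.25f) == 3.25f);
    return 0;
}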
- assert(memBase->GetRegNum() == REG_NA); - assert(!memIndir->HasIndex()); - assert(memIndir->Scale() == 1); - assert(memIndir->Offset() == 0); - - break; - } - - case GT_CLS_VAR_ADDR: - { - emit->emitIns_R_C_I(ins, size, dstReg, memBase->AsClsVar()->gtClsVarHnd, 0, ival); - return; - } - - default: - { - emit->emitIns_R_A_I(ins, size, dstReg, memIndir, ival); - return; - } - } - } - else - { - switch (srcNode->OperGet()) - { - case GT_CNS_DBL: - { - GenTreeDblCon* dblConst = srcNode->AsDblCon(); - CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst->gtDconVal, emitTypeSize(dblConst)); - - emit->emitIns_R_C_I(ins, size, dstReg, hnd, 0, ival); - return; - } - - case GT_LCL_FLD: - varNum = srcNode->AsLclFld()->GetLclNum(); - offset = srcNode->AsLclFld()->GetLclOffs(); - break; - - case GT_LCL_VAR: - { - assert(srcNode->IsRegOptional() || - !compiler->lvaTable[srcNode->AsLclVar()->GetLclNum()].lvIsRegCandidate()); - - varNum = srcNode->AsLclVar()->GetLclNum(); - offset = 0; - break; - } - - default: - unreached(); - break; - } - } - - // Ensure we got a good varNum and offset. - // We also need to check for `tmpDsc != nullptr` since spill temp numbers - // are negative and start with -1, which also happens to be BAD_VAR_NUM. - assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); - assert(offset != (unsigned)-1); - - emit->emitIns_R_S_I(ins, size, dstReg, varNum, offset, ival); - } - else - { - inst_RV_RV_IV(ins, size, dstReg, srcNode->GetRegNum(), ival); - } -} - -//--------------------------------------------------------------------- -// genIntrinsic - generate code for a given intrinsic -// -// Arguments -// treeNode - the GT_INTRINSIC node -// -// Return value: -// None -// -void CodeGen::genIntrinsic(GenTree* treeNode) -{ - // Handle intrinsics that can be implemented by target-specific instructions - switch (treeNode->AsIntrinsic()->gtIntrinsicName) - { - case NI_System_Math_Abs: - genSSE2BitwiseOp(treeNode); - break; - - case NI_System_Math_Ceiling: - case NI_System_Math_Floor: - case NI_System_Math_Round: - genSSE41RoundOp(treeNode->AsOp()); - break; - - case NI_System_Math_Sqrt: - { - // Both operand and its result must be of the same floating point type. 
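// [Editor's note] A standalone sketch (not part of this patch) of the roundsd immediates chosen
// in genSSE41RoundOp above: 4 selects the current (MXCSR) rounding mode for Math.Round, while
// 9 and 10 select round-toward-negative/positive infinity for Floor and Ceiling, with the 0x8
// bit suppressing the precision exception.
#include <cstdio>
#include <smmintrin.h> // SSE4.1

int main()
{
    __m128d v = _mm_set_sd(2.5);
    double nearest, up, down;

    _mm_store_sd(&nearest, _mm_round_sd(v, v, 4)); // current mode: nearest-even by default -> 2
    _mm_store_sd(&up, _mm_round_sd(v, v, 10));     // toward +inf (Ceiling) -> 3
    _mm_store_sd(&down, _mm_round_sd(v, v, 9));    // toward -inf (Floor)   -> 2
    printf("%g %g %g\n", nearest, up, down);
    return 0;
}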
- GenTree* srcNode = treeNode->AsOp()->gtOp1; - assert(varTypeIsFloating(srcNode)); - assert(srcNode->TypeGet() == treeNode->TypeGet()); - - genConsumeOperands(treeNode->AsOp()); - GetEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode, srcNode); - break; - } - - default: - assert(!"genIntrinsic: Unsupported intrinsic"); - unreached(); - } - - genProduceReg(treeNode); -} - -//---------------------------------------------------------------------- -// genBitCast - Generate the instruction to move a value between register files -// -// Arguments -// targetType - the destination type -// targetReg - the destination register -// srcType - the source type -// srcReg - the source register -// -void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg) -{ - const bool srcFltReg = varTypeUsesFloatReg(srcType) || varTypeIsSIMD(srcType); - assert(srcFltReg == genIsValidFloatReg(srcReg)); - const bool dstFltReg = varTypeUsesFloatReg(targetType) || varTypeIsSIMD(targetType); - assert(dstFltReg == genIsValidFloatReg(targetReg)); - if (srcFltReg != dstFltReg) - { - inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType); - } - else if (targetReg != srcReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, srcReg, targetType); - } -} - -//---------------------------------------------------------------------- -// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained -// -// Arguments -// treeNode - the GT_BITCAST for which we're generating code -// -void CodeGen::genCodeForBitCast(GenTreeOp* treeNode) -{ - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); - GenTree* op1 = treeNode->gtGetOp1(); - genConsumeRegs(op1); - - if (op1->isContained()) - { - assert(op1->IsLocal() || op1->isIndir()); - if (genIsRegCandidateLocal(op1)) - { - unsigned lclNum = op1->AsLclVar()->GetLclNum(); - GetEmitter()->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lclNum)), - emitTypeSize(treeNode), targetReg, lclNum, 0); - } - else - { - op1->gtType = treeNode->TypeGet(); - op1->SetRegNum(targetReg); - op1->ClearContained(); - JITDUMP("Changing type of BITCAST source to load directly."); - genCodeForTreeNode(op1); - } - } - else - { - genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum()); - } - genProduceReg(treeNode); -} - -//-------------------------------------------------------------------------- // -// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg. -// -// Arguments -// treeNode - the GT_PUTARG_STK node -// -// Return value: -// The number of the base variable. -// -// Note: -// If tail call the outgoing args are placed in the caller's incoming arg stack space. -// Otherwise, they go in the outgoing arg area on the current frame. -// -// On Windows the caller always creates slots (homing space) in its frame for the -// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0. -// For System V systems there is no such calling convention requirement, and the code needs to find -// the first stack passed argument from the caller. This is done by iterating over -// all the lvParam variables and finding the first with GetArgReg() equals to REG_STK. -// -unsigned CodeGen::getBaseVarForPutArgStk(GenTree* treeNode) -{ - assert(treeNode->OperGet() == GT_PUTARG_STK); - - unsigned baseVarNum; - - // Whether to setup stk arg in incoming or out-going arg area? 
- // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. - // All other calls - stk arg is setup in out-going arg area. - if (treeNode->AsPutArgStk()->putInIncomingArgArea()) - { - // See the note in the function header re: finding the first stack passed argument. - baseVarNum = getFirstArgWithStackSlot(); - assert(baseVarNum != BAD_VAR_NUM); - -#ifdef DEBUG - // This must be a fast tail call. - assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); - - // Since it is a fast tail call, the existence of first incoming arg is guaranteed - // because fast tail call requires that in-coming arg area of caller is >= out-going - // arg area required for tail call. - LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]); - assert(varDsc != nullptr); - -#ifdef UNIX_AMD64_ABI - assert(!varDsc->lvIsRegArg && varDsc->GetArgReg() == REG_STK); -#else // !UNIX_AMD64_ABI - // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0. - assert(varDsc->lvIsRegArg && (varDsc->GetArgReg() == REG_ARG_0 || varDsc->GetArgReg() == REG_FLTARG_0)); -#endif // !UNIX_AMD64_ABI -#endif // !DEBUG - } - else - { -#if FEATURE_FIXED_OUT_ARGS - baseVarNum = compiler->lvaOutgoingArgSpaceVar; -#else // !FEATURE_FIXED_OUT_ARGS - assert(!"No BaseVarForPutArgStk on x86"); - baseVarNum = BAD_VAR_NUM; -#endif // !FEATURE_FIXED_OUT_ARGS - } - - return baseVarNum; -} - -//--------------------------------------------------------------------- -// genAlignStackBeforeCall: Align the stack if necessary before a call. -// -// Arguments: -// putArgStk - the putArgStk node. -// -void CodeGen::genAlignStackBeforeCall(GenTreePutArgStk* putArgStk) -{ -#if defined(UNIX_X86_ABI) - - genAlignStackBeforeCall(putArgStk->gtCall); - -#endif // UNIX_X86_ABI -} - -//--------------------------------------------------------------------- -// genAlignStackBeforeCall: Align the stack if necessary before a call. -// -// Arguments: -// call - the call node. -// -void CodeGen::genAlignStackBeforeCall(GenTreeCall* call) -{ -#if defined(UNIX_X86_ABI) - - // Have we aligned the stack yet? - if (!call->fgArgInfo->IsStkAlignmentDone()) - { - // We haven't done any stack alignment yet for this call. We might need to create - // an alignment adjustment, even if this function itself doesn't have any stack args. - // This can happen if this function call is part of a nested call sequence, and the outer - // call has already pushed some arguments. - - unsigned stkLevel = genStackLevel + call->fgArgInfo->GetStkSizeBytes(); - call->fgArgInfo->ComputeStackAlignment(stkLevel); - - unsigned padStkAlign = call->fgArgInfo->GetStkAlign(); - if (padStkAlign != 0) - { - // Now generate the alignment - inst_RV_IV(INS_sub, REG_SPBASE, padStkAlign, EA_PTRSIZE); - AddStackLevel(padStkAlign); - AddNestedAlignment(padStkAlign); - } - - call->fgArgInfo->SetStkAlignmentDone(); - } - -#endif // UNIX_X86_ABI -} - -//--------------------------------------------------------------------- -// genRemoveAlignmentAfterCall: After a call, remove the alignment -// added before the call, if any. -// -// Arguments: -// call - the call node. -// bias - additional stack adjustment -// -// Note: -// When bias > 0, caller should adjust stack level appropriately as -// bias is not considered when adjusting stack level. 
-// -void CodeGen::genRemoveAlignmentAfterCall(GenTreeCall* call, unsigned bias) -{ -#if defined(TARGET_X86) -#if defined(UNIX_X86_ABI) - // Put back the stack pointer if there was any padding for stack alignment - unsigned padStkAlign = call->fgArgInfo->GetStkAlign(); - unsigned padStkAdjust = padStkAlign + bias; - - if (padStkAdjust != 0) - { - inst_RV_IV(INS_add, REG_SPBASE, padStkAdjust, EA_PTRSIZE); - SubtractStackLevel(padStkAlign); - SubtractNestedAlignment(padStkAlign); - } -#else // UNIX_X86_ABI - if (bias != 0) - { - genAdjustSP(bias); - } -#endif // !UNIX_X86_ABI_ -#else // TARGET_X86 - assert(bias == 0); -#endif // !TARGET_X86 -} - -#ifdef TARGET_X86 - -//--------------------------------------------------------------------- -// genAdjustStackForPutArgStk: -// adjust the stack pointer for a putArgStk node if necessary. -// -// Arguments: -// putArgStk - the putArgStk node. -// -// Returns: true if the stack pointer was adjusted; false otherwise. -// -// Notes: -// Sets `m_pushStkArg` to true if the stack arg needs to be pushed, -// false if the stack arg needs to be stored at the current stack -// pointer address. This is exactly the opposite of the return value -// of this function. -// -bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) -{ - const unsigned argSize = putArgStk->GetStackByteSize(); - GenTree* source = putArgStk->gtGetOp1(); - -#ifdef FEATURE_SIMD - if (!source->OperIs(GT_FIELD_LIST) && varTypeIsSIMD(source)) - { - inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); - AddStackLevel(argSize); - m_pushStkArg = false; - return true; - } -#endif // FEATURE_SIMD - - // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack. - // This is set in Lowering, and is true if and only if: - // - This argument contains any GC pointers OR - // - It is a GT_FIELD_LIST OR - // - It is less than 16 bytes in size. - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - switch (putArgStk->gtPutArgStkKind) - { - case GenTreePutArgStk::Kind::RepInstr: - case GenTreePutArgStk::Kind::Unroll: - assert(!source->AsObj()->GetLayout()->HasGCPtr() && (argSize >= 16)); - break; - case GenTreePutArgStk::Kind::Push: - case GenTreePutArgStk::Kind::PushAllSlots: - assert(source->OperIs(GT_FIELD_LIST) || source->AsObj()->GetLayout()->HasGCPtr() || (argSize < 16)); - break; - case GenTreePutArgStk::Kind::Invalid: - default: - assert(!"Uninitialized GenTreePutArgStk::Kind"); - break; - } -#endif // DEBUG - - if (putArgStk->isPushKind()) - { - m_pushStkArg = true; - return false; - } - else - { - m_pushStkArg = false; - - // If argSize is large, we need to probe the stack like we do in the prolog (genAllocLclFrame) - // or for localloc (genLclHeap), to ensure we touch the stack pages sequentially, and don't miss - // the stack guard pages. The prolog probes, but we don't know at this point how much higher - // the last probed stack pointer value is. We default a threshold. Any size below this threshold - // we are guaranteed the stack has been probed. Above this threshold, we don't know. The threshold - // should be high enough to cover all common cases. Increasing the threshold means adding a few - // more "lowest address of stack" probes in the prolog. Since this is relatively rare, add it to - // stress modes. 
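// --- Illustrative sketch, not part of the upstream change ---
// The comment above explains that a large outgoing-arg adjustment must touch the
// stack page by page so the guard page is never skipped. A standalone model of
// that decision, assuming a 4KB page and a hypothetical two-page threshold; the
// real code compares against ARG_STACK_PROBE_THRESHOLD_BYTES and emits
// genStackPointerConstantAdjustmentLoopWithProbe for the large case.
#include <cstddef>
#include <cstdio>

constexpr size_t kPageSize  = 4096;          // assumed page size
constexpr size_t kThreshold = 2 * kPageSize; // hypothetical probe threshold

// Returns how many page probes the adjustment needs: zero for small sizes
// (a plain 'sub esp, argSize' is enough), one per touched page otherwise.
static size_t probesNeeded(size_t argSize)
{
    if (argSize < kThreshold)
    {
        return 0;
    }
    return (argSize + kPageSize - 1) / kPageSize;
}

int main()
{
    printf("%zu\n", probesNeeded(64));    // 0: below the threshold, plain sub
    printf("%zu\n", probesNeeded(20000)); // 5: touch each of the five pages spanned
    return 0;
}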
- - if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) || - compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5)) - { - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, REG_NA); - } - else - { - inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); - } - - AddStackLevel(argSize); - return true; - } -} - -//--------------------------------------------------------------------- -// genPutArgStkFieldList - generate code for passing a GT_FIELD_LIST arg on the stack. -// -// Arguments -// treeNode - the GT_PUTARG_STK node whose op1 is a GT_FIELD_LIST -// -// Return value: -// None -// -void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) -{ - GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList(); - assert(fieldList != nullptr); - - // Set m_pushStkArg and pre-adjust the stack if necessary. - const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk); - - // For now, we only support the "push" case; we will push a full slot for the first field of each slot - // within the struct. - assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg); - - // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0. - // (Note that this mode is not currently being used.) - // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them - // in reverse order, so we start with the current field offset at the size of the struct arg (which must be - // a multiple of the target pointer size). - unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->GetStackByteSize(); - unsigned prevFieldOffset = currentOffset; - regNumber intTmpReg = REG_NA; - regNumber simdTmpReg = REG_NA; - if (putArgStk->AvailableTempRegCount() != 0) - { - regMaskTP rsvdRegs = putArgStk->gtRsvdRegs; - if ((rsvdRegs & RBM_ALLINT) != 0) - { - intTmpReg = putArgStk->GetSingleTempReg(RBM_ALLINT); - assert(genIsValidIntReg(intTmpReg)); - } - if ((rsvdRegs & RBM_ALLFLOAT) != 0) - { - simdTmpReg = putArgStk->GetSingleTempReg(RBM_ALLFLOAT); - assert(genIsValidFloatReg(simdTmpReg)); - } - assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1)); - } - - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - GenTree* const fieldNode = use.GetNode(); - const unsigned fieldOffset = use.GetOffset(); - var_types fieldType = use.GetType(); - - // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the - // field list in descending order by offset. - assert(!varTypeIsLong(fieldType)); - assert(fieldOffset <= prevFieldOffset); - - // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately - // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been - // assigned a register, and which is therefore contained. - // Unlike genConsumeReg(), it handles the case where no registers are being consumed. - genConsumeRegs(fieldNode); - regNumber argReg = fieldNode->isUsedFromSpillTemp() ? REG_NA : fieldNode->GetRegNum(); - - // If the field is slot-like, we can use a push instruction to store the entire register no matter the type. - // - // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up - // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must - // not require rounding. 
- // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise - // able to detect stores into the outgoing argument area of the stack on x86. - const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4); - int adjustment = roundUp(currentOffset - fieldOffset, 4); - if (fieldIsSlot && !varTypeIsSIMD(fieldType)) - { - fieldType = genActualType(fieldType); - unsigned pushSize = genTypeSize(fieldType); - assert((pushSize % 4) == 0); - adjustment -= pushSize; - while (adjustment != 0) - { - inst_IV(INS_push, 0); - currentOffset -= pushSize; - AddStackLevel(pushSize); - adjustment -= pushSize; - } - m_pushStkArg = true; - } - else - { - m_pushStkArg = false; - - // We always "push" floating point fields (i.e. they are full slot values that don't - // require special handling). - assert(varTypeIsIntegralOrI(fieldNode) || varTypeIsSIMD(fieldNode)); - - // If we can't push this field, it needs to be in a register so that we can store - // it to the stack location. - if (adjustment != 0) - { - // This moves the stack pointer to fieldOffset. - // For this case, we must adjust the stack and generate stack-relative stores rather than pushes. - // Adjust the stack pointer to the next slot boundary. - inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE); - currentOffset -= adjustment; - AddStackLevel(adjustment); - } - - // Does it need to be in a byte register? - // If so, we'll use intTmpReg, which must have been allocated as a byte register. - // If it's already in a register, but not a byteable one, then move it. - if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0))) - { - assert(intTmpReg != REG_NA); - noway_assert((genRegMask(intTmpReg) & RBM_BYTE_REGS) != 0); - if (argReg != REG_NA) - { - inst_RV_RV(INS_mov, intTmpReg, argReg, fieldType); - argReg = intTmpReg; - } - } - } - - if (argReg == REG_NA) - { - if (m_pushStkArg) - { - if (fieldNode->isUsedFromSpillTemp()) - { - assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD? - assert(fieldNode->IsRegOptional()); - TempDsc* tmp = getSpillTempDsc(fieldNode); - GetEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0); - regSet.tmpRlsTemp(tmp); - } - else - { - assert(varTypeIsIntegralOrI(fieldNode)); - switch (fieldNode->OperGet()) - { - case GT_LCL_VAR: - inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet())); - break; - case GT_CNS_INT: - if (fieldNode->IsIconHandle()) - { - inst_IV_handle(INS_push, fieldNode->AsIntCon()->gtIconVal); - } - else - { - inst_IV(INS_push, fieldNode->AsIntCon()->gtIconVal); - } - break; - default: - unreached(); - } - } - currentOffset -= TARGET_POINTER_SIZE; - AddStackLevel(TARGET_POINTER_SIZE); - } - else - { - // The stack has been adjusted and we will load the field to intTmpReg and then store it on the stack. 
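// --- Illustrative sketch, not part of the upstream change ---
// A standalone model of the x86 field-list bookkeeping above: fields arrive in
// descending offset order, slot-aligned fields are pushed, everything else gets
// an explicit 'sub esp' rounded up to the 4-byte slot size plus a store through
// a temp register. The offsets and the 16-byte struct size are made up.
#include <cstdio>

static unsigned roundUp4(unsigned x)
{
    return (x + 3) & ~3u;
}

int main()
{
    const unsigned fieldOffsets[] = {12, 8, 2, 0}; // hypothetical, descending
    unsigned currentOffset   = 16;                 // struct arg size (slot multiple)
    unsigned prevFieldOffset = currentOffset;

    for (unsigned fieldOffset : fieldOffsets)
    {
        const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
        const unsigned adjustment = roundUp4(currentOffset - fieldOffset);

        if (fieldIsSlot)
        {
            // Pad with 'push 0' down to the field's slot, then push the field itself.
            printf("offset %2u: push whole slot\n", fieldOffset);
            currentOffset = fieldOffset;
        }
        else
        {
            // 'sub esp, adjustment', then store the field at [esp + delta] via a temp reg.
            currentOffset -= adjustment;
            printf("offset %2u: sub esp,%u; store at [esp+%u]\n", fieldOffset, adjustment,
                   fieldOffset - currentOffset);
        }
        prevFieldOffset = fieldOffset;
    }

    if (currentOffset != 0)
    {
        printf("leading padding: sub esp,%u\n", currentOffset); // explicit-layout case
    }
    return 0;
}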
- assert(varTypeIsIntegralOrI(fieldNode)); - switch (fieldNode->OperGet()) - { - case GT_LCL_VAR: - inst_RV_TT(INS_mov, intTmpReg, fieldNode); - break; - case GT_CNS_INT: - genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode); - break; - default: - unreached(); - } - genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset); - } - } - else - { -#if defined(FEATURE_SIMD) - if (fieldType == TYP_SIMD12) - { - assert(genIsValidFloatReg(simdTmpReg)); - genStoreSIMD12ToStack(argReg, simdTmpReg); - } - else -#endif // defined(FEATURE_SIMD) - { - genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset); - } - if (m_pushStkArg) - { - // We always push a slot-rounded size - currentOffset -= genTypeSize(fieldType); - } - } - - prevFieldOffset = fieldOffset; - } - if (currentOffset != 0) - { - // We don't expect padding at the beginning of a struct, but it could happen with explicit layout. - inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE); - AddStackLevel(currentOffset); - } -} -#endif // TARGET_X86 - -//--------------------------------------------------------------------- -// genPutArgStk - generate code for passing an arg on the stack. -// -// Arguments -// treeNode - the GT_PUTARG_STK node -// targetType - the type of the treeNode -// -// Return value: -// None -// -void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) -{ - GenTree* data = putArgStk->gtOp1; - var_types targetType = genActualType(data->TypeGet()); - -#ifdef TARGET_X86 - - genAlignStackBeforeCall(putArgStk); - - if ((data->OperGet() != GT_FIELD_LIST) && varTypeIsStruct(targetType)) - { - (void)genAdjustStackForPutArgStk(putArgStk); - genPutStructArgStk(putArgStk); - return; - } - - // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LISTs of TYP_INT. - assert(targetType != TYP_LONG); - - const unsigned argSize = putArgStk->GetStackByteSize(); - assert((argSize % TARGET_POINTER_SIZE) == 0); - - if (data->isContainedIntOrIImmed()) - { - if (data->IsIconHandle()) - { - inst_IV_handle(INS_push, data->AsIntCon()->gtIconVal); - } - else - { - inst_IV(INS_push, data->AsIntCon()->gtIconVal); - } - AddStackLevel(argSize); - } - else if (data->OperGet() == GT_FIELD_LIST) - { - genPutArgStkFieldList(putArgStk); - } - else - { - // We should not see any contained nodes that are not immediates. - assert(data->isUsedFromReg()); - genConsumeReg(data); - genPushReg(targetType, data->GetRegNum()); - } -#else // !TARGET_X86 - { - unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk); - -#ifdef UNIX_AMD64_ABI - - if (data->OperIs(GT_FIELD_LIST)) - { - genPutArgStkFieldList(putArgStk, baseVarNum); - return; - } - else if (varTypeIsStruct(targetType)) - { - m_stkArgVarNum = baseVarNum; - m_stkArgOffset = putArgStk->getArgOffset(); - genPutStructArgStk(putArgStk); - m_stkArgVarNum = BAD_VAR_NUM; - return; - } -#endif // UNIX_AMD64_ABI - - noway_assert(targetType != TYP_STRUCT); - - // Get argument offset on stack. - // Here we cross check that argument offset hasn't changed from lowering to codegen since - // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. 
- unsigned argOffset = putArgStk->getArgOffset(); - -#ifdef DEBUG - fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(putArgStk->gtCall, putArgStk); - assert(curArgTabEntry != nullptr); - assert(argOffset == curArgTabEntry->slotNum * TARGET_POINTER_SIZE); -#endif - - if (data->isContainedIntOrIImmed()) - { - GetEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset, - (int)data->AsIntConCommon()->IconValue()); - } - else - { - assert(data->isUsedFromReg()); - genConsumeReg(data); - GetEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->GetRegNum(), baseVarNum, - argOffset); - } - } -#endif // !TARGET_X86 -} - -//--------------------------------------------------------------------- -// genPutArgReg - generate code for a GT_PUTARG_REG node -// -// Arguments -// tree - the GT_PUTARG_REG node -// -// Return value: -// None -// -void CodeGen::genPutArgReg(GenTreeOp* tree) -{ - assert(tree->OperIs(GT_PUTARG_REG)); - - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); - -#ifndef UNIX_AMD64_ABI - assert(targetType != TYP_STRUCT); -#endif // !UNIX_AMD64_ABI - - GenTree* op1 = tree->gtOp1; - genConsumeReg(op1); - - // If child node is not already in the register we need, move it - if (targetReg != op1->GetRegNum()) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); - } - - genProduceReg(tree); -} - -#ifdef TARGET_X86 -// genPushReg: Push a register value onto the stack and adjust the stack level -// -// Arguments: -// type - the type of value to be stored -// reg - the register containing the value -// -// Notes: -// For TYP_LONG, the srcReg must be a floating point register. -// Otherwise, the register type must be consistent with the given type. -// -void CodeGen::genPushReg(var_types type, regNumber srcReg) -{ - unsigned size = genTypeSize(type); - if (varTypeIsIntegralOrI(type) && type != TYP_LONG) - { - assert(genIsValidIntReg(srcReg)); - inst_RV(INS_push, srcReg, type); - } - else - { - instruction ins; - emitAttr attr = emitTypeSize(type); - if (type == TYP_LONG) - { - // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg. - // This is only used when we are pushing a struct from memory to memory, and basically is - // handling an 8-byte "chunk", as opposed to strictly a long type. - ins = INS_movq; - } - else - { - ins = ins_Store(type); - } - assert(genIsValidFloatReg(srcReg)); - inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE); - GetEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0); - } - AddStackLevel(size); -} -#endif // TARGET_X86 - -#if defined(FEATURE_PUT_STRUCT_ARG_STK) -// genStoreRegToStackArg: Store a register value into the stack argument area -// -// Arguments: -// type - the type of value to be stored -// reg - the register containing the value -// offset - the offset from the base (see Assumptions below) -// -// Notes: -// A type of TYP_STRUCT instructs this method to store a 16-byte chunk -// at the given offset (i.e. not the full struct). -// -// Assumptions: -// The caller must set the context appropriately before calling this method: -// - On x64, m_stkArgVarNum must be set according to whether this is a regular or tail call. -// - On x86, the caller must set m_pushStkArg if this method should push the argument. -// Otherwise, the argument is stored at the given offset from sp. -// -// TODO: In the below code the load and store instructions are for 16 bytes, but the -// type is EA_8BYTE. 
The movdqa/u are 16 byte instructions, so it works, but -// this probably needs to be changed. -// -void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset) -{ - assert(srcReg != REG_NA); - instruction ins; - emitAttr attr; - unsigned size; - - if (type == TYP_STRUCT) - { - ins = INS_movdqu; - // This should be changed! - attr = EA_8BYTE; - size = 16; - } - else - { -#ifdef FEATURE_SIMD - if (varTypeIsSIMD(type)) - { - assert(genIsValidFloatReg(srcReg)); - ins = ins_Store(type); // TODO-CQ: pass 'aligned' correctly - } - else -#endif // FEATURE_SIMD -#ifdef TARGET_X86 - if (type == TYP_LONG) - { - assert(genIsValidFloatReg(srcReg)); - ins = INS_movq; - } - else -#endif // TARGET_X86 - { - assert((varTypeUsesFloatReg(type) && genIsValidFloatReg(srcReg)) || - (varTypeIsIntegralOrI(type) && genIsValidIntReg(srcReg))); - ins = ins_Store(type); - } - attr = emitTypeSize(type); - size = genTypeSize(type); - } - -#ifdef TARGET_X86 - if (m_pushStkArg) - { - genPushReg(type, srcReg); - } - else - { - GetEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, offset); - } -#else // !TARGET_X86 - assert(m_stkArgVarNum != BAD_VAR_NUM); - GetEmitter()->emitIns_S_R(ins, attr, srcReg, m_stkArgVarNum, m_stkArgOffset + offset); -#endif // !TARGET_X86 -} - -//--------------------------------------------------------------------- -// genPutStructArgStk - generate code for copying a struct arg on the stack by value. -// In case there are references to heap object in the struct, -// it generates the gcinfo as well. -// -// Arguments -// putArgStk - the GT_PUTARG_STK node -// -// Notes: -// In the case of fixed out args, the caller must have set m_stkArgVarNum to the variable number -// corresponding to the argument area (where we will put the argument on the stack). -// For tail calls this is the baseVarNum = 0. -// For non tail calls this is the outgoingArgSpace. -void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) -{ - GenTree* source = putArgStk->gtGetOp1(); - var_types targetType = source->TypeGet(); - -#if defined(TARGET_X86) && defined(FEATURE_SIMD) - if (putArgStk->isSIMD12()) - { - genPutArgStkSIMD12(putArgStk); - return; - } -#endif // defined(TARGET_X86) && defined(FEATURE_SIMD) - - if (varTypeIsSIMD(targetType)) - { - regNumber srcReg = genConsumeReg(source); - assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg))); - genStoreRegToStackArg(targetType, srcReg, 0); - return; - } - - assert(targetType == TYP_STRUCT); - - ClassLayout* layout = source->AsObj()->GetLayout(); - - if (!layout->HasGCPtr()) - { - switch (putArgStk->gtPutArgStkKind) - { - case GenTreePutArgStk::Kind::RepInstr: - genStructPutArgRepMovs(putArgStk); - break; - case GenTreePutArgStk::Kind::Unroll: - genStructPutArgUnroll(putArgStk); - break; - case GenTreePutArgStk::Kind::Push: - genStructPutArgUnroll(putArgStk); - break; - default: - unreached(); - } - } - else - { - // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always. - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef TARGET_X86 - // On x86, any struct that has contains GC references must be stored to the stack using `push` instructions so - // that the emitter properly detects the need to update the method's GC information. - // - // Strictly speaking, it is only necessary to use `push` to store the GC references themselves, so for structs - // with large numbers of consecutive non-GC-ref-typed fields, we may be able to improve the code size in the - // future. 
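// --- Illustrative sketch, not part of the upstream change ---
// The non-x86 copy loop further below groups consecutive non-GC slots and chooses
// between a short run of movsp instructions and a single 'rep movsp'. A standalone
// model of that grouping, using a made-up slot layout and a hypothetical limit of 4
// (the real code compares against CPOBJ_NONGC_SLOTS_LIMIT).
#include <cstdio>

int main()
{
    const bool isGCSlot[]   = {false, false, true, false, false, false, false, false, true};
    const unsigned numSlots = sizeof(isGCSlot) / sizeof(isGCSlot[0]);
    const unsigned nonGCLimit = 4; // assumed threshold

    for (unsigned i = 0; i < numSlots;)
    {
        if (!isGCSlot[i])
        {
            unsigned run = 0;
            do
            {
                run++;
                i++;
            } while ((i < numSlots) && !isGCSlot[i]);

            if (run < nonGCLimit)
            {
                printf("%u x movsp\n", run);            // short run: unrolled copies
            }
            else
            {
                printf("mov ecx,%u; rep movsp\n", run); // long run: rep-prefixed copy
            }
        }
        else
        {
            printf("GC slot %u: load+store with GC tracking\n", i);
            i++;
        }
    }
    return 0;
}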
- assert(m_pushStkArg); - - GenTree* srcAddr = source->gtGetOp1(); - const unsigned byteSize = putArgStk->GetStackByteSize(); - assert(byteSize % TARGET_POINTER_SIZE == 0); - const unsigned numSlots = byteSize / TARGET_POINTER_SIZE; - assert(putArgStk->gtNumSlots == numSlots); - - regNumber srcRegNum = srcAddr->GetRegNum(); - const bool srcAddrInReg = srcRegNum != REG_NA; - - unsigned srcLclNum = 0; - unsigned srcLclOffset = 0; - if (srcAddrInReg) - { - genConsumeReg(srcAddr); - } - else - { - assert(srcAddr->OperIsLocalAddr()); - - srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); - srcLclOffset = srcAddr->AsLclVarCommon()->GetLclOffs(); - } - - for (int i = numSlots - 1; i >= 0; --i) - { - emitAttr slotAttr = emitTypeSize(layout->GetGCPtrType(i)); - const unsigned byteOffset = i * TARGET_POINTER_SIZE; - if (srcAddrInReg) - { - GetEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, byteOffset); - } - else - { - GetEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + byteOffset); - } - AddStackLevel(TARGET_POINTER_SIZE); - } -#else // !defined(TARGET_X86) - - // Consume these registers. - // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). - genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA); - - const bool srcIsLocal = putArgStk->gtOp1->AsObj()->gtOp1->OperIsLocalAddr(); - const emitAttr srcAddrAttr = srcIsLocal ? EA_PTRSIZE : EA_BYREF; - -#if DEBUG - unsigned numGCSlotsCopied = 0; -#endif // DEBUG - - const unsigned byteSize = putArgStk->GetStackByteSize(); - assert(byteSize % TARGET_POINTER_SIZE == 0); - const unsigned numSlots = byteSize / TARGET_POINTER_SIZE; - assert(putArgStk->gtNumSlots == numSlots); - for (unsigned i = 0; i < numSlots;) - { - if (!layout->IsGCPtr(i)) - { - // Let's see if we can use rep movsp (alias for movsd or movsq for 32 and 64 bits respectively) - // instead of a sequence of movsp instructions to save cycles and code size. - unsigned adjacentNonGCSlotCount = 0; - do - { - adjacentNonGCSlotCount++; - i++; - } while ((i < numSlots) && !layout->IsGCPtr(i)); - - // If we have a very small contiguous non-ref region, it's better just to - // emit a sequence of movsp instructions - if (adjacentNonGCSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) - { - for (; adjacentNonGCSlotCount > 0; adjacentNonGCSlotCount--) - { - instGen(INS_movsp); - } - } - else - { - GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, adjacentNonGCSlotCount); - instGen(INS_r_movsp); - } - } - else - { - // We have a GC (byref or ref) pointer - // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsp instruction, - // but the logic for emitting a GC info record is not available (it is internal for the emitter - // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do - // instGen(INS_movsp); and emission of gc info. - - var_types memType = layout->GetGCPtrType(i); - GetEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0); - genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE); -#ifdef DEBUG - numGCSlotsCopied++; -#endif // DEBUG - - i++; - if (i < numSlots) - { - // Source for the copy operation. - // If a LocalAddr, use EA_PTRSIZE - copy from stack. - // If not a LocalAddr, use EA_BYREF - the source location is not on the stack. 
- GetEmitter()->emitIns_R_I(INS_add, srcAddrAttr, REG_RSI, TARGET_POINTER_SIZE); - - // Always copying to the stack - outgoing arg area - // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE. - GetEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE); - } - } - } - - assert(numGCSlotsCopied == layout->GetGCPtrCount()); -#endif // TARGET_X86 - } -} -#endif // defined(FEATURE_PUT_STRUCT_ARG_STK) - -/***************************************************************************** - * - * Create and record GC Info for the function. - */ -#ifndef JIT32_GCENCODER -void -#else // !JIT32_GCENCODER -void* -#endif // !JIT32_GCENCODER -CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr)) -{ -#ifdef JIT32_GCENCODER - return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr)); -#else // !JIT32_GCENCODER - genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); -#endif // !JIT32_GCENCODER -} - -#ifdef JIT32_GCENCODER -void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, - unsigned prologSize, - unsigned epilogSize DEBUGARG(void* codePtr)) -{ - BYTE headerBuf[64]; - InfoHdr header; - - int s_cached; - -#ifdef FEATURE_EH_FUNCLETS - // We should do this before gcInfoBlockHdrSave since varPtrTableSize must be finalized before it - if (compiler->ehAnyFunclets()) - { - gcInfo.gcMarkFilterVarsPinned(); - } -#endif - -#ifdef DEBUG - size_t headerSize = -#endif - compiler->compInfoBlkSize = - gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached); - - size_t argTabOffset = 0; - size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset); - -#if DISPLAY_SIZES - - if (GetInterruptible()) - { - gcHeaderISize += compiler->compInfoBlkSize; - gcPtrMapISize += ptrMapSize; - } - else - { - gcHeaderNSize += compiler->compInfoBlkSize; - gcPtrMapNSize += ptrMapSize; - } - -#endif // DISPLAY_SIZES - - compiler->compInfoBlkSize += ptrMapSize; - - /* Allocate the info block for the method */ - - compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize); - -#if 0 // VERBOSE_SIZES - // TODO-X86-Cleanup: 'dataSize', below, is not defined - -// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100) - { - printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n", - compiler->info.compILCodeSize, - compiler->compInfoBlkSize, - codeSize + dataSize, - codeSize + dataSize - prologSize - epilogSize, - 100 * (codeSize + dataSize) / compiler->info.compILCodeSize, - 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize, - compiler->info.compClassName, - compiler->info.compMethodName); -} - -#endif - - /* Fill in the info block and return it to the caller */ - - void* infoPtr = compiler->compInfoBlkAddr; - - /* Create the method info block: header followed by GC tracking tables */ - - compiler->compInfoBlkAddr += - gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached); - - assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize); - compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset); - assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize); - -#ifdef DEBUG - - if (0) - { - BYTE* temp = (BYTE*)infoPtr; - size_t size = compiler->compInfoBlkAddr - temp; - BYTE* ptab = temp + headerSize; - - 
noway_assert(size == headerSize + ptrMapSize); - - printf("Method info block - header [%zu bytes]:", headerSize); - - for (unsigned i = 0; i < size; i++) - { - if (temp == ptab) - { - printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize); - printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' '); - } - else - { - if (!(i % 16)) - printf("\n %04X: ", i); - } - - printf("%02X ", *temp++); - } - - printf("\n"); - } - -#endif // DEBUG - -#if DUMP_GC_TABLES - - if (compiler->opts.dspGCtbls) - { - const BYTE* base = (BYTE*)infoPtr; - size_t size; - unsigned methodSize; - InfoHdr dumpHeader; - - printf("GC Info for method %s\n", compiler->info.compFullName); - printf("GC info size = %3u\n", compiler->compInfoBlkSize); - - size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize); - // printf("size of header encoding is %3u\n", size); - printf("\n"); - - if (compiler->opts.dspGCtbls) - { - base += size; - size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize); - // printf("size of pointer table is %3u\n", size); - printf("\n"); - noway_assert(compiler->compInfoBlkAddr == (base + size)); - } - } - -#endif // DUMP_GC_TABLES - - /* Make sure we ended up generating the expected number of bytes */ - - noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize); - - return infoPtr; -} - -#else // !JIT32_GCENCODER -void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) -{ - IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC()); - GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) - GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); - assert(gcInfoEncoder); - - // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). - gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); - - // We keep the call count for the second call to gcMakeRegPtrTable() below. - unsigned callCnt = 0; - // First we figure out the encoder ID's for the stack slots and registers. - gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); - // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). - gcInfoEncoder->FinalizeSlotIds(); - // Now we can actually use those slot ID's to declare live ranges. 
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); - - if (compiler->opts.compDbgEnC) - { - // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) - // which is: - // -return address - // -saved off RBP - // -saved 'this' pointer and bool for synchronized methods - - // 4 slots for RBP + return address + RSI + RDI - int preservedAreaSize = 4 * REGSIZE_BYTES; - - if (compiler->info.compFlags & CORINFO_FLG_SYNCH) - { - if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) - { - preservedAreaSize += REGSIZE_BYTES; - } - - // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack) - preservedAreaSize += 4; - } - - // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the - // frame - gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); - } - - if (compiler->opts.IsReversePInvoke()) - { - unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; - assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM && reversePInvokeFrameVarNumber < compiler->lvaRefCount); - LclVarDsc& reversePInvokeFrameVar = compiler->lvaTable[reversePInvokeFrameVarNumber]; - gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar.GetStackOffset()); - } - - gcInfoEncoder->Build(); - - // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) - // let's save the values anyway for debugging purposes - compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); - compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface -} -#endif // !JIT32_GCENCODER - -/***************************************************************************** - * Emit a call to a helper function. - * - */ - -void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg) -{ - void* addr = nullptr; - void* pAddr = nullptr; - - emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; - addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); - regNumber callTarget = REG_NA; - regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); - - if (!addr) - { - assert(pAddr != nullptr); - - // Absolute indirect call addr - // Note: Order of checks is important. First always check for pc-relative and next - // zero-relative. Because the former encoding is 1-byte smaller than the latter. - if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) || - genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr)) - { - // generate call whose target is specified by 32-bit offset relative to PC or zero. - callType = emitter::EC_FUNC_TOKEN_INDIR; - addr = pAddr; - } - else - { -#ifdef TARGET_AMD64 - // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero, - // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to - // make the call. - // mov reg, addr - // call [reg] - - if (callTargetReg == REG_NA) - { - // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but - // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. 
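// --- Illustrative sketch, not part of the upstream change ---
// The helper-call branch above picks between a pc-relative / zero-relative indirect
// call and a 'mov reg, imm64; call [reg]' sequence depending on whether the
// indirection cell address fits in a sign-extended 32-bit displacement. A standalone
// model of those reachability tests, with made-up addresses; the real checks are
// genCodeIndirAddrCanBeEncodedAsPCRelOffset / genCodeIndirAddrCanBeEncodedAsZeroRelOffset.
#include <cstdint>
#include <cstdio>

// True if 'target' is reachable from 'ip' with a signed 32-bit displacement.
static bool fitsPCRel32(uint64_t ip, uint64_t target)
{
    int64_t delta = (int64_t)(target - ip);
    return delta == (int64_t)(int32_t)delta;
}

// True if 'target' itself fits in a sign-extended 32-bit absolute encoding.
static bool fitsZeroRel32(uint64_t target)
{
    return (int64_t)target == (int64_t)(int32_t)target;
}

int main()
{
    const uint64_t ip       = 0x00007ff812340000ull; // hypothetical code address
    const uint64_t nearAddr = ip + 0x1000;            // reachable with rel32
    const uint64_t farAddr  = 0x0000123456789abcull;  // needs the register-indirect form

    printf("near: pcrel=%d zerorel=%d\n", fitsPCRel32(ip, nearAddr), fitsZeroRel32(nearAddr));
    printf("far:  pcrel=%d zerorel=%d\n", fitsPCRel32(ip, farAddr), fitsZeroRel32(farAddr));
    return 0;
}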
- callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; - regMaskTP callTargetMask = genRegMask(callTargetReg); - noway_assert((callTargetMask & killMask) == callTargetMask); - } - else - { - // The call target must not overwrite any live variable, though it may not be in the - // kill set for the call. - regMaskTP callTargetMask = genRegMask(callTargetReg); - noway_assert((callTargetMask & regSet.GetMaskVars()) == RBM_NONE); - } -#endif - - callTarget = callTargetReg; - CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL); - callType = emitter::EC_INDIR_ARD; - } - } - - // clang-format off - GetEmitter()->emitIns_Call(callType, - compiler->eeFindHelper(helper), - INDEBUG_LDISASM_COMMA(nullptr) addr, - argSize, - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - BAD_IL_OFFSET, // IL offset - callTarget, // ireg - REG_NA, 0, 0, // xreg, xmul, disp - false // isJump - ); - // clang-format on - - regSet.verifyRegistersUsed(killMask); -} - -/***************************************************************************** -* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog -* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late -* disassembler thinks the instructions as the same as we do. -*/ - -// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here. -// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. -//#define ALL_XARCH_EMITTER_UNIT_TESTS - -#if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) -void CodeGen::genAmd64EmitterUnitTests() -{ - if (!verbose) - { - return; - } - - if (!compiler->opts.altJit) - { - // No point doing this in a "real" JIT. - return; - } - - // Mark the "fake" instructions in the output. - printf("*************** In genAmd64EmitterUnitTests()\n"); - - // We use this: - // genDefineTempLabel(genCreateTempLabel()); - // to create artificial labels to help separate groups of tests. 
- - // - // Loads - // - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef ALL_XARCH_EMITTER_UNIT_TESTS - genDefineTempLabel(genCreateTempLabel()); - - // vhaddpd ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vaddss xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vaddsd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vaddps xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vaddps ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vaddpd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vaddpd ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vsubss xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vsubsd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vsubps ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vsubps ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vsubpd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vsubpd ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vmulss xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vmulsd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vmulps xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vmulpd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vmulps ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vmulpd ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vandps xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vandpd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vandps ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vandpd ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vorps xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vorpd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vorps ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vorpd ymm0,ymm1,ymm2 - GetEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vdivss xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vdivsd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vdivss xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vdivsd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - - // vdivss 
xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); - // vdivsd xmm0,xmm1,xmm2 - GetEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); -#endif // ALL_XARCH_EMITTER_UNIT_TESTS - printf("*************** End of genAmd64EmitterUnitTests()\n"); -} - -#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) - -#ifdef PROFILING_SUPPORTED - -#ifdef TARGET_X86 - -//----------------------------------------------------------------------------------- -// genProfilingEnterCallback: Generate the profiling function enter callback. -// -// Arguments: -// initReg - register to use as scratch register -// pInitRegZeroed - OUT parameter. This variable remains unchanged. -// -// Return Value: -// None -// -// Notes: -// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in -// VM\i386\asmhelpers.asm for details): -// 1. The calling sequence for calling the helper is: -// push FunctionIDOrClientID -// call ProfileEnterHelper -// 2. The calling function has an EBP frame. -// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, -// the following prolog is assumed: -// push ESP -// mov EBP, ESP -// 4. All registers are preserved. -// 5. The helper pops the FunctionIDOrClientID argument from the stack. -// -void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - // Give profiler a chance to back out of hooking this method - if (!compiler->compIsProfilerHookNeeded()) - { - return; - } - - unsigned saveStackLvl2 = genStackLevel; - -// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK() -// for x86 stack unwinding - -#if defined(UNIX_X86_ABI) - // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() - GetEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); -#endif // UNIX_X86_ABI - - // Push the profilerHandle - if (compiler->compProfilerMethHndIndirected) - { - GetEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); - } - - // This will emit either - // "call ip-relative 32-bit offset" or - // "mov rax, helper addr; call rax" - genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, - 0, // argSize. Again, we have to lie about it - EA_UNKNOWN); // retSize - - // Check that we have place for the push. - assert(compiler->fgGetPtrArgCntMax() >= 1); - -#if defined(UNIX_X86_ABI) - // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall - GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); -#endif // UNIX_X86_ABI - - /* Restore the stack level */ - - SetStackLevel(saveStackLvl2); -} - -//----------------------------------------------------------------------------------- -// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. -// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. -// -// Arguments: -// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL -// -// Return Value: -// None -// -// Notes: -// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and -// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details): -// 1. 
The calling sequence for calling the helper is: -// push FunctionIDOrClientID -// call ProfileLeaveHelper or ProfileTailcallHelper -// 2. The calling function has an EBP frame. -// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, -// the following prolog is assumed: -// push ESP -// mov EBP, ESP -// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved. -// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved. -// 5. The helper pops the FunctionIDOrClientID argument from the stack. -// -void CodeGen::genProfilingLeaveCallback(unsigned helper) -{ - assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); - - // Only hook if profiler says it's okay. - if (!compiler->compIsProfilerHookNeeded()) - { - return; - } - - compiler->info.compProfilerCallback = true; - - // Need to save on to the stack level, since the helper call will pop the argument - unsigned saveStackLvl2 = genStackLevel; - -#if defined(UNIX_X86_ABI) - // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() - GetEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); - AddStackLevel(0xC); - AddNestedAlignment(0xC); -#endif // UNIX_X86_ABI - - // - // Push the profilerHandle - // - - if (compiler->compProfilerMethHndIndirected) - { - GetEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); - } - genSinglePush(); - -#if defined(UNIX_X86_ABI) - int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl) -#else - int argSize = REGSIZE_BYTES; -#endif - genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */); - - // Check that we have place for the push. - assert(compiler->fgGetPtrArgCntMax() >= 1); - -#if defined(UNIX_X86_ABI) - // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall - GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); - SubtractStackLevel(0x10); - SubtractNestedAlignment(0xC); -#endif // UNIX_X86_ABI - - /* Restore the stack level */ - SetStackLevel(saveStackLvl2); -} - -#endif // TARGET_X86 - -#ifdef TARGET_AMD64 - -//----------------------------------------------------------------------------------- -// genProfilingEnterCallback: Generate the profiling function enter callback. -// -// Arguments: -// initReg - register to use as scratch register -// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if -// this call sets 'initReg' to a non-zero value. -// -// Return Value: -// None -// -void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - // Give profiler a chance to back out of hooking this method - if (!compiler->compIsProfilerHookNeeded()) - { - return; - } - -#if !defined(UNIX_AMD64_ABI) - - unsigned varNum; - LclVarDsc* varDsc; - - // Since the method needs to make a profiler callback, it should have out-going arg space allocated. - noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); - - // Home all arguments passed in arg registers (RCX, RDX, R8 and R9). - // In case of vararg methods, arg regs are already homed. - // - // Note: Here we don't need to worry about updating gc'info since enter - // callback is generated as part of prolog which is non-gc interruptible. 
- // Moreover GC cannot kick while executing inside profiler callback which is a - // profiler requirement so it can examine arguments which could be obj refs. - if (!compiler->info.compIsVarArgs) - { - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) - { - noway_assert(varDsc->lvIsParam); - - if (!varDsc->lvIsRegArg) - { - continue; - } - - var_types storeType = varDsc->lvaArgType(); - regNumber argReg = varDsc->GetArgReg(); - - instruction store_ins = ins_Store(storeType); - -#ifdef FEATURE_SIMD - if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg)) - { - store_ins = INS_mov; - } -#endif // FEATURE_SIMD - - GetEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0); - } - } - - // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) - // RCX = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - // Profiler hooks enabled during Ngen time. - // Profiler handle needs to be accessed through an indirection of a pointer. - GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - // No need to record relocations, if we are generating ELT hooks under the influence - // of COMPlus_JitELTHookEnabled=1 - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // RDX = caller's SP - // Notes - // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. - // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value - // of that offset to FramePointer to obtain caller's SP value. - assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); - - // This will emit either - // "call ip-relative 32-bit offset" or - // "mov rax, helper addr; call rax" - genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); - - // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog - // generation logic that moves args around as required by first BB entry point conditions - // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs() - // and genEnregisterIncomingStackArgs(). - // - // Now reload arg registers from home locations. - // Vararg methods: - // - we need to reload only known (i.e. fixed) reg args. 
- // - if floating point type, also reload it into corresponding integer reg - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) - { - noway_assert(varDsc->lvIsParam); - - if (!varDsc->lvIsRegArg) - { - continue; - } - - var_types loadType = varDsc->lvaArgType(); - regNumber argReg = varDsc->GetArgReg(); - - instruction load_ins = ins_Load(loadType); - -#ifdef FEATURE_SIMD - if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg)) - { - load_ins = INS_mov; - } -#endif // FEATURE_SIMD - - GetEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0); - -#if FEATURE_VARARG - if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) - { - regNumber intArgReg = compiler->getCallArgIntRegister(argReg); - inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); - } -#endif // FEATURE_VARARG - } - - // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. - if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) - { - *pInitRegZeroed = false; - } - -#else // !defined(UNIX_AMD64_ABI) - - // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) - // R14 = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - // Profiler hooks enabled during Ngen time. - // Profiler handle needs to be accessed through an indirection of a pointer. - GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0, - (ssize_t)compiler->compProfilerMethHnd); - } - else - { - // No need to record relocations, if we are generating ELT hooks under the influence - // of COMPlus_JitELTHookEnabled=1 - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // R15 = caller's SP - // Notes - // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. - // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value - // of that offset to FramePointer to obtain caller's SP value. - assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset); - - // We can use any callee trash register (other than RAX, RDI, RSI) for call target. - // We use R11 here. This will emit either - // "call ip-relative 32-bit offset" or - // "mov r11, helper addr; call r11" - genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); - - // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. - if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) - { - *pInitRegZeroed = false; - } - -#endif // !defined(UNIX_AMD64_ABI) -} - -//----------------------------------------------------------------------------------- -// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. -// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. -// -// Arguments: -// helper - which helper to call. 
Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL -// -// Return Value: -// None -// -void CodeGen::genProfilingLeaveCallback(unsigned helper) -{ - assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); - - // Only hook if profiler says it's okay. - if (!compiler->compIsProfilerHookNeeded()) - { - return; - } - - compiler->info.compProfilerCallback = true; - -#if !defined(UNIX_AMD64_ABI) - - // Since the method needs to make a profiler callback, it should have out-going arg space allocated. - noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); - noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); - - // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash - // registers that profiler callback kills. - if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg()) - { - regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].GetRegNum()); - noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0); - } - - // At this point return value is computed and stored in RAX or XMM0. - // On Amd64, Leave callback preserves the return register. We keep - // RAX alive by not reporting as trashed by helper call. Also note - // that GC cannot kick-in while executing inside profiler callback, - // which is a requirement of profiler as well since it needs to examine - // return value which could be an obj ref. - - // RCX = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - // Profiler hooks enabled during Ngen time. - // Profiler handle needs to be accessed through an indirection of an address. - GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - // Don't record relocations, if we are generating ELT hooks under the influence - // of COMPlus_JitELTHookEnabled=1 - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // RDX = caller's SP - // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion - // of the stmnts to execute unconditionally and clean-up rest. - if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) - { - // Caller's SP relative offset to FramePointer will be negative. We need to add absolute - // value of that offset to FramePointer to obtain caller's SP value. - int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); - } - else - { - // If we are here means that it is a tentative frame layout during which we - // cannot use caller's SP offset since it is an estimate. For now we require the - // method to have at least a single arg so that we can use it to obtain caller's - // SP. - LclVarDsc* varDsc = compiler->lvaTable; - NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); - - // lea rdx, [FramePointer + Arg0's offset] - GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); - } - - // We can use any callee trash register (other than RAX, RCX, RDX) for call target. - // We use R8 here. 
This will emit either - // "call ip-relative 32-bit offset" or - // "mov r8, helper addr; call r8" - genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2); - -#else // !defined(UNIX_AMD64_ABI) - - // RDI = ProfilerMethHnd - if (compiler->compProfilerMethHndIndirected) - { - GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - else - { - if (compiler->opts.compJitELTHookEnabled) - { - genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); - } - else - { - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); - } - } - - // RSI = caller's SP - if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) - { - int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); - } - else - { - LclVarDsc* varDsc = compiler->lvaTable; - NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); - - // lea rdx, [FramePointer + Arg0's offset] - GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); - } - - // We can use any callee trash register (other than RAX, RDI, RSI) for call target. - // We use R11 here. This will emit either - // "call ip-relative 32-bit offset" or - // "mov r11, helper addr; call r11" - genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); - -#endif // !defined(UNIX_AMD64_ABI) -} - -#endif // TARGET_AMD64 - -#endif // PROFILING_SUPPORTED - -//------------------------------------------------------------------------ -// genPushCalleeSavedRegisters: Push any callee-saved registers we have used. -// -void CodeGen::genPushCalleeSavedRegisters() -{ - assert(compiler->compGeneratingProlog); - - // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack - // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not - // here. - regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED; - -#if ETW_EBP_FRAMED - if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) - { - noway_assert(!"Used register RBM_FPBASE as a scratch register!"); - } -#endif - - // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method - if (isFramePointerUsed()) - { - rsPushRegs &= ~RBM_FPBASE; - } - -#ifdef DEBUG - if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) - { - printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", - compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); - dspRegMask(rsPushRegs); - printf("\n"); - assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); - } -#endif // DEBUG - - // Push backwards so we match the order we will pop them in the epilog - // and all the other code that expects it to be in this order. - for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg)) - { - regMaskTP regBit = genRegMask(reg); - - if ((regBit & rsPushRegs) != 0) - { - inst_RV(INS_push, reg, TYP_REF); - compiler->unwindPush(reg); -#ifdef USING_SCOPE_INFO - if (!doubleAlignOrFramePointerUsed()) - { - psiAdjustStackLevel(REGSIZE_BYTES); - } -#endif // USING_SCOPE_INFO - rsPushRegs &= ~regBit; - } - } -} - -#endif // TARGET_XARCH +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Amd64/x86 Code Generator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#pragma warning(disable : 4310) // cast truncates constant value - happens for (int8_t)0xb1 +#endif + +#ifdef TARGET_XARCH +#include "emit.h" +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "gcinfoencoder.h" +#include "patchpointinfo.h" + +/***************************************************************************** + * + * Generate code that will set the given register to the integer constant. + */ + +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. + noway_assert(type != TYP_REF || val == 0); + + if (val == 0) + { + instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags); + } + else + { + // TODO-XArch-CQ: needs all the optimized cases + GetEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val); + } +} + +//--------------------------------------------------------------------- +// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. +// +// Arguments: +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// +// Return Value: +// None +// +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (!compiler->getNeedsGSSecurityCookie()) + { + return; + } + + if (compiler->opts.IsOSR() && compiler->info.compPatchpointInfo->HasSecurityCookie()) + { + // Security cookie is on original frame and was initialized there. + return; + } + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + noway_assert(compiler->gsGlobalSecurityCookieVal != 0); +#ifdef TARGET_AMD64 + if ((int)compiler->gsGlobalSecurityCookieVal != compiler->gsGlobalSecurityCookieVal) + { + // initReg = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = initReg + genSetRegToIcon(initReg, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + *pInitRegZeroed = false; + } + else +#endif + { + // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal + GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, + (int)compiler->gsGlobalSecurityCookieVal); + } + } + else + { + // Always use EAX on x86 and x64 + // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it. 
+ // mov eax, dword ptr [compiler->gsGlobalSecurityCookieAddr] + // mov dword ptr [frame.GSSecurityCookie], eax + GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_EAX, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + regSet.verifyRegUsed(REG_EAX); + GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, REG_EAX, compiler->lvaGSSecurityCookie, 0); + if (initReg == REG_EAX) + { + *pInitRegZeroed = false; + } + } +} + +/***************************************************************************** + * + * Generate code to check that the GS cookie wasn't thrashed by a buffer + * overrun. If pushReg is true, preserve all registers around code sequence. + * Otherwise ECX could be modified. + * + * Implementation Note: pushReg = true, in case of tail calls. + */ +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by EAX. + // + // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX + // In such case make sure that the correct GC-ness of RDX is reported as well, so + // a GC object pointed by RDX will not be collected. + if (!pushReg) + { + // Handle multi-reg return type values + if (compiler->compMethodReturnsMultiRegRetType()) + { + ReturnTypeDesc retTypeDesc; + if (varTypeIsLong(compiler->info.compRetNativeType)) + { + retTypeDesc.InitializeLongReturnType(); + } + else // we must have a struct return type + { + retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass, + compiler->info.compCallConv); + } + + const unsigned regCount = retTypeDesc.GetReturnRegCount(); + + // Only x86 and x64 Unix ABI allows multi-reg return and + // number of result regs should be equal to MAX_RET_REG_COUNT. + assert(regCount == MAX_RET_REG_COUNT); + + for (unsigned i = 0; i < regCount; ++i) + { + gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i)); + } + } + else if (compiler->compMethodReturnsRetBufAddr()) + { + // This is for returning in an implicit RetBuf. + // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef. + + // In case the return is in an implicit RetBuf, the native return type should be a struct + assert(varTypeIsStruct(compiler->info.compRetNativeType)); + + gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF); + } + // ... all other cases. + else + { +#ifdef TARGET_AMD64 + // For x64, structs that are not returned in registers are always + // returned in implicit RetBuf. If we reached here, we should not have + // a RetBuf and the return type should not be a struct. + assert(compiler->info.compRetBuffArg == BAD_VAR_NUM); + assert(!varTypeIsStruct(compiler->info.compRetNativeType)); +#endif // TARGET_AMD64 + + // For x86 Windows we can't make such assertions since we generate code for returning of + // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise + // compRetNativeType could be TYP_STRUCT. + gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType); + } + } + + regNumber regGSCheck; + regMaskTP regMaskGSCheck = RBM_NONE; + + if (!pushReg) + { + // Non-tail call: we can use any callee trash register that is not + // a return register or contain 'this' pointer (keep alive this), since + // we are generating GS cookie check after a GT_RETURN block. 
+ // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well + // as return register for two-register-returned structs. + if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg() && + (compiler->lvaTable[compiler->info.compThisArg].GetRegNum() == REG_ARG_0)) + { + regGSCheck = REG_ARG_1; + } + else + { + regGSCheck = REG_ARG_0; + } + } + else + { +#ifdef TARGET_X86 + // It doesn't matter which register we pick, since we're going to save and restore it + // around the check. + // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes? + regGSCheck = REG_EAX; + regMaskGSCheck = RBM_EAX; +#else // !TARGET_X86 + // Jmp calls: specify method handle using which JIT queries VM for its entry point + // address and hence it can neither be a VSD call nor PInvoke calli with cookie + // parameter. Therefore, in case of jmp calls it is safe to use R11. + regGSCheck = REG_R11; +#endif // !TARGET_X86 + } + + regMaskTP byrefPushedRegs = RBM_NONE; + regMaskTP norefPushedRegs = RBM_NONE; + regMaskTP pushedRegs = RBM_NONE; + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { +#if defined(TARGET_AMD64) + // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'. + // Otherwise, load the value into a reg and use 'cmp mem64, reg64'. + if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal) + { + genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); + } + else +#endif // defined(TARGET_AMD64) + { + assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal); + GetEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, + (int)compiler->gsGlobalSecurityCookieVal); + } + } + else + { + // Ngen case - GS cookie value needs to be accessed through an indirection. + + pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs); + + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0); + GetEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); + } + + BasicBlock* gsCheckBlk = genCreateTempLabel(); + inst_JMP(EJ_je, gsCheckBlk); + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN); + genDefineTempLabel(gsCheckBlk); + + genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs); +} + +BasicBlock* CodeGen::genCallFinally(BasicBlock* block) +{ +#if defined(FEATURE_EH_FUNCLETS) + // Generate a call to the finally, like this: + // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym + // call finally-funclet + // jmp finally-return // Only for non-retless finally calls + // The jmp can be a NOP if we're going to the next block. + // If we're generating code for the main function (not a funclet), and there is no localloc, + // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP + // instead of loading the PSPSym in this case, or if PSPSym is not used (CoreRT ABI). 
+ + if ((compiler->lvaPSPSym == BAD_VAR_NUM) || + (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))) + { +#ifndef UNIX_X86_ABI + inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL); +#endif // !UNIX_X86_ABI + } + else + { + GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0); + } + GetEmitter()->emitIns_J(INS_call, block->bbJumpDest); + + if (block->bbFlags & BBF_RETLESS_CALL) + { + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. + + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + instGen(INS_BREAKPOINT); // This should never get executed + } + } + else + { +// TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other +// architectures? +#ifndef JIT32_GCENCODER + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + GetEmitter()->emitDisableGC(); +#endif // JIT32_GCENCODER + + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) + { + // Fall-through. + // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); + } + else + { + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); + } + +#ifndef JIT32_GCENCODER + GetEmitter()->emitEnableGC(); +#endif // JIT32_GCENCODER + } + +#else // !FEATURE_EH_FUNCLETS + + // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot + // corresponding to the finally's nesting level. When invoked in response to an exception, the + // EE does this. + // + // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS. + // + // We will emit : + // mov [ebp - (n + 1)], 0 + // mov [ebp - n ], 0xFC + // push &step + // jmp finallyBlock + // ... + // step: + // mov [ebp - n ], 0 + // jmp leaveTarget + // ... + // leaveTarget: + + noway_assert(isFramePointerUsed()); + + // Get the nesting level which contains the finally + unsigned finallyNesting = 0; + compiler->fgGetNestingLevel(block, &finallyNesting); + + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); + + unsigned curNestingSlotOffs; + curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE)); + + // Zero out the slot for the next nesting level + GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, + curNestingSlotOffs - TARGET_POINTER_SIZE, 0); + GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, curNestingSlotOffs, LCL_FINALLY_MARK); + + // Now push the address where the finally funclet should return to directly. 
+ if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + GetEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest); + } + else + { + // EE expects a DWORD, so we provide 0 + inst_IV(INS_push_hide, 0); + } + + // Jump to the finally BB + inst_JMP(EJ_jmp, block->bbJumpDest); + +#endif // !FEATURE_EH_FUNCLETS + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + block = block->bbNext; + } + return block; +} + +#if defined(FEATURE_EH_FUNCLETS) +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + // Set RAX to the address the VM should return to after the catch. + // Generate a RIP-relative + // lea reg, [rip + disp32] ; the RIP is implicit + // which will be position-independent. + GetEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET); +} + +#else // !FEATURE_EH_FUNCLETS + +void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block) +{ + // The last statement of the block must be a GT_RETFILT, which has already been generated. + assert(block->lastNode() != nullptr); + assert(block->lastNode()->OperGet() == GT_RETFILT); + + if (block->bbJumpKind == BBJ_EHFINALLYRET) + { + assert(block->lastNode()->AsOp()->gtOp1 == nullptr); // op1 == nullptr means endfinally + + // Return using a pop-jmp sequence. As the "try" block calls + // the finally with a jmp, this leaves the x86 call-ret stack + // balanced in the normal flow of path. + + noway_assert(isFramePointerRequired()); + inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); + inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); + } + else + { + assert(block->bbJumpKind == BBJ_EHFILTERRET); + + // The return value has already been computed. + instGen_Return(0); + } +} + +#endif // !FEATURE_EH_FUNCLETS + +// Move an immediate value into an integer register + +void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, + regNumber reg, + ssize_t imm, + insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(unsigned gtFlags)) +{ + // reg cannot be a FP register + assert(!genIsValidFloatReg(reg)); + + if (!compiler->opts.compReloc) + { + size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs + } + + if ((imm == 0) && !EA_IS_RELOC(size)) + { + instGen_Set_Reg_To_Zero(size, reg, flags); + } + else + { + if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm)) + { + emitAttr newSize = EA_PTR_DSP_RELOC; + if (EA_IS_BYREF(size)) + { + newSize = EA_SET_FLG(newSize, EA_BYREF_FLG); + } + + GetEmitter()->emitIns_R_AI(INS_lea, newSize, reg, imm); + } + else + { + GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm); + } + } + regSet.verifyRegUsed(reg); +} + +/*********************************************************************************** + * + * Generate code to set a register 'targetReg' of type 'targetType' to the constant + * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call + * genProduceReg() on the target register. 
+ */ +void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) +{ + switch (tree->gtOper) + { + case GT_CNS_INT: + { + // relocatable values tend to come down as a CNS_INT of native int type + // so the line between these two opcodes is kind of blurry + GenTreeIntConCommon* con = tree->AsIntConCommon(); + ssize_t cnsVal = con->IconValue(); + + if (con->ImmedValNeedsReloc(compiler)) + { + emitAttr size = EA_HANDLE_CNS_RELOC; + + if (targetType == TYP_BYREF) + { + size = EA_SET_FLG(size, EA_BYREF_FLG); + } + + instGen_Set_Reg_To_Imm(size, targetReg, cnsVal); + regSet.verifyRegUsed(targetReg); + } + else + { + genSetRegToIcon(targetReg, cnsVal, targetType); + } + } + break; + + case GT_CNS_DBL: + { + emitter* emit = GetEmitter(); + emitAttr size = emitTypeSize(targetType); + double constValue = tree->AsDblCon()->gtDconVal; + + // Make sure we use "xorps reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0) + if (*(__int64*)&constValue == 0) + { + // A faster/smaller way to generate 0 + emit->emitIns_R_R(INS_xorps, size, targetReg, targetReg); + } + else + { + CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); + emit->emitIns_R_C(ins_Load(targetType), size, targetReg, hnd, 0); + } + } + break; + + default: + unreached(); + } +} + +//------------------------------------------------------------------------ +// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForNegNot(GenTree* tree) +{ + assert(tree->OperIs(GT_NEG, GT_NOT)); + + regNumber targetReg = tree->GetRegNum(); + var_types targetType = tree->TypeGet(); + + if (varTypeIsFloating(targetType)) + { + assert(tree->gtOper == GT_NEG); + genSSE2BitwiseOp(tree); + } + else + { + GenTree* operand = tree->gtGetOp1(); + assert(operand->isUsedFromReg()); + regNumber operandReg = genConsumeReg(operand); + + if (operandReg != targetReg) + { + inst_RV_RV(INS_mov, targetReg, operandReg, targetType); + } + + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + inst_RV(ins, targetReg, targetType); + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForBswap(GenTree* tree) +{ + // TODO: If we're swapping immediately after a read from memory or immediately before + // a write to memory, use the MOVBE instruction instead of the BSWAP instruction if + // the platform supports it. 
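For reference, the genCodeForBswap body that follows chooses between two instruction forms: "bswap reg" for 32/64-bit operands and "ror reg.16, 8" for GT_BSWAP16. A minimal illustrative sketch of the values those forms produce (plain C++, not part of the patch and not JIT code):

    #include <cstdint>

    // bswap r32: full byte reverse of a 32-bit value.
    static uint32_t Bswap32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0x0000FF00u) | ((x << 8) & 0x00FF0000u) | (x << 24);
    }

    // ror r16, 8: rotating a 16-bit value by 8 swaps its two bytes, which is
    // why no 16-bit BSWAP encoding is needed for GT_BSWAP16.
    static uint16_t Bswap16ViaRor(uint16_t x)
    {
        return (uint16_t)((uint16_t)(x >> 8) | (uint16_t)(x << 8));
    }

For example, Bswap16ViaRor(0xAABB) yields 0xBBAA, matching the emitted 16-bit rotate.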
+ + assert(tree->OperIs(GT_BSWAP, GT_BSWAP16)); + + regNumber targetReg = tree->GetRegNum(); + var_types targetType = tree->TypeGet(); + + GenTree* operand = tree->gtGetOp1(); + assert(operand->isUsedFromReg()); + regNumber operandReg = genConsumeReg(operand); + + if (operandReg != targetReg) + { + inst_RV_RV(INS_mov, targetReg, operandReg, targetType); + } + + if (tree->OperIs(GT_BSWAP)) + { + // 32-bit and 64-bit byte swaps use "bswap reg" + inst_RV(INS_bswap, targetReg, targetType); + } + else + { + // 16-bit byte swaps use "ror reg.16, 8" + inst_RV_IV(INS_ror_N, targetReg, 8 /* val */, emitAttr::EA_2BYTE); + } + + genProduceReg(tree); +} + +// Generate code to get the high N bits of a N*N=2N bit multiplication result +void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) +{ + assert(!treeNode->gtOverflowEx()); + + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + emitAttr size = emitTypeSize(treeNode); + GenTree* op1 = treeNode->AsOp()->gtOp1; + GenTree* op2 = treeNode->AsOp()->gtOp2; + + // to get the high bits of the multiply, we are constrained to using the + // 1-op form: RDX:RAX = RAX * rm + // The 3-op form (Rx=Ry*Rz) does not support it. + + genConsumeOperands(treeNode->AsOp()); + + GenTree* regOp = op1; + GenTree* rmOp = op2; + + // Set rmOp to the memory operand (if any) + if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->GetRegNum() == REG_RAX))) + { + regOp = op2; + rmOp = op1; + } + assert(regOp->isUsedFromReg()); + + // Setup targetReg when neither of the source operands was a matching register + if (regOp->GetRegNum() != REG_RAX) + { + inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->GetRegNum(), targetType); + } + + instruction ins; + if ((treeNode->gtFlags & GTF_UNSIGNED) == 0) + { + ins = INS_imulEAX; + } + else + { + ins = INS_mulEAX; + } + emit->emitInsBinary(ins, size, treeNode, rmOp); + + // Move the result to the desired register, if necessary + if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX) + { + inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); + } + + genProduceReg(treeNode); +} + +#ifdef TARGET_X86 +//------------------------------------------------------------------------ +// genCodeForLongUMod: Generate code for a tree of the form +// `(umod (gt_long x y) (const int))` +// +// Arguments: +// node - the node for which to generate code +// +void CodeGen::genCodeForLongUMod(GenTreeOp* node) +{ + assert(node != nullptr); + assert(node->OperGet() == GT_UMOD); + assert(node->TypeGet() == TYP_INT); + + GenTreeOp* const dividend = node->gtOp1->AsOp(); + assert(dividend->OperGet() == GT_LONG); + assert(varTypeIsLong(dividend)); + + genConsumeOperands(node); + + GenTree* const dividendLo = dividend->gtOp1; + GenTree* const dividendHi = dividend->gtOp2; + assert(dividendLo->isUsedFromReg()); + assert(dividendHi->isUsedFromReg()); + + GenTree* const divisor = node->gtOp2; + assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); + assert(divisor->gtSkipReloadOrCopy()->isUsedFromReg()); + assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2); + assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff); + + // dividendLo must be in RAX; dividendHi must be in RDX + genCopyRegIfNeeded(dividendLo, REG_EAX); + genCopyRegIfNeeded(dividendHi, REG_EDX); + + // At this point, EAX:EDX contains the 64bit dividend and op2->GetRegNum() + // contains the 32bit divisor. 
We want to generate the following code: + // + // cmp edx, divisor->GetRegNum() + // jb noOverflow + // + // mov temp, eax + // mov eax, edx + // xor edx, edx + // div divisor->GetRegNum() + // mov eax, temp + // + // noOverflow: + // div divisor->GetRegNum() + // + // This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c. + + BasicBlock* const noOverflow = genCreateTempLabel(); + + // cmp edx, divisor->GetRegNum() + // jb noOverflow + inst_RV_RV(INS_cmp, REG_EDX, divisor->GetRegNum()); + inst_JMP(EJ_jb, noOverflow); + + // mov temp, eax + // mov eax, edx + // xor edx, edx + // div divisor->GetRegNum() + // mov eax, temp + const regNumber tempReg = node->GetSingleTempReg(); + inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT); + inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); + inst_RV(INS_div, divisor->GetRegNum(), TYP_INT); + inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT); + + // noOverflow: + // div divisor->GetRegNum() + genDefineTempLabel(noOverflow); + inst_RV(INS_div, divisor->GetRegNum(), TYP_INT); + + const regNumber targetReg = node->GetRegNum(); + if (targetReg != REG_EDX) + { + inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT); + } + genProduceReg(node); +} +#endif // TARGET_X86 + +//------------------------------------------------------------------------ +// genCodeForDivMod: Generate code for a DIV or MOD operation. +// +// Arguments: +// treeNode - the node to generate the code for +// +void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) +{ + assert(treeNode->OperIs(GT_DIV, GT_UDIV, GT_MOD, GT_UMOD)); + + GenTree* dividend = treeNode->gtOp1; + +#ifdef TARGET_X86 + if (varTypeIsLong(dividend->TypeGet())) + { + genCodeForLongUMod(treeNode); + return; + } +#endif // TARGET_X86 + + GenTree* divisor = treeNode->gtOp2; + genTreeOps oper = treeNode->OperGet(); + emitAttr size = emitTypeSize(treeNode); + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + + // Node's type must be int/native int, small integer types are not + // supported and floating point types are handled by genCodeForBinary. + assert(varTypeIsIntOrI(targetType)); + // dividend is in a register. + assert(dividend->isUsedFromReg()); + + genConsumeOperands(treeNode->AsOp()); + // dividend must be in RAX + genCopyRegIfNeeded(dividend, REG_RAX); + + // zero or sign extend rax to rdx + if (oper == GT_UMOD || oper == GT_UDIV || + (dividend->IsIntegralConst() && (dividend->AsIntConCommon()->IconValue() > 0))) + { + instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); + } + else + { + emit->emitIns(INS_cdq, size); + // the cdq instruction writes RDX, So clear the gcInfo for RDX + gcInfo.gcMarkRegSetNpt(RBM_RDX); + } + + // Perform the 'targetType' (64-bit or 32-bit) divide instruction + instruction ins; + if (oper == GT_UMOD || oper == GT_UDIV) + { + ins = INS_div; + } + else + { + ins = INS_idiv; + } + + emit->emitInsBinary(ins, size, treeNode, divisor); + + // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX. 
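The genCodeForLongUMod sequence above avoids a divide fault by first reducing the high half of the dividend, relying on the identity (a * 2^32 + b) % c == ((a % c) * 2^32 + b) % c quoted in its comment. A small, self-contained C++ check of that identity (illustrative only; the names below are not JIT code):

    #include <cassert>
    #include <cstdint>

    // Mirrors the emitted sequence: reduce the high 32 bits first so the final
    // 64-by-32 division cannot produce a quotient wider than 32 bits.
    static uint32_t UMod64By32(uint32_t hi, uint32_t lo, uint32_t divisor)
    {
        uint64_t reducedHi = hi % divisor;            // first DIV (skipped when hi < divisor)
        uint64_t narrowed  = (reducedHi << 32) | lo;  // high part is now < divisor
        return (uint32_t)(narrowed % divisor);        // second DIV; its remainder is the result (EDX in the emitted code)
    }

    int main()
    {
        const uint32_t hi = 0x12345678, lo = 0x9ABCDEF0, divisor = 0x3FFFFFFB; // within the 2..0x3fffffff range asserted above
        const uint64_t full = ((uint64_t)hi << 32) | lo;
        assert(UMod64By32(hi, lo, divisor) == (uint32_t)(full % divisor));
        return 0;
    }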
+ // Move the result to the desired register, if necessary + if (oper == GT_DIV || oper == GT_UDIV) + { + if (targetReg != REG_RAX) + { + inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); + } + } + else + { + assert((oper == GT_MOD) || (oper == GT_UMOD)); + if (targetReg != REG_RDX) + { + inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); + } + } + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForBinary: Generate code for many binary arithmetic operators +// +// Arguments: +// treeNode - The binary operation for which we are generating code. +// +// Return Value: +// None. +// +// Notes: +// Integer MUL and DIV variants have special constraints on x64 so are not handled here. +// See the assert below for the operators that are handled. + +void CodeGen::genCodeForBinary(GenTreeOp* treeNode) +{ +#ifdef DEBUG + bool isValidOper = treeNode->OperIs(GT_ADD, GT_SUB); + if (varTypeIsFloating(treeNode->TypeGet())) + { + isValidOper |= treeNode->OperIs(GT_MUL, GT_DIV); + } + else + { + isValidOper |= treeNode->OperIs(GT_AND, GT_OR, GT_XOR); +#ifndef TARGET_64BIT + isValidOper |= treeNode->OperIs(GT_ADD_LO, GT_ADD_HI, GT_SUB_LO, GT_SUB_HI); +#endif + } + assert(isValidOper); +#endif + + genConsumeOperands(treeNode); + + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + + // Commutative operations can mark op1 as contained or reg-optional to generate "op reg, memop/immed" + if (!op1->isUsedFromReg()) + { + assert(treeNode->OperIsCommutative()); + assert(op1->isMemoryOp() || op1->IsLocal() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || + op1->IsRegOptional()); + + op1 = treeNode->gtGetOp2(); + op2 = treeNode->gtGetOp1(); + } + + instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); + + // The arithmetic node must be sitting in a register (since it's not contained) + noway_assert(targetReg != REG_NA); + + regNumber op1reg = op1->isUsedFromReg() ? op1->GetRegNum() : REG_NA; + regNumber op2reg = op2->isUsedFromReg() ? 
op2->GetRegNum() : REG_NA; + + if (varTypeIsFloating(treeNode->TypeGet())) + { + // floating-point addition, subtraction, multiplication, and division + // all have RMW semantics if VEX support is not available + + bool isRMW = !compiler->canUseVexEncoding(); + inst_RV_RV_TT(ins, emitTypeSize(treeNode), targetReg, op1reg, op2, isRMW); + + genProduceReg(treeNode); + return; + } + + GenTree* dst; + GenTree* src; + + // This is the case of reg1 = reg1 op reg2 + // We're ready to emit the instruction without any moves + if (op1reg == targetReg) + { + dst = op1; + src = op2; + } + // We have reg1 = reg2 op reg1 + // In order for this operation to be correct + // we need that op is a commutative operation so + // we can convert it into reg1 = reg1 op reg2 and emit + // the same code as above + else if (op2reg == targetReg) + { + noway_assert(GenTree::OperIsCommutative(oper)); + dst = op2; + src = op1; + } + // now we know there are 3 different operands so attempt to use LEA + else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags + && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->gtSetFlags()) + { + if (op2->isContainedIntOrIImmed()) + { + emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, + (int)op2->AsIntConCommon()->IconValue()); + } + else + { + assert(op2reg != REG_NA); + emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0); + } + genProduceReg(treeNode); + return; + } + // dest, op1 and op2 registers are different: + // reg3 = reg1 op reg2 + // We can implement this by issuing a mov: + // reg3 = reg1 + // reg3 = reg3 op reg2 + else + { + var_types op1Type = op1->TypeGet(); + inst_RV_RV(ins_Copy(op1Type), targetReg, op1reg, op1Type); + regSet.verifyRegUsed(targetReg); + gcInfo.gcMarkRegPtrVal(targetReg, op1Type); + dst = treeNode; + src = op2; + } + + // try to use an inc or dec + if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx()) + { + if (src->IsIntegralConst(1)) + { + emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg); + genProduceReg(treeNode); + return; + } + else if (src->IsIntegralConst(-1)) + { + emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg); + genProduceReg(treeNode); + return; + } + } + regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src); + noway_assert(r == targetReg); + + if (treeNode->gtOverflowEx()) + { +#if !defined(TARGET_64BIT) + assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI); +#else + assert(oper == GT_ADD || oper == GT_SUB); +#endif + genCheckOverflow(treeNode); + } + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForMul: Generate code for a MUL operation. +// +// Arguments: +// treeNode - the node to generate the code for +// +void CodeGen::genCodeForMul(GenTreeOp* treeNode) +{ + assert(treeNode->OperIs(GT_MUL)); + + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + + // Node's type must be int or long (only on x64), small integer types are not + // supported and floating point types are handled by genCodeForBinary. 
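genCodeForMul, whose body follows, strength-reduces multiplication by an immediate 3, 5, or 9 into a single LEA with base = index = the source register and scale = imm - 1 (and a power-of-two immediate into a shift). A quick illustrative check of the LEA identity, in plain C++ rather than JIT code:

    #include <cassert>
    #include <cstdint>

    // "lea target, [x + x*scale]" computes x * (scale + 1); scales of 2, 4 and 8
    // therefore cover the imm == 3, 5 and 9 cases handled below.
    static int64_t MulViaLea(int64_t x, int scale)
    {
        return x + x * scale;
    }

    int main()
    {
        for (int64_t x : {INT64_C(0), INT64_C(1), INT64_C(7), INT64_C(-13), INT64_C(123456789)})
        {
            assert(MulViaLea(x, 2) == x * 3);
            assert(MulViaLea(x, 4) == x * 5);
            assert(MulViaLea(x, 8) == x * 9);
        }
        return 0;
    }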
+ assert(varTypeIsIntOrI(targetType));
+
+ instruction ins;
+ emitAttr size = emitTypeSize(treeNode);
+ bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
+ bool requiresOverflowCheck = treeNode->gtOverflowEx();
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* op2 = treeNode->gtGetOp2();
+
+ // there are 3 forms of x64 multiply:
+ // 1-op form with 128 result: RDX:RAX = RAX * rm
+ // 2-op form: reg *= rm
+ // 3-op form: reg = rm * imm
+
+ genConsumeOperands(treeNode);
+
+ // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
+ //
+ // immOp :: Only one operand can be an immediate
+ // rmOp :: Only one operand can be a memory op.
+ // regOp :: A register op (especially the operand that matches 'targetReg')
+ // (can be nullptr when we have both a memory op and an immediate op)
+
+ GenTree* immOp = nullptr;
+ GenTree* rmOp = op1;
+ GenTree* regOp;
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ immOp = op2;
+ }
+ else if (op1->isContainedIntOrIImmed())
+ {
+ immOp = op1;
+ rmOp = op2;
+ }
+
+ if (immOp != nullptr)
+ {
+ // CQ: When possible use LEA for mul by imm 3, 5 or 9
+ ssize_t imm = immOp->AsIntConCommon()->IconValue();
+
+ if (!requiresOverflowCheck && rmOp->isUsedFromReg() && ((imm == 3) || (imm == 5) || (imm == 9)))
+ {
+ // We will use the LEA instruction to perform this multiply
+ // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
+ unsigned int scale = (unsigned int)(imm - 1);
+ GetEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->GetRegNum(), rmOp->GetRegNum(), scale, 0);
+ }
+ else if (!requiresOverflowCheck && rmOp->isUsedFromReg() && (imm == genFindLowestBit(imm)) && (imm != 0))
+ {
+ // Use shift for constant multiply when legal
+ uint64_t zextImm = static_cast<uint64_t>(static_cast<size_t>(imm));
+ unsigned int shiftAmount = genLog2(zextImm);
+
+ if (targetReg != rmOp->GetRegNum())
+ {
+ // Copy reg src to dest register
+ inst_RV_RV(INS_mov, targetReg, rmOp->GetRegNum(), targetType);
+ }
+ inst_RV_SH(INS_shl, size, targetReg, shiftAmount);
+ }
+ else
+ {
+ // use the 3-op form with immediate
+ ins = GetEmitter()->inst3opImulForReg(targetReg);
+ emit->emitInsBinary(ins, size, rmOp, immOp);
+ }
+ }
+ else // we have no contained immediate operand
+ {
+ regOp = op1;
+ rmOp = op2;
+
+ regNumber mulTargetReg = targetReg;
+ if (isUnsignedMultiply && requiresOverflowCheck)
+ {
+ ins = INS_mulEAX;
+ mulTargetReg = REG_RAX;
+ }
+ else
+ {
+ ins = INS_imul;
+ }
+
+ // Set rmOp to the memory operand (if any)
+ // or set regOp to the op2 when it has the matching target register for our multiply op
+ //
+ if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->GetRegNum() == mulTargetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(regOp->isUsedFromReg());
+
+ // Setup targetReg when neither of the source operands was a matching register
+ if (regOp->GetRegNum() != mulTargetReg)
+ {
+ inst_RV_RV(INS_mov, mulTargetReg, regOp->GetRegNum(), targetType);
+ }
+
+ emit->emitInsBinary(ins, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+
+ if (requiresOverflowCheck)
+ {
+ // Overflow checking is only used for non-floating point types
+ noway_assert(!varTypeIsFloating(treeNode));
+
+ genCheckOverflow(treeNode);
+ }
+
+ genProduceReg(treeNode);
+}
+
+#ifdef FEATURE_SIMD
+
+//------------------------------------------------------------------------
+// genSIMDSplitReturn: 
Generates code for returning a fixed-size SIMD type that lives +// in a single register, but is returned in multiple registers. +// +// Arguments: +// src - The source of the return +// retTypeDesc - The return type descriptor. +// +void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) +{ + assert(varTypeIsSIMD(src)); + assert(src->isUsedFromReg()); + + // This is a case of operand is in a single reg and needs to be + // returned in multiple ABI return registers. + regNumber opReg = src->GetRegNum(); + regNumber reg0 = retTypeDesc->GetABIReturnReg(0); + regNumber reg1 = retTypeDesc->GetABIReturnReg(1); + + assert((reg0 != REG_NA) && (reg1 != REG_NA) && (opReg != REG_NA)); + + const bool srcIsFloatReg = genIsValidFloatReg(opReg); + const bool dstIsFloatReg = genIsValidFloatReg(reg0); + assert(srcIsFloatReg); + +#ifdef TARGET_AMD64 + assert(src->TypeIs(TYP_SIMD16)); + assert(srcIsFloatReg == dstIsFloatReg); + if (opReg != reg0 && opReg != reg1) + { + // Operand reg is different from return regs. + // Copy opReg to reg0 and let it to be handled by one of the + // two cases below. + inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); + opReg = reg0; + } + + if (opReg == reg0) + { + assert(opReg != reg1); + // reg1 = opReg. + inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg1, opReg, TYP_SIMD16); + } + else + { + assert(opReg == reg1); + + // reg0 = opReg. + + inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); + } + // reg0 - already has required 8-byte in bit position [63:0]. + // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0]. + inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01); + +#else // TARGET_X86 + assert(src->TypeIs(TYP_SIMD8)); + assert(srcIsFloatReg != dstIsFloatReg); + assert((reg0 == REG_EAX) && (reg1 == REG_EDX)); + // reg0 = opReg[31:0] + inst_RV_RV(ins_Copy(opReg, TYP_INT), reg0, opReg, TYP_INT); + // reg1 = opRef[61:32] + if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1); + } + else + { + int8_t shuffleMask = 1; // we only need [61:32]->[31:0], the rest is not read. + inst_RV_TT_IV(INS_pshufd, EA_8BYTE, opReg, src, shuffleMask); + inst_RV_RV(ins_Copy(opReg, TYP_INT), reg1, opReg, TYP_INT); + } +#endif // TARGET_X86 +} + +#endif // FEATURE_SIMD + +#if defined(TARGET_X86) + +//------------------------------------------------------------------------ +// genFloatReturn: Generates code for float return statement for x86. +// +// Note: treeNode's and op1's registers are already consumed. +// +// Arguments: +// treeNode - The GT_RETURN or GT_RETFILT tree node with float type. +// +// Return Value: +// None +// +void CodeGen::genFloatReturn(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); + assert(varTypeIsFloating(treeNode)); + + GenTree* op1 = treeNode->gtGetOp1(); + // Spill the return value register from an XMM register to the stack, then load it on the x87 stack. + // If it already has a home location, use that. Otherwise, we need a temp. + if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->AsLclVarCommon()->GetLclNum()].lvOnFrame) + { + if (compiler->lvaTable[op1->AsLclVarCommon()->GetLclNum()].GetRegNum() != REG_STK) + { + op1->gtFlags |= GTF_SPILL; + inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->AsLclVarCommon()->GetLclNum())), + emitTypeSize(op1->TypeGet()), op1, op1->GetRegNum()); + } + // Now, load it to the fp stack. 
+ GetEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->GetLclNum(), 0); + } + else + { + // Spill the value, which should be in a register, then load it to the fp stack. + // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet). + op1->gtFlags |= GTF_SPILL; + regSet.rsSpillTree(op1->GetRegNum(), op1); + op1->gtFlags |= GTF_SPILLED; + op1->gtFlags &= ~GTF_SPILL; + + TempDsc* t = regSet.rsUnspillInPlace(op1, op1->GetRegNum()); + inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); + op1->gtFlags &= ~GTF_SPILLED; + regSet.tmpRlsTemp(t); + } +} +#endif // TARGET_X86 + +//------------------------------------------------------------------------ +// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE/GT_CMP node. +// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForCompare(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT, GT_TEST_EQ, GT_TEST_NE, GT_CMP)); + + // TODO-XArch-CQ: Check if we can use the currently set flags. + // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register + // (signed < or >= where targetReg != REG_NA) + + GenTree* op1 = tree->gtOp1; + var_types op1Type = op1->TypeGet(); + + if (varTypeIsFloating(op1Type)) + { + genCompareFloat(tree); + } + else + { + genCompareInt(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForBT: Generates code for a GT_BT node. +// +// Arguments: +// tree - The node. +// +void CodeGen::genCodeForBT(GenTreeOp* bt) +{ + assert(bt->OperIs(GT_BT)); + + GenTree* op1 = bt->gtGetOp1(); + GenTree* op2 = bt->gtGetOp2(); + var_types type = genActualType(op1->TypeGet()); + + assert(op1->isUsedFromReg() && op2->isUsedFromReg()); + assert((genTypeSize(type) >= genTypeSize(TYP_INT)) && (genTypeSize(type) <= genTypeSize(TYP_I_IMPL))); + + genConsumeOperands(bt); + // Note that the emitter doesn't fully support INS_bt, it only supports the reg,reg + // form and encodes the registers in reverse order. To get the correct order we need + // to reverse the operands when calling emitIns_R_R. + GetEmitter()->emitIns_R_R(INS_bt, emitTypeSize(type), op2->GetRegNum(), op1->GetRegNum()); +} + +// clang-format off +const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] +{ + { }, // NONE + { }, // 1 + { EJ_jl }, // SLT + { EJ_jle }, // SLE + { EJ_jge }, // SGE + { EJ_jg }, // SGT + { EJ_js }, // S + { EJ_jns }, // NS + + { EJ_je }, // EQ + { EJ_jne }, // NE + { EJ_jb }, // ULT + { EJ_jbe }, // ULE + { EJ_jae }, // UGE + { EJ_ja }, // UGT + { EJ_jb }, // C + { EJ_jae }, // NC + + // Floating point compare instructions (UCOMISS, UCOMISD etc.) set the condition flags as follows: + // ZF PF CF Meaning + // --------------------- + // 1 1 1 Unordered + // 0 0 0 Greater + // 0 0 1 Less Than + // 1 0 0 Equal + // + // Since ZF and CF are also set when the result is unordered, in some cases we first need to check + // PF before checking ZF/CF. In general, ordered conditions will result in a jump only if PF is not + // set and unordered conditions will result in a jump only if PF is set. 
+ + { EJ_jnp, GT_AND, EJ_je }, // FEQ + { EJ_jne }, // FNE + { EJ_jnp, GT_AND, EJ_jb }, // FLT + { EJ_jnp, GT_AND, EJ_jbe }, // FLE + { EJ_jae }, // FGE + { EJ_ja }, // FGT + { EJ_jo }, // O + { EJ_jno }, // NO + + { EJ_je }, // FEQU + { EJ_jp, GT_OR, EJ_jne }, // FNEU + { EJ_jb }, // FLTU + { EJ_jbe }, // FLEU + { EJ_jp, GT_OR, EJ_jae }, // FGEU + { EJ_jp, GT_OR, EJ_ja }, // FGTU + { EJ_jp }, // P + { EJ_jnp }, // NP +}; +// clang-format on + +//------------------------------------------------------------------------ +// inst_SETCC: Generate code to set a register to 0 or 1 based on a condition. +// +// Arguments: +// condition - The condition +// type - The type of the value to be produced +// dstReg - The destination register to be set to 1 or 0 +// +void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg) +{ + assert(varTypeIsIntegral(type)); + assert(genIsValidIntReg(dstReg) && isByteReg(dstReg)); + + const GenConditionDesc& desc = GenConditionDesc::Get(condition); + + inst_SET(desc.jumpKind1, dstReg); + + if (desc.oper != GT_NONE) + { + BasicBlock* labelNext = genCreateTempLabel(); + inst_JMP((desc.oper == GT_OR) ? desc.jumpKind1 : emitter::emitReverseJumpKind(desc.jumpKind1), labelNext); + inst_SET(desc.jumpKind2, dstReg); + genDefineTempLabel(labelNext); + } + + if (!varTypeIsByte(type)) + { + GetEmitter()->emitIns_R_R(INS_movzx, EA_1BYTE, dstReg, dstReg); + } +} + +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. +// +// Arguments: +// tree - the GT_RETURNTRAP node +// +void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) +{ + assert(tree->OperGet() == GT_RETURNTRAP); + + // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC + // based on the contents of 'data' + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + GenTreeIntCon cns = intForm(TYP_INT, 0); + cns.SetContained(); + GetEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns); + + BasicBlock* skipLabel = genCreateTempLabel(); + + inst_JMP(EJ_je, skipLabel); + + // emit the call to the EE-helper that stops for GC (or other reasons) + regNumber tmpReg = tree->GetSingleTempReg(RBM_ALLINT); + assert(genIsValidIntReg(tmpReg)); + + genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg); + genDefineTempLabel(skipLabel); +} + +/***************************************************************************** + * + * Generate code for a single node in the tree. + * Preconditions: All operands have been evaluated + * + */ +void CodeGen::genCodeForTreeNode(GenTree* treeNode) +{ + regNumber targetReg; +#if !defined(TARGET_64BIT) + if (treeNode->TypeGet() == TYP_LONG) + { + // All long enregistered nodes will have been decomposed into their + // constituent lo and hi nodes. + targetReg = REG_NA; + } + else +#endif // !defined(TARGET_64BIT) + { + targetReg = treeNode->GetRegNum(); + } + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + +#ifdef DEBUG + // Validate that all the operands for the current node are consumed in order. + // This is important because LSRA ensures that any necessary copies will be + // handled correctly. + lastConsumedNode = nullptr; + if (compiler->verbose) + { + unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio + compiler->gtDispLIRNode(treeNode, "Generating: "); + } +#endif // DEBUG + + // Is this a node whose value is already in a register? 
LSRA denotes this by + // setting the GTF_REUSE_REG_VAL flag. + if (treeNode->IsReuseRegVal()) + { + // For now, this is only used for constant nodes. + assert((treeNode->OperIsConst())); + JITDUMP(" TreeNode is marked ReuseReg\n"); + return; + } + + // contained nodes are part of their parents for codegen purposes + // ex : immediates, most LEAs + if (treeNode->isContained()) + { + return; + } + + switch (treeNode->gtOper) + { +#ifndef JIT32_GCENCODER + case GT_START_NONGC: + GetEmitter()->emitDisableGC(); + break; +#endif // !defined(JIT32_GCENCODER) + + case GT_START_PREEMPTGC: + // Kill callee saves GC registers, and create a label + // so that information gets propagated to the emitter. + gcInfo.gcMarkRegSetNpt(RBM_INT_CALLEE_SAVED); + genDefineTempLabel(genCreateTempLabel()); + break; + + case GT_PROF_HOOK: +#ifdef PROFILING_SUPPORTED + // We should be seeing this only if profiler hook is needed + noway_assert(compiler->compIsProfilerHookNeeded()); + + // Right now this node is used only for tail calls. In future if + // we intend to use it for Enter or Leave hooks, add a data member + // to this node indicating the kind of profiler hook. For example, + // helper number can be used. + genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif // PROFILING_SUPPORTED + break; + + case GT_LCLHEAP: + genLclHeap(treeNode); + break; + + case GT_CNS_INT: +#ifdef TARGET_X86 + assert(!treeNode->IsIconHandle(GTF_ICON_TLS_HDL)); +#endif // TARGET_X86 + FALLTHROUGH; + + case GT_CNS_DBL: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + + case GT_NOT: + case GT_NEG: + genCodeForNegNot(treeNode); + break; + + case GT_BSWAP: + case GT_BSWAP16: + genCodeForBswap(treeNode); + break; + + case GT_DIV: + if (varTypeIsFloating(treeNode->TypeGet())) + { + genCodeForBinary(treeNode->AsOp()); + break; + } + FALLTHROUGH; + case GT_MOD: + case GT_UMOD: + case GT_UDIV: + genCodeForDivMod(treeNode->AsOp()); + break; + + case GT_OR: + case GT_XOR: + case GT_AND: + assert(varTypeIsIntegralOrI(treeNode)); + + FALLTHROUGH; + +#if !defined(TARGET_64BIT) + case GT_ADD_LO: + case GT_ADD_HI: + case GT_SUB_LO: + case GT_SUB_HI: +#endif // !defined(TARGET_64BIT) + + case GT_ADD: + case GT_SUB: + genCodeForBinary(treeNode->AsOp()); + break; + + case GT_MUL: + if (varTypeIsFloating(treeNode->TypeGet())) + { + genCodeForBinary(treeNode->AsOp()); + break; + } + genCodeForMul(treeNode->AsOp()); + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROL: + case GT_ROR: + genCodeForShift(treeNode); + break; + +#if !defined(TARGET_64BIT) + + case GT_LSH_HI: + case GT_RSH_LO: + genCodeForShiftLong(treeNode); + break; + +#endif // !defined(TARGET_64BIT) + + case GT_CAST: + genCodeForCast(treeNode->AsOp()); + break; + + case GT_BITCAST: + genCodeForBitCast(treeNode->AsOp()); + break; + + case GT_LCL_FLD_ADDR: + case GT_LCL_VAR_ADDR: + genCodeForLclAddr(treeNode); + break; + + case GT_LCL_FLD: + genCodeForLclFld(treeNode->AsLclFld()); + break; + + case GT_LCL_VAR: + genCodeForLclVar(treeNode->AsLclVar()); + break; + + case GT_STORE_LCL_FLD: + genCodeForStoreLclFld(treeNode->AsLclFld()); + break; + + case GT_STORE_LCL_VAR: + genCodeForStoreLclVar(treeNode->AsLclVar()); + break; + + case GT_RETFILT: + case GT_RETURN: + genReturn(treeNode); + break; + + case GT_LEA: + // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. 
+ genLeaInstruction(treeNode->AsAddrMode()); + break; + + case GT_INDEX_ADDR: + genCodeForIndexAddr(treeNode->AsIndexAddr()); + break; + + case GT_IND: + genCodeForIndir(treeNode->AsIndir()); + break; + + case GT_MULHI: +#ifdef TARGET_X86 + case GT_MUL_LONG: +#endif + genCodeForMulHi(treeNode->AsOp()); + break; + + case GT_INTRINSIC: + genIntrinsic(treeNode); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + genSIMDIntrinsic(treeNode->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + genHWIntrinsic(treeNode->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_CKFINITE: + genCkfinite(treeNode); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_TEST_EQ: + case GT_TEST_NE: + case GT_CMP: + genCodeForCompare(treeNode->AsOp()); + break; + + case GT_JTRUE: + genCodeForJumpTrue(treeNode->AsOp()); + break; + + case GT_JCC: + genCodeForJcc(treeNode->AsCC()); + break; + + case GT_SETCC: + genCodeForSetcc(treeNode->AsCC()); + break; + + case GT_BT: + genCodeForBT(treeNode->AsOp()); + break; + + case GT_RETURNTRAP: + genCodeForReturnTrap(treeNode->AsOp()); + break; + + case GT_STOREIND: + genCodeForStoreInd(treeNode->AsStoreInd()); + break; + + case GT_COPY: + // This is handled at the time we call genConsumeReg() on the GT_COPY + break; + + case GT_LIST: + case GT_FIELD_LIST: + // Should always be marked contained. + assert(!"LIST, FIELD_LIST nodes should always be marked contained."); + break; + + case GT_SWAP: + genCodeForSwap(treeNode->AsOp()); + break; + + case GT_PUTARG_STK: + genPutArgStk(treeNode->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + genPutArgReg(treeNode->AsOp()); + break; + + case GT_CALL: + genCallInstruction(treeNode->AsCall()); + break; + + case GT_JMP: + genJmpMethod(treeNode); + break; + + case GT_LOCKADD: + genCodeForLockAdd(treeNode->AsOp()); + break; + + case GT_XCHG: + case GT_XADD: + genLockedInstructions(treeNode->AsOp()); + break; + + case GT_XORR: + case GT_XAND: + NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet."); + break; + + case GT_MEMORYBARRIER: + { + CodeGen::BarrierKind barrierKind = + treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL; + + instGen_MemoryBarrier(barrierKind); + break; + } + + case GT_CMPXCHG: + genCodeForCmpXchg(treeNode->AsCmpXchg()); + break; + + case GT_RELOAD: + // do nothing - reload is just a marker. + // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child + // into the register specified in this node. + break; + + case GT_NOP: + break; + + case GT_KEEPALIVE: + genConsumeRegs(treeNode->AsOp()->gtOp1); + break; + + case GT_NO_OP: + GetEmitter()->emitIns_Nop(1); + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + genRangeCheck(treeNode); + break; + + case GT_PHYSREG: + genCodeForPhysReg(treeNode->AsPhysReg()); + break; + + case GT_NULLCHECK: + genCodeForNullCheck(treeNode->AsIndir()); + break; + + case GT_CATCH_ARG: + + noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. 
*/ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + genConsumeReg(treeNode); + break; + +#if !defined(FEATURE_EH_FUNCLETS) + case GT_END_LFIN: + + // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates: + // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var + + size_t finallyNesting; + finallyNesting = treeNode->AsVal()->gtVal1; + noway_assert(treeNode->AsVal()->gtVal1 < compiler->compHndBBtabCount); + noway_assert(finallyNesting < compiler->compHndBBtabCount); + + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > + TARGET_POINTER_SIZE); // below doesn't underflow. + filterEndOffsetSlotOffs = + (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); + + size_t curNestingSlotOffs; + curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE); + GetEmitter()->emitIns_S_I(INS_mov, EA_PTRSIZE, compiler->lvaShadowSPslotsVar, (unsigned)curNestingSlotOffs, + 0); + break; +#endif // !FEATURE_EH_FUNCLETS + + case GT_PINVOKE_PROLOG: + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); + +#ifdef PSEUDORANDOM_NOP_INSERTION + // the runtime side requires the codegen here to be consistent + emit->emitDisableRandomNops(); +#endif // PSEUDORANDOM_NOP_INSERTION + break; + + case GT_LABEL: + genPendingCallLabel = genCreateTempLabel(); + emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->GetRegNum()); + break; + + case GT_STORE_OBJ: + case GT_STORE_DYN_BLK: + case GT_STORE_BLK: + genCodeForStoreBlk(treeNode->AsBlk()); + break; + + case GT_JMPTABLE: + genJumpTable(treeNode); + break; + + case GT_SWITCH_TABLE: + genTableBasedSwitch(treeNode); + break; + + case GT_ARR_INDEX: + genCodeForArrIndex(treeNode->AsArrIndex()); + break; + + case GT_ARR_OFFSET: + genCodeForArrOffset(treeNode->AsArrOffs()); + break; + + case GT_CLS_VAR_ADDR: + emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->AsClsVar()->gtClsVarHnd, 0); + genProduceReg(treeNode); + break; + +#if !defined(TARGET_64BIT) + case GT_LONG: + assert(treeNode->isUsedFromReg()); + genConsumeRegs(treeNode); + break; +#endif + + case GT_IL_OFFSET: + // Do nothing; these nodes are simply markers for debug info. + break; + + default: + { +#ifdef DEBUG + char message[256]; + _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n", + GenTree::OpName(treeNode->OperGet())); + NYIRAW(message); +#endif + assert(!"Unknown node in codegen"); + } + break; + } +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------- +// genMultiRegStoreToSIMDLocal: store multi-reg value to a single-reg SIMD local +// +// Arguments: +// lclNode - GentreeLclVar of GT_STORE_LCL_VAR +// +// Return Value: +// None +// +void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) +{ +#ifdef UNIX_AMD64_ABI + regNumber dst = lclNode->GetRegNum(); + GenTree* op1 = lclNode->gtGetOp1(); + GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); + unsigned regCount = + actualOp1->IsMultiRegLclVar() ? actualOp1->AsLclVar()->GetFieldCount(compiler) : actualOp1->GetMultiRegCount(); + assert(op1->IsMultiRegNode()); + genConsumeRegs(op1); + + // Right now the only enregistrable structs supported are SIMD types. 
+ // They are only returned in 1 or 2 registers - the 1 register case is + // handled as a regular STORE_LCL_VAR. + // This case is always a call (AsCall() will assert if it is not). + GenTreeCall* call = actualOp1->AsCall(); + const ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT); + + assert(regCount == 2); + assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0))); + assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1))); + + // This is a case where the two 8-bytes that comprise the operand are in + // two different xmm registers and need to be assembled into a single + // xmm register. + regNumber targetReg = lclNode->GetRegNum(); + regNumber reg0 = call->GetRegNumByIdx(0); + regNumber reg1 = call->GetRegNumByIdx(1); + + if (op1->IsCopyOrReload()) + { + // GT_COPY/GT_RELOAD will have valid reg for those positions + // that need to be copied or reloaded. + regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0); + if (reloadReg != REG_NA) + { + reg0 = reloadReg; + } + + reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1); + if (reloadReg != REG_NA) + { + reg1 = reloadReg; + } + } + + if (targetReg != reg0 && targetReg != reg1) + { + // targetReg = reg0; + // targetReg[127:64] = reg1[127:64] + inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE); + inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00); + } + else if (targetReg == reg0) + { + // (elided) targetReg = reg0 + // targetReg[127:64] = reg1[127:64] + inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00); + } + else + { + assert(targetReg == reg1); + // We need two shuffles to achieve this + // First: + // targetReg[63:0] = targetReg[63:0] + // targetReg[127:64] = reg0[63:0] + // + // Second: + // targetReg[63:0] = targetReg[127:64] + // targetReg[127:64] = targetReg[63:0] + // + // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg + // and next swap low and high 8-bytes of targetReg to have them + // rearranged in the right order. + inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00); + inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01); + } + genProduceReg(lclNode); +#else // !UNIX_AMD64_ABI + assert(!"Multireg store to SIMD reg not supported on Windows"); +#endif // !UNIX_AMD64_ABI +} +#endif // FEATURE_SIMD + +//------------------------------------------------------------------------ +// genAllocLclFrame: Probe the stack and allocate the local stack frame - subtract from SP. +// +// Arguments: +// frameSize - the size of the stack frame being allocated. +// initReg - register to use as a scratch register. +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// maskArgRegsLiveIn - incoming argument registers that are currently live. +// +// Return value: +// None +// +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) +{ + assert(compiler->compGeneratingProlog); + + if (frameSize == 0) + { + return; + } + + const target_size_t pageSize = compiler->eeGetPageSize(); + + if (frameSize == REGSIZE_BYTES) + { + // Frame size is the same as register size. 
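+ // Note: a single push both moves SP down by REGSIZE_BYTES and writes to the newly
+ // exposed slot, so it also serves as the stack probe for this one-slot frame.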
+ GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_EAX); + compiler->unwindAllocStack(frameSize); + } + else if (frameSize < pageSize) + { + GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize); + compiler->unwindAllocStack(frameSize); + + const unsigned lastProbedLocToFinalSp = frameSize; + + if (lastProbedLocToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) + { + // We haven't probed almost a complete page. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we need to probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + GetEmitter()->emitIns_R_AR(INS_test, EA_4BYTE, REG_EAX, REG_SPBASE, 0); + } + } + else + { +#ifdef TARGET_X86 + int spOffset = -(int)frameSize; + + if (compiler->info.compPublishStubParam) + { + GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_SECRET_STUB_PARAM); + spOffset += REGSIZE_BYTES; + } + + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, spOffset); + regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); + + genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN); + + if (compiler->info.compPublishStubParam) + { + GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, REG_SECRET_STUB_PARAM); + GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize); + } + else + { + GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG); + } +#else // !TARGET_X86 + static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == + RBM_NONE); + + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize); + regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); + + genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN); + + if (initReg == REG_DEFAULT_HELPER_CALL_TARGET) + { + *pInitRegZeroed = false; + } + + static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RBM_NONE); + + GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG); +#endif // !TARGET_X86 + + compiler->unwindAllocStack(frameSize); + + if (initReg == REG_STACK_PROBE_HELPER_ARG) + { + *pInitRegZeroed = false; + } + } + +#ifdef USING_SCOPE_INFO + if (!doubleAlignOrFramePointerUsed()) + { + psiAdjustStackLevel(frameSize); + } +#endif // USING_SCOPE_INFO +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustment: add a specified constant value to the stack pointer. +// No probe is done. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. +// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP +// adjustment from the emitter, using this register. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + // We assert that the SP change is less than one page. If it's greater, you should have called a + // function that does a probe, which will in turn call this function. + assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); + +#ifdef TARGET_X86 + if (regTmp != REG_NA) + { + // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment + // to ESP. So do the work in the count register. 
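+ // The net effect is (sketch):
+ //   mov  regTmp, esp
+ //   sub  regTmp, -spDelta
+ //   mov  esp, regTmp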
+ // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require + // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't + // track". + inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); + inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); + } + else +#endif // TARGET_X86 + { + inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); + } +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Should only be called as a helper for +// genStackPointerConstantAdjustmentLoopWithProbe. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, +// but the stack pointer doesn't move. +// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP +// adjustment from the emitter, using this register. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) +{ + GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); + genStackPointerConstantAdjustment(spDelta, regTmp); +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. +// This will generate a sequence of probes in-line. It is required for the case where we need to expose +// (not hide) the stack level adjustment. We can't use the dynamic loop in that case, because the total +// stack adjustment would not be visible to the emitter. It would be possible to use this version for +// multiple hidden constant stack level adjustments but we don't do that currently (we use the loop +// version in genStackPointerDynamicAdjustmentWithProbe instead). +// +// Arguments: +// spDelta - the value to add to SP. Must be negative. +// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP +// adjustment from the emitter, using this register. +// +// Return Value: +// Offset in bytes from SP to last probed address. +// +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + const target_size_t pageSize = compiler->eeGetPageSize(); + + ssize_t spRemainingDelta = spDelta; + do + { + ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); + spRemainingDelta -= spOneDelta; + } while (spRemainingDelta < 0); + + // What offset from the final SP was the last probe? This depends on the fact that + // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". + target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; + if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) + { + // We haven't probed almost a complete page. If lastTouchDelta==0, then spDelta was an exact + // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed + // the page, but very far from the end. 
If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + GetEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, 0); + lastTouchDelta = 0; + } + + return lastTouchDelta; +} + +//------------------------------------------------------------------------ +// genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer, +// and probe the stack as appropriate. +// +// Note that for x86, we hide the ESP adjustment from the emitter. To do that, currently, +// requires a temporary register and extra code. +// +// Arguments: +// regSpDelta - the register value to add to SP. The value in this register must be negative. +// This register might be trashed. +// regTmp - an available temporary register. Will be trashed. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp) +{ + assert(regSpDelta != REG_NA); + assert(regTmp != REG_NA); + + // Tickle the pages to ensure that ESP is always valid and is + // in sync with the "stack guard page". Note that in the worst + // case ESP is on the last byte of the guard page. Thus you must + // touch ESP-0 first not ESP-0x1000. + // + // Another subtlety is that you don't want ESP to be exactly on the + // boundary of the guard page because PUSH is predecrement, thus + // call setup would not touch the guard page but just beyond it. + // + // Note that we go through a few hoops so that ESP never points to + // illegal pages at any time during the tickling process + // + // add regSpDelta, ESP // reg now holds ultimate ESP + // jb loop // result is smaller than original ESP (no wrap around) + // xor regSpDelta, regSpDelta // Overflow, pick lowest possible number + // loop: + // test ESP, [ESP+0] // tickle the page + // mov regTmp, ESP + // sub regTmp, eeGetPageSize() + // mov ESP, regTmp + // cmp ESP, regSpDelta + // jae loop + // mov ESP, regSpDelta + + BasicBlock* loop = genCreateTempLabel(); + + inst_RV_RV(INS_add, regSpDelta, REG_SPBASE, TYP_I_IMPL); + inst_JMP(EJ_jb, loop); + + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regSpDelta); + + genDefineTempLabel(loop); + + // Tickle the decremented value. Note that it must be done BEFORE the update of ESP since ESP might already + // be on the guard page. It is OK to leave the final value of ESP on the guard page. + GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); + + // Subtract a page from ESP. This is a trick to avoid the emitter trying to track the + // decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly. + inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); + inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); + + inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL); + inst_JMP(EJ_jae, loop); + + // Move the final value to ESP + inst_RV_RV(INS_mov, REG_SPBASE, regSpDelta); +} + +//------------------------------------------------------------------------ +// genLclHeap: Generate code for localloc. +// +// Arguments: +// tree - the localloc tree to generate. +// +// Notes: +// Note that for x86, we don't track ESP movements while generating the localloc code. 
+// The ESP tracking is used to report stack pointer-relative GC info, which is not +// interesting while doing the localloc construction. Also, for functions with localloc, +// we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function +// call arguments. +// +// For x86, we store the ESP after the localloc is complete in the LocAllocSP +// variable. This variable is implicitly reported to the VM in the GC info (its position +// is defined by convention relative to other items), and is used by the GC to find the +// "base" stack pointer in functions with localloc. +// +void CodeGen::genLclHeap(GenTree* tree) +{ + assert(tree->OperGet() == GT_LCLHEAP); + assert(compiler->compLocallocUsed); + + GenTree* size = tree->AsOp()->gtOp1; + noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + + regNumber targetReg = tree->GetRegNum(); + regNumber regCnt = REG_NA; + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + BasicBlock* endLabel = nullptr; + target_ssize_t lastTouchDelta = (target_ssize_t)-1; + +#ifdef DEBUG + genStackPointerCheck(compiler->opts.compStackCheckOnRet, compiler->lvaReturnSpCheck); +#endif + + noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes + noway_assert(genStackLevel == 0); // Can't have anything on the stack + + unsigned stackAdjustment = 0; + + // compute the amount of memory to allocate to properly STACK_ALIGN. + size_t amount = 0; + if (size->IsCnsIntOrI()) + { + // If size is a constant, then it must be contained. + assert(size->isContained()); + + // If amount is zero then return null in targetReg + amount = size->AsIntCon()->gtIconVal; + if (amount == 0) + { + instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); + goto BAILOUT; + } + + // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN + amount = AlignUp(amount, STACK_ALIGN); + } + else + { + // The localloc requested memory size is non-constant. + + // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg. + genConsumeRegAndCopy(size, targetReg); + endLabel = genCreateTempLabel(); + GetEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg); + inst_JMP(EJ_je, endLabel); + + // Compute the size of the block to allocate and perform alignment. + // If compInitMem=true, we can reuse targetReg as regcnt, + // since we don't need any internal registers. + if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + if (regCnt != targetReg) + { + // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. + inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); + } + } + + // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done + // by code like: + // add reg, 15 + // and reg, -16 + // However, in the initialized memory case, we need the count of STACK_ALIGN-sized + // elements, not a byte count, after the alignment. So instead of the "and", which + // becomes unnecessary, generate a shift, e.g.: + // add reg, 15 + // shr reg, 4 + + inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type)); + + if (compiler->info.compInitMem) + { + // Convert the count from a count of bytes to a loop count. We will loop once per + // stack alignment size, so each loop will zero 4 bytes on Windows/x86, and 16 bytes + // on x64 and Linux/x86. 
+ // + // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size + // words per iteration on x64. We will shift off all the stack alignment bits + // added above, so there is no need for an 'and' instruction. + + // --- shr regCnt, 2 (or 4) --- + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT); + } + else + { + // Otherwise, mask off the low bits to align the byte count. + inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); + } + } + +#if FEATURE_FIXED_OUT_ARGS + // If we have an outgoing arg area then we must adjust the SP by popping off the + // outgoing arg area. We will restore it right before we return from this method. + // + // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following + // are the cases that need to be handled: + // i) Method has out-going arg area. + // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs). + // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space. + // ii) Method has no out-going arg area. + // Nothing to pop off from the stack. + if (compiler->lvaOutgoingArgSpaceSize > 0) + { + assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain + // aligned + inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); + stackAdjustment += compiler->lvaOutgoingArgSpaceSize; + } +#endif + + if (size->IsCnsIntOrI()) + { + // We should reach here only for non-zero, constant size allocations. + assert(amount > 0); + assert((amount % STACK_ALIGN) == 0); + assert((amount % REGSIZE_BYTES) == 0); + + // For small allocations we will generate up to six push 0 inline + size_t cntRegSizedWords = amount / REGSIZE_BYTES; + if (cntRegSizedWords <= 6) + { + for (; cntRegSizedWords != 0; cntRegSizedWords--) + { + inst_IV(INS_push_hide, 0); // push_hide means don't track the stack + } + + lastTouchDelta = 0; + + goto ALLOC_DONE; + } + + bool initMemOrLargeAlloc = + compiler->info.compInitMem || (amount >= compiler->eeGetPageSize()); // must be >= not > + +#ifdef TARGET_X86 + bool needRegCntRegister = true; +#else // !TARGET_X86 + bool needRegCntRegister = initMemOrLargeAlloc; +#endif // !TARGET_X86 + + if (needRegCntRegister) + { + // If compInitMem=true, we can reuse targetReg as regcnt. + // Since size is a constant, regCnt is not yet initialized. + assert(regCnt == REG_NA); + if (compiler->info.compInitMem) + { + assert(tree->AvailableTempRegCount() == 0); + regCnt = targetReg; + } + else + { + regCnt = tree->ExtractTempReg(); + } + } + + if (!initMemOrLargeAlloc) + { + // Since the size is less than a page, and we don't need to zero init memory, simply adjust ESP. + // ESP might already be in the guard page, so we must touch it BEFORE + // the alloc, not after. + + assert(amount < compiler->eeGetPageSize()); // must be < not <= + lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, regCnt); + goto ALLOC_DONE; + } + + // else, "mov regCnt, amount" + + if (compiler->info.compInitMem) + { + // When initializing memory, we want 'amount' to be the loop count. + assert((amount % STACK_ALIGN) == 0); + amount /= STACK_ALIGN; + } + + genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? 
TYP_INT : TYP_LONG); + } + + if (compiler->info.compInitMem) + { + // At this point 'regCnt' is set to the number of loop iterations for this loop, if each + // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes. + // Since we have to zero out the allocated memory AND ensure that RSP is always valid + // by tickling the pages, we will just push 0's on the stack. + + assert(genIsValidIntReg(regCnt)); + + // Loop: + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0); + unsigned const count = (STACK_ALIGN / REGSIZE_BYTES); + + for (unsigned i = 0; i < count; i++) + { + inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0 + } + // Note that the stack must always be aligned to STACK_ALIGN bytes + + // Decrement the loop counter and loop if not done. + inst_RV(INS_dec, regCnt, TYP_I_IMPL); + inst_JMP(EJ_jne, loop); + + lastTouchDelta = 0; + } + else + { + // At this point 'regCnt' is set to the total number of bytes to localloc. + // Negate this value before calling the function to adjust the stack (which + // adds to ESP). + + inst_RV(INS_NEG, regCnt, TYP_I_IMPL); + regNumber regTmp = tree->GetSingleTempReg(); + genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp); + + // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, + // we're going to assume the worst and probe. + } + +ALLOC_DONE: + // Re-adjust SP to allocate out-going arg area. Note: this also requires probes, if we have + // a very large stack adjustment! For simplicity, we use the same function used elsewhere, + // which probes the current address before subtracting. We may end up probing multiple + // times relatively "nearby". + if (stackAdjustment > 0) + { + assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned + assert(lastTouchDelta >= -1); + + if ((lastTouchDelta == (target_ssize_t)-1) || + (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > + compiler->eeGetPageSize())) + { + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, REG_NA); + } + else + { + genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, REG_NA); + } + } + + // Return the stackalloc'ed address in result register. + // TargetReg = RSP + stackAdjustment. + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment); + + if (endLabel != nullptr) + { + genDefineTempLabel(endLabel); + } + +BAILOUT: + +#ifdef JIT32_GCENCODER + if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) + { + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); + } +#endif // JIT32_GCENCODER + +#ifdef DEBUG + // Update local variable to reflect the new stack pointer. 
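+ // lvaReturnSpCheck is the slot consulted by the compStackCheckOnRet stress check
+ // (see the genStackPointerCheck call at the top of this function), so it must be
+ // kept in sync with the SP change made here.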
+ if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnSpCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnSpCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnSpCheck].lvOnFrame); + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnSpCheck, 0); + } +#endif + + genProduceReg(tree); +} + +void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) +{ + assert(storeBlkNode->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK)); + + if (storeBlkNode->OperIs(GT_STORE_OBJ)) + { +#ifndef JIT32_GCENCODER + assert(!storeBlkNode->gtBlkOpGcUnsafe); +#endif + assert(storeBlkNode->OperIsCopyBlkOp()); + assert(storeBlkNode->AsObj()->GetLayout()->HasGCPtr()); + genCodeForCpObj(storeBlkNode->AsObj()); + return; + } + + bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp(); + + switch (storeBlkNode->gtBlkOpKind) + { +#ifdef TARGET_AMD64 + case GenTreeBlk::BlkOpKindHelper: + assert(!storeBlkNode->gtBlkOpGcUnsafe); + if (isCopyBlk) + { + genCodeForCpBlkHelper(storeBlkNode); + } + else + { + genCodeForInitBlkHelper(storeBlkNode); + } + break; +#endif // TARGET_AMD64 + case GenTreeBlk::BlkOpKindRepInstr: +#ifndef JIT32_GCENCODER + assert(!storeBlkNode->gtBlkOpGcUnsafe); +#endif + if (isCopyBlk) + { + genCodeForCpBlkRepMovs(storeBlkNode); + } + else + { + genCodeForInitBlkRepStos(storeBlkNode); + } + break; + case GenTreeBlk::BlkOpKindUnroll: + if (isCopyBlk) + { +#ifndef JIT32_GCENCODER + if (storeBlkNode->gtBlkOpGcUnsafe) + { + GetEmitter()->emitDisableGC(); + } +#endif + genCodeForCpBlkUnroll(storeBlkNode); +#ifndef JIT32_GCENCODER + if (storeBlkNode->gtBlkOpGcUnsafe) + { + GetEmitter()->emitEnableGC(); + } +#endif + } + else + { +#ifndef JIT32_GCENCODER + assert(!storeBlkNode->gtBlkOpGcUnsafe); +#endif + genCodeForInitBlkUnroll(storeBlkNode); + } + break; + default: + unreached(); + } +} + +// +//------------------------------------------------------------------------ +// genCodeForInitBlkRepStos: Generate code for InitBlk using rep stos. +// +// Arguments: +// initBlkNode - The Block store for which we are generating code. +// +void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode) +{ + genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX); + instGen(INS_r_stosb); +} + +//---------------------------------------------------------------------------------- +// genCodeForInitBlkUnroll: Generate unrolled block initialization code. 
+// +// Arguments: +// node - the GT_STORE_BLK node to generate code for +// +void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) +{ + assert(node->OperIs(GT_STORE_BLK)); + + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + regNumber dstAddrIndexReg = REG_NA; + unsigned dstAddrIndexScale = 1; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + GenTreeAddrMode* addrMode = dstAddr->AsAddrMode(); + + if (addrMode->HasBase()) + { + dstAddrBaseReg = genConsumeReg(addrMode->Base()); + } + + if (addrMode->HasIndex()) + { + dstAddrIndexReg = genConsumeReg(addrMode->Index()); + dstAddrIndexScale = addrMode->GetScale(); + } + + dstOffset = addrMode->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs(); + } + + regNumber srcIntReg = REG_NA; + GenTree* src = node->Data(); + + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->AsUnOp()->gtGetOp1(); + } + + if (!src->isContained()) + { + srcIntReg = genConsumeReg(src); + } + else + { + // If src is contained then it must be 0 and the size must be a multiple + // of XMM_REGSIZE_BYTES so initialization can use only SSE2 instructions. + assert(src->IsIntegralConst(0)); + assert((node->GetLayout()->GetSize() % XMM_REGSIZE_BYTES) == 0); + } + + emitter* emit = GetEmitter(); + unsigned size = node->GetLayout()->GetSize(); + + assert(size <= INT32_MAX); + assert(dstOffset < (INT32_MAX - static_cast(size))); + + // Fill as much as possible using SSE2 stores. + if (size >= XMM_REGSIZE_BYTES) + { + regNumber srcXmmReg = node->GetSingleTempReg(RBM_ALLFLOAT); + + if (src->gtSkipReloadOrCopy()->IsIntegralConst(0)) + { + // If the source is constant 0 then always use xorps, it's faster + // than copying the constant from a GPR to a XMM register. + emit->emitIns_R_R(INS_xorps, EA_16BYTE, srcXmmReg, srcXmmReg); + } + else + { + emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg); + emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); +#ifdef TARGET_X86 + // For x86, we need one more to convert it from 8 bytes to 16 bytes. + emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); +#endif + } + + instruction simdMov = simdUnalignedMovIns(); + for (unsigned regSize = XMM_REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize) + { + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(simdMov, EA_ATTR(regSize), srcXmmReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), srcXmmReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); + } + } + + // TODO-CQ-XArch: On x86 we could initialize 8 byte at once by using MOVQ instead of two 4 byte MOV stores. + // On x64 it may also be worth zero initializing a 4/8 byte remainder using MOVD/MOVQ, that avoids the need + // to allocate a GPR just for the remainder. + } + + // Fill the remainder using normal stores. 
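+ // regSize is halved until it fits the remaining byte count; e.g. a 7-byte tail is
+ // written as 4 + 2 + 1 byte stores.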
+ for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize) + { + while (regSize > size) + { + regSize /= 2; + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); + } + } +} + +#ifdef TARGET_AMD64 +//------------------------------------------------------------------------ +// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call +// +// Arguments: +// initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) +{ + // Destination address goes in arg0, source address goes in arg1, and size goes in arg2. + // genConsumeBlockOp takes care of this for us. + genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + + genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); +} +#endif // TARGET_AMD64 + +#ifdef FEATURE_PUT_STRUCT_ARG_STK +// Generate code for a load from some address + offset +// baseNode: tree node which can be either a local address or arbitrary node +// offset: distance from the baseNode from which to load +void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset) +{ + emitter* emit = GetEmitter(); + + if (baseNode->OperIsLocalAddr()) + { + const GenTreeLclVarCommon* lclVar = baseNode->AsLclVarCommon(); + offset += lclVar->GetLclOffs(); + emit->emitIns_R_S(ins, size, dst, lclVar->GetLclNum(), offset); + } + else + { + emit->emitIns_R_AR(ins, size, dst, baseNode->GetRegNum(), offset); + } +} +#endif // FEATURE_PUT_STRUCT_ARG_STK + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkUnroll - Generate unrolled block copy code. 
+// +// Arguments: +// node - the GT_STORE_BLK node to generate code for +// +void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) +{ + assert(node->OperIs(GT_STORE_BLK)); + + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + regNumber dstAddrIndexReg = REG_NA; + unsigned dstAddrIndexScale = 1; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + GenTreeAddrMode* addrMode = dstAddr->AsAddrMode(); + + if (addrMode->HasBase()) + { + dstAddrBaseReg = genConsumeReg(addrMode->Base()); + } + + if (addrMode->HasIndex()) + { + dstAddrIndexReg = genConsumeReg(addrMode->Index()); + dstAddrIndexScale = addrMode->GetScale(); + } + + dstOffset = addrMode->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + const GenTreeLclVarCommon* lclVar = dstAddr->AsLclVarCommon(); + dstLclNum = lclVar->GetLclNum(); + dstOffset = lclVar->GetLclOffs(); + } + + unsigned srcLclNum = BAD_VAR_NUM; + regNumber srcAddrBaseReg = REG_NA; + regNumber srcAddrIndexReg = REG_NA; + unsigned srcAddrIndexScale = 1; + int srcOffset = 0; + GenTree* src = node->Data(); + + assert(src->isContained()); + + if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + srcLclNum = src->AsLclVarCommon()->GetLclNum(); + srcOffset = src->AsLclVarCommon()->GetLclOffs(); + } + else + { + assert(src->OperIs(GT_IND)); + GenTree* srcAddr = src->AsIndir()->Addr(); + + if (!srcAddr->isContained()) + { + srcAddrBaseReg = genConsumeReg(srcAddr); + } + else if (srcAddr->OperIsAddrMode()) + { + GenTreeAddrMode* addrMode = srcAddr->AsAddrMode(); + + if (addrMode->HasBase()) + { + srcAddrBaseReg = genConsumeReg(addrMode->Base()); + } + + if (addrMode->HasIndex()) + { + srcAddrIndexReg = genConsumeReg(addrMode->Index()); + srcAddrIndexScale = addrMode->GetScale(); + } + + srcOffset = addrMode->Offset(); + } + else + { + assert(srcAddr->OperIsLocalAddr()); + srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); + srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs(); + } + } + + emitter* emit = GetEmitter(); + unsigned size = node->GetLayout()->GetSize(); + + assert(size <= INT32_MAX); + assert(srcOffset < (INT32_MAX - static_cast(size))); + assert(dstOffset < (INT32_MAX - static_cast(size))); + + if (size >= XMM_REGSIZE_BYTES) + { + regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT); + + instruction simdMov = simdUnalignedMovIns(); + for (unsigned regSize = XMM_REGSIZE_BYTES; size >= regSize; + size -= regSize, srcOffset += regSize, dstOffset += regSize) + { + if (srcLclNum != BAD_VAR_NUM) + { + emit->emitIns_R_S(simdMov, EA_ATTR(regSize), tempReg, srcLclNum, srcOffset); + } + else + { + emit->emitIns_R_ARX(simdMov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg, + srcAddrIndexScale, srcOffset); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(simdMov, EA_ATTR(regSize), tempReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); + } + } + + // TODO-CQ-XArch: On x86 we could copy 8 byte at once by using MOVQ instead of four 4 byte MOV stores. + // On x64 it may also be worth copying a 4/8 byte remainder using MOVD/MOVQ, that avoids the need to + // allocate a GPR just for the remainder. 
+ } + + if (size > 0) + { + regNumber tempReg = node->GetSingleTempReg(RBM_ALLINT); + + for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, srcOffset += regSize, dstOffset += regSize) + { + while (regSize > size) + { + regSize /= 2; + } + + if (srcLclNum != BAD_VAR_NUM) + { + emit->emitIns_R_S(INS_mov, EA_ATTR(regSize), tempReg, srcLclNum, srcOffset); + } + else + { + emit->emitIns_R_ARX(INS_mov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg, + srcAddrIndexScale, srcOffset); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), tempReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); + } + } + } +} + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkRepMovs - Generate code for CpBlk by using rep movs +// +// Arguments: +// cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode) +{ + // Destination address goes in RDI, source address goes in RSE, and size goes in RCX. + // genConsumeBlockOp takes care of this for us. + genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX); + instGen(INS_r_movsb); +} + +#ifdef FEATURE_PUT_STRUCT_ARG_STK +//------------------------------------------------------------------------ +// CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// longTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (8 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register. +// This is checked by genStoreRegToStackArg. +// +unsigned CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset) +{ +#ifdef TARGET_X86 + instruction longMovIns = INS_movq; +#else // !TARGET_X86 + instruction longMovIns = INS_mov; +#endif // !TARGET_X86 + if ((size & 8) != 0) + { + genCodeForLoadOffset(longMovIns, EA_8BYTE, longTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_LONG, longTmpReg, offset); + return 8; + } + return 0; +} + +//------------------------------------------------------------------------ +// CodeGen::genMove4IfNeeded: Conditionally move 4 bytes of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// intTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (4 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// intTmpReg must be an integer register. +// This is checked by genStoreRegToStackArg. 
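+//    For example, a 14-byte remainder is moved as 8 bytes (genMove8IfNeeded), then 4 bytes
+//    here, then 2 bytes (genMove2IfNeeded).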
+// +unsigned CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) +{ + if ((size & 4) != 0) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, intTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_INT, intTmpReg, offset); + return 4; + } + return 0; +} + +//------------------------------------------------------------------------ +// CodeGen::genMove2IfNeeded: Conditionally move 2 bytes of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// intTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (2 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// intTmpReg must be an integer register. +// This is checked by genStoreRegToStackArg. +// +unsigned CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) +{ + if ((size & 2) != 0) + { + genCodeForLoadOffset(INS_mov, EA_2BYTE, intTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_SHORT, intTmpReg, offset); + return 2; + } + return 0; +} + +//------------------------------------------------------------------------ +// CodeGen::genMove1IfNeeded: Conditionally move 1 byte of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// intTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (1 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// intTmpReg must be an integer register. +// This is checked by genStoreRegToStackArg. +// +unsigned CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) +{ + if ((size & 1) != 0) + { + genCodeForLoadOffset(INS_mov, EA_1BYTE, intTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_BYTE, intTmpReg, offset); + return 1; + } + return 0; +} + +//---------------------------------------------------------------------------------------------------------------// +// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling. +// +// Arguments: +// putArgNode - the PutArgStk tree. +// +// Notes: +// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct will be copied to the +// stack. +// +// TODO-Amd64-Unix: Try to share code with copyblk. +// Need refactoring of copyblk before it could be used for putarg_stk. +// The difference for now is that a putarg_stk contains its children, while cpyblk does not. +// This creates differences in code. After some significant refactoring it could be reused. +// +void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) +{ + GenTree* src = putArgNode->AsOp()->gtOp1; + // We will never call this method for SIMD types, which are stored directly + // in genPutStructArgStk(). 
+ noway_assert(src->TypeGet() == TYP_STRUCT); + + unsigned size = putArgNode->GetStackByteSize(); + assert(size <= CPBLK_UNROLL_LIMIT); + + emitter* emit = GetEmitter(); + unsigned putArgOffset = putArgNode->getArgOffset(); + + assert(src->isContained()); + + assert(src->gtOper == GT_OBJ); + + if (src->AsOp()->gtOp1->isUsedFromReg()) + { + genConsumeReg(src->AsOp()->gtOp1); + } + + unsigned offset = 0; + + regNumber xmmTmpReg = REG_NA; + regNumber intTmpReg = REG_NA; + regNumber longTmpReg = REG_NA; +#ifdef TARGET_X86 + // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's + // less than 16 bytes, we will just be using pushes + if (size >= 8) + { + xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); + longTmpReg = xmmTmpReg; + } + if ((size & 0x7) != 0) + { + intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); + } +#else // !TARGET_X86 + // On x64 we use an XMM register only for 16-byte chunks. + if (size >= XMM_REGSIZE_BYTES) + { + xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); + } + if ((size & 0xf) != 0) + { + intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); + longTmpReg = intTmpReg; + } +#endif // !TARGET_X86 + + // If the size of this struct is larger than 16 bytes + // let's use SSE2 to be able to do 16 byte at a time + // loads and stores. + if (size >= XMM_REGSIZE_BYTES) + { +#ifdef TARGET_X86 + assert(!m_pushStkArg); +#endif // TARGET_X86 + size_t slots = size / XMM_REGSIZE_BYTES; + + assert(putArgNode->gtGetOp1()->isContained()); + assert(putArgNode->gtGetOp1()->AsOp()->gtOper == GT_OBJ); + + // TODO: In the below code the load and store instructions are for 16 bytes, but the + // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but + // this probably needs to be changed. + while (slots-- > 0) + { + // Load + genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src->gtGetOp1(), offset); + + // Store + genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset); + + offset += XMM_REGSIZE_BYTES; + } + } + + // Fill the remainder (15 bytes or less) if there's one. + if ((size & 0xf) != 0) + { +#ifdef TARGET_X86 + if (m_pushStkArg) + { + // This case is currently supported only for the case where the total size is + // less than XMM_REGSIZE_BYTES. We need to push the remaining chunks in reverse + // order. However, morph has ensured that we have a struct that is an even + // multiple of TARGET_POINTER_SIZE, so we don't need to worry about alignment. + assert(((size & 0xc) == size) && (offset == 0)); + // If we have a 4 byte chunk, load it from either offset 0 or 8, depending on + // whether we've got an 8 byte chunk, and then push it on the stack. + unsigned pushedBytes = genMove4IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, size & 0x8); + // Now if we have an 8 byte chunk, load it from offset 0 (it's the first chunk) + // and push it on the stack. + pushedBytes += genMove8IfNeeded(size, longTmpReg, src->AsOp()->gtOp1, 0); + } + else +#endif // TARGET_X86 + { + offset += genMove8IfNeeded(size, longTmpReg, src->AsOp()->gtOp1, offset); + offset += genMove4IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, offset); + offset += genMove2IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, offset); + offset += genMove1IfNeeded(size, intTmpReg, src->AsOp()->gtOp1, offset); + assert(offset == size); + } + } +} + +//------------------------------------------------------------------------ +// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs. +// +// Arguments: +// putArgNode - the PutArgStk tree. 
+// +// Preconditions: +// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct bits will go. +// +void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode) +{ + GenTree* srcAddr = putArgNode->gtGetOp1(); + assert(srcAddr->TypeGet() == TYP_STRUCT); + + // Make sure we got the arguments of the cpblk operation in the right registers, and that + // 'srcAddr' is contained as expected. + assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI)); + assert(srcAddr->isContained()); + + genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX); + instGen(INS_r_movsb); +} + +//------------------------------------------------------------------------ +// If any Vector3 args are on stack and they are not pass-by-ref, the upper 32bits +// must be cleared to zeroes. The native compiler doesn't clear the upper bits +// and there is no way to know if the caller is native or not. So, the upper +// 32 bits of Vector argument on stack are always cleared to zero. +#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) +void CodeGen::genClearStackVec3ArgUpperBits() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genClearStackVec3ArgUpperBits()\n"); + } +#endif + + assert(compiler->compGeneratingProlog); + + unsigned varNum = 0; + + for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++) + { + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + assert(varDsc->lvIsParam); + + // Does var has simd12 type? + if (varDsc->lvType != TYP_SIMD12) + { + continue; + } + + if (!varDsc->lvIsRegArg) + { + // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0 + GetEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0); + } + else + { + // Assume that for x64 linux, an argument is fully in registers + // or fully on stack. + regNumber argReg = varDsc->GetOtherArgReg(); + + // Clear the upper 32 bits by two shift instructions. + // argReg = argReg << 96 + GetEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12); + // argReg = argReg >> 96 + GetEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12); + } + } +} +#endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) +#endif // FEATURE_PUT_STRUCT_ARG_STK + +// +// genCodeForCpObj - Generate code for CpObj nodes to copy structs that have interleaved +// GC pointers. +// +// Arguments: +// cpObjNode - the GT_STORE_OBJ +// +// Notes: +// This will generate a sequence of movsp instructions for the cases of non-gc members. +// Note that movsp is an alias for movsd on x86 and movsq on x64. +// and calls to the BY_REF_ASSIGN helper otherwise. +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) +{ + // Make sure we got the arguments of the cpobj operation in the right registers + GenTree* dstAddr = cpObjNode->Addr(); + GenTree* source = cpObjNode->Data(); + GenTree* srcAddr = nullptr; + var_types srcAddrType = TYP_BYREF; + bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr(); + +#ifdef DEBUG + // If the GenTree node has data about GC pointers, this means we're dealing + // with CpObj, so this requires special logic. + assert(cpObjNode->GetLayout()->HasGCPtr()); + + // MovSp (alias for movsq on x64 and movsd on x86) instruction is used for copying non-gcref fields + // and it needs src = RSI and dst = RDI. 
+ // Either these registers must not contain lclVars, or they must be dying or marked for spill. + // This is because these registers are incremented as we go through the struct. + if (!source->IsLocal()) + { + assert(source->gtOper == GT_IND); + srcAddr = source->gtGetOp1(); + GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy(); + GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy(); + unsigned srcLclVarNum = BAD_VAR_NUM; + unsigned dstLclVarNum = BAD_VAR_NUM; + bool isSrcAddrLiveOut = false; + bool isDstAddrLiveOut = false; + if (genIsRegCandidateLocal(actualSrcAddr)) + { + srcLclVarNum = actualSrcAddr->AsLclVarCommon()->GetLclNum(); + isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + if (genIsRegCandidateLocal(actualDstAddr)) + { + dstLclVarNum = actualDstAddr->AsLclVarCommon()->GetLclNum(); + isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + assert((actualSrcAddr->GetRegNum() != REG_RSI) || !isSrcAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut)); + assert((actualDstAddr->GetRegNum() != REG_RDI) || !isDstAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut)); + srcAddrType = srcAddr->TypeGet(); + } +#endif // DEBUG + + // Consume the operands and get them into the right registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA); + gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType); + gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); + + unsigned slots = cpObjNode->GetLayout()->GetSlotCount(); + + // If we can prove it's on the stack we don't need to use the write barrier. + if (dstOnStack) + { + if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) + { + // If the destination of the CpObj is on the stack, make sure we allocated + // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively). + assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); + + GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); + instGen(INS_r_movsp); + } + else + { + // For small structs, it's better to emit a sequence of movsp than to + // emit a rep movsp instruction. + while (slots > 0) + { + instGen(INS_movsp); + slots--; + } + } + } + else + { + ClassLayout* layout = cpObjNode->GetLayout(); + unsigned gcPtrCount = layout->GetGCPtrCount(); + + unsigned i = 0; + while (i < slots) + { + if (!layout->IsGCPtr(i)) + { + // Let's see if we can use rep movsp instead of a sequence of movsp instructions + // to save cycles and code size. + unsigned nonGcSlotCount = 0; + + do + { + nonGcSlotCount++; + i++; + } while ((i < slots) && !layout->IsGCPtr(i)); + + // If we have a very small contiguous non-gc region, it's better just to + // emit a sequence of movsp instructions + if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) + { + while (nonGcSlotCount > 0) + { + instGen(INS_movsp); + nonGcSlotCount--; + } + } + else + { + // Otherwise, we can save code-size and improve CQ by emitting + // rep movsp (alias for movsd/movsq for x86/x64) + assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); + + GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); + instGen(INS_r_movsp); + } + } + else + { + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); + gcPtrCount--; + i++; + } + } + + assert(gcPtrCount == 0); + } + + // Clear the gcInfo for RSI and RDI. 
+ // While we normally update GC info prior to the last instruction that uses them, + // these actually live into the helper call. + gcInfo.gcMarkRegSetNpt(RBM_RSI); + gcInfo.gcMarkRegSetNpt(RBM_RDI); +} + +#ifdef TARGET_AMD64 +//---------------------------------------------------------------------------------- +// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call +// +// Arguments: +// cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode) +{ + // Destination address goes in arg0, source address goes in arg1, and size goes in arg2. + // genConsumeBlockOp takes care of this for us. + genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + + genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); +} +#endif // TARGET_AMD64 + +// generate code do a switch statement based on a table of ip-relative offsets +void CodeGen::genTableBasedSwitch(GenTree* treeNode) +{ + genConsumeOperands(treeNode->AsOp()); + regNumber idxReg = treeNode->AsOp()->gtOp1->GetRegNum(); + regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum(); + + regNumber tmpReg = treeNode->GetSingleTempReg(); + + // load the ip-relative offset (which is relative to start of fgFirstBB) + GetEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0); + + // add it to the absolute address of fgFirstBB + GetEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg); + GetEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg); + // jmp baseReg + GetEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg); +} + +// emits the table and an instruction to get the address of the first element +void CodeGen::genJumpTable(GenTree* treeNode) +{ + noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); + assert(treeNode->OperGet() == GT_JMPTABLE); + + unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; + BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; + unsigned jmpTabOffs; + unsigned jmpTabBase; + + jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true); + + jmpTabOffs = 0; + + JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", compiler->compMethodID, jmpTabBase); + + for (unsigned i = 0; i < jumpCount; i++) + { + BasicBlock* target = *jumpTable++; + noway_assert(target->bbFlags & BBF_HAS_LABEL); + + JITDUMP(" DD L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum); + + GetEmitter()->emitDataGenData(i, target); + }; + + GetEmitter()->emitDataGenEnd(); + + // Access to inline data is 'abstracted' by a special type of static member + // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference + // to constant data, not a real static field. 
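+ // The "lea" below materializes the address of the table's first 4-byte entry;
+ // genTableBasedSwitch (above) loads entry [index] from it and adds the address of
+ // fgFirstBB to form the jump target.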
+ GetEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), + compiler->eeFindJitDataOffs(jmpTabBase), 0); + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForLockAdd: Generate code for a GT_LOCKADD node +// +// Arguments: +// node - the GT_LOCKADD node +// +void CodeGen::genCodeForLockAdd(GenTreeOp* node) +{ + assert(node->OperIs(GT_LOCKADD)); + + GenTree* addr = node->gtGetOp1(); + GenTree* data = node->gtGetOp2(); + emitAttr size = emitActualTypeSize(data->TypeGet()); + + assert(addr->isUsedFromReg()); + assert(data->isUsedFromReg() || data->isContainedIntOrIImmed()); + assert((size == EA_4BYTE) || (size == EA_PTRSIZE)); + + genConsumeOperands(node); + instGen(INS_lock); + + if (data->isContainedIntOrIImmed()) + { + int imm = static_cast(data->AsIntCon()->IconValue()); + assert(imm == data->AsIntCon()->IconValue()); + GetEmitter()->emitIns_I_AR(INS_add, size, imm, addr->GetRegNum(), 0); + } + else + { + GetEmitter()->emitIns_AR_R(INS_add, size, data->GetRegNum(), addr->GetRegNum(), 0); + } +} + +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. +// +// Arguments: +// node - the GT_XADD/XCHG node +// +void CodeGen::genLockedInstructions(GenTreeOp* node) +{ + assert(node->OperIs(GT_XADD, GT_XCHG)); + + GenTree* addr = node->gtGetOp1(); + GenTree* data = node->gtGetOp2(); + emitAttr size = emitTypeSize(node->TypeGet()); + + assert(addr->isUsedFromReg()); + assert(data->isUsedFromReg()); + assert((size == EA_4BYTE) || (size == EA_PTRSIZE)); + + genConsumeOperands(node); + + if (node->GetRegNum() != data->GetRegNum()) + { + // If the destination register is different from the data register then we need + // to first move the data to the target register. Make sure we don't overwrite + // the address, the register allocator should have taken care of this. + assert(node->GetRegNum() != addr->GetRegNum()); + GetEmitter()->emitIns_R_R(INS_mov, size, node->GetRegNum(), data->GetRegNum()); + } + + instruction ins = node->OperIs(GT_XADD) ? INS_xadd : INS_xchg; + + // XCHG has an implied lock prefix when the first operand is a memory operand. + if (ins != INS_xchg) + { + instGen(INS_lock); + } + + GetEmitter()->emitIns_AR_R(ins, size, node->GetRegNum(), addr->GetRegNum(), 0); + genProduceReg(node); +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. +// +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* tree) +{ + assert(tree->OperIs(GT_CMPXCHG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + GenTree* location = tree->gtOpLocation; // arg1 + GenTree* value = tree->gtOpValue; // arg2 + GenTree* comparand = tree->gtOpComparand; // arg3 + + assert(location->GetRegNum() != REG_NA && location->GetRegNum() != REG_RAX); + assert(value->GetRegNum() != REG_NA && value->GetRegNum() != REG_RAX); + + genConsumeReg(location); + genConsumeReg(value); + genConsumeReg(comparand); + + // comparand goes to RAX; + // Note that we must issue this move after the genConsumeRegs(), in case any of the above + // have a GT_COPY from RAX. 
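+ // CMPXCHG compares RAX with [location]: on a match it stores 'value' to memory,
+ // otherwise it loads the old memory value into RAX, so the old value always ends
+ // up in RAX.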
+ if (comparand->GetRegNum() != REG_RAX) + { + inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->GetRegNum(), comparand->TypeGet()); + } + + // location is Rm + instGen(INS_lock); + + GetEmitter()->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->GetRegNum(), location->GetRegNum(), 0); + + // Result is in RAX + if (targetReg != REG_RAX) + { + inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType); + } + + genProduceReg(tree); +} + +// generate code for BoundsCheck nodes +void CodeGen::genRangeCheck(GenTree* oper) +{ + noway_assert(oper->OperIsBoundsCheck()); + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTree* arrIndex = bndsChk->gtIndex; + GenTree* arrLen = bndsChk->gtArrLen; + + GenTree * src1, *src2; + emitJumpKind jmpKind; + instruction cmpKind; + + genConsumeRegs(arrIndex); + genConsumeRegs(arrLen); + + if (arrIndex->IsIntegralConst(0) && arrLen->isUsedFromReg()) + { + // arrIndex is 0 and arrLen is in a reg. In this case + // we can generate + // test reg, reg + // since arrLen is non-negative + src1 = arrLen; + src2 = arrLen; + jmpKind = EJ_je; + cmpKind = INS_test; + } + else if (arrIndex->isContainedIntOrIImmed()) + { + // arrIndex is a contained constant. In this case + // we will generate one of the following + // cmp [mem], immed (if arrLen is a memory op) + // cmp reg, immed (if arrLen is in a reg) + // + // That is arrLen cannot be a contained immed. + assert(!arrLen->isContainedIntOrIImmed()); + + src1 = arrLen; + src2 = arrIndex; + jmpKind = EJ_jbe; + cmpKind = INS_cmp; + } + else + { + // arrIndex could either be a contained memory op or a reg + // In this case we will generate one of the following + // cmp [mem], immed (if arrLen is a constant) + // cmp [mem], reg (if arrLen is in a reg) + // cmp reg, immed (if arrIndex is in a reg) + // cmp reg1, reg2 (if arrIndex is in reg1) + // cmp reg, [mem] (if arrLen is a memory op) + // + // That is only one of arrIndex or arrLen can be a memory op. + assert(!arrIndex->isUsedFromMemory() || !arrLen->isUsedFromMemory()); + + src1 = arrIndex; + src2 = arrLen; + jmpKind = EJ_jae; + cmpKind = INS_cmp; + } + + var_types bndsChkType = src2->TypeGet(); +#if DEBUG + // Bounds checks can only be 32 or 64 bit sized comparisons. + assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG); + + // The type of the bounds check should always wide enough to compare against the index. 
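The jbe/jae choices above work because index and length are both known non-negative, so one unsigned comparison also rejects a negative index. The check being emitted is essentially:

    #include <cstdint>
    #include <stdexcept>

    // One unsigned compare covers both "index < 0" and "index >= length",
    // which is why cmp followed by jae/jbe is enough for the bounds check.
    inline void boundsCheckSketch(int32_t index, uint32_t length)
    {
        if (static_cast<uint32_t>(index) >= length) // cmp; jae <throw helper block>
        {
            throw std::out_of_range("array index out of range");
        }
    }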
+ assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet())); +#endif // DEBUG + + GetEmitter()->emitInsBinary(cmpKind, emitTypeSize(bndsChkType), src1, src2); + genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB); +} + +//--------------------------------------------------------------------- +// genCodeForPhysReg - generate code for a GT_PHYSREG node +// +// Arguments +// tree - the GT_PHYSREG node +// +// Return value: +// None +// +void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) +{ + assert(tree->OperIs(GT_PHYSREG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + if (targetReg != tree->gtSrcReg) + { + inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType); + genTransferRegGCState(targetReg, tree->gtSrcReg); + } + + genProduceReg(tree); +} + +//--------------------------------------------------------------------- +// genCodeForNullCheck - generate code for a GT_NULLCHECK node +// +// Arguments +// tree - the GT_NULLCHECK node +// +// Return value: +// None +// +void CodeGen::genCodeForNullCheck(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_NULLCHECK)); + + assert(tree->gtOp1->isUsedFromReg()); + regNumber reg = genConsumeReg(tree->gtOp1); + GetEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0); +} + +//------------------------------------------------------------------------ +// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the +// lower bound for the given dimension. +// +// Arguments: +// elemType - the element type of the array +// rank - the rank of the array +// dimension - the dimension for which the lower bound offset will be returned. +// +// Return Value: +// The offset. + +unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension) +{ + // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. + return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank); +} + +//------------------------------------------------------------------------ +// genOffsetOfMDArrayLength: Returns the offset from the Array object to the +// size for the given dimension. +// +// Arguments: +// elemType - the element type of the array +// rank - the rank of the array +// dimension - the dimension for which the lower bound offset will be returned. +// +// Return Value: +// The offset. + +unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension) +{ + // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. + return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension; +} + +//------------------------------------------------------------------------ +// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, +// producing the effective index by subtracting the lower bound. +// +// Arguments: +// arrIndex - the node for which we're generating code +// +// Return Value: +// None. 
+// + +void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) +{ + GenTree* arrObj = arrIndex->ArrObj(); + GenTree* indexNode = arrIndex->IndexExpr(); + + regNumber arrReg = genConsumeReg(arrObj); + regNumber indexReg = genConsumeReg(indexNode); + regNumber tgtReg = arrIndex->GetRegNum(); + + unsigned dim = arrIndex->gtCurrDim; + unsigned rank = arrIndex->gtArrRank; + var_types elemType = arrIndex->gtArrElemType; + + noway_assert(tgtReg != REG_NA); + + // Subtract the lower bound for this dimension. + // TODO-XArch-CQ: make this contained if it's an immediate that fits. + if (tgtReg != indexReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet()); + } + GetEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg, + genOffsetOfMDArrayLowerBound(elemType, rank, dim)); + GetEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg, + genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); + genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL); + + genProduceReg(arrIndex); +} + +//------------------------------------------------------------------------ +// genCodeForArrOffset: Generates code to compute the flattened array offset for +// one dimension of an array reference: +// result = (prevDimOffset * dimSize) + effectiveIndex +// where dimSize is obtained from the arrObj operand +// +// Arguments: +// arrOffset - the node for which we're generating code +// +// Return Value: +// None. +// +// Notes: +// dimSize and effectiveIndex are always non-negative, the former by design, +// and the latter because it has been normalized to be zero-based. + +void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) +{ + GenTree* offsetNode = arrOffset->gtOffset; + GenTree* indexNode = arrOffset->gtIndex; + GenTree* arrObj = arrOffset->gtArrObj; + + regNumber tgtReg = arrOffset->GetRegNum(); + assert(tgtReg != REG_NA); + + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + var_types elemType = arrOffset->gtArrElemType; + + // First, consume the operands in the correct order. + regNumber offsetReg = REG_NA; + regNumber tmpReg = REG_NA; + if (!offsetNode->IsIntegralConst(0)) + { + offsetReg = genConsumeReg(offsetNode); + + // We will use a temp register for the offset*scale+effectiveIndex computation. + tmpReg = arrOffset->GetSingleTempReg(); + } + else + { + assert(offsetNode->isContained()); + } + regNumber indexReg = genConsumeReg(indexNode); + // Although arrReg may not be used in the constant-index case, if we have generated + // the value into a register, we must consume it, otherwise we will fail to end the + // live range of the gc ptr. + // TODO-CQ: Currently arrObj will always have a register allocated to it. + // We could avoid allocating a register for it, which would be of value if the arrObj + // is an on-stack lclVar. + regNumber arrReg = REG_NA; + if (arrObj->gtHasReg()) + { + arrReg = genConsumeReg(arrObj); + } + + if (!offsetNode->IsIntegralConst(0)) + { + assert(tmpReg != REG_NA); + assert(arrReg != REG_NA); + + // Evaluate tgtReg = offsetReg*dim_size + indexReg. + // tmpReg is used to load dim_size and the result of the multiplication. + // Note that dim_size will never be negative. 
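Taken together, genCodeForArrIndex (subtract the lower bound, compare against the dimension size) and genCodeForArrOffset implement ordinary row-major flattening. A minimal sketch of the arithmetic, in plain C++ rather than emitted instructions:

    #include <cstddef>

    // Row-major flattening of an MD array reference, one step per dimension:
    //   offset = offset * dimSize[d] + effectiveIndex[d]
    // where effectiveIndex[d] has already had the lower bound subtracted,
    // as genCodeForArrIndex does above.
    size_t flattenIndexSketch(const size_t effectiveIndex[], const size_t dimSize[], unsigned rank)
    {
        size_t offset = 0;
        for (unsigned d = 0; d < rank; d++)
        {
            offset = offset * dimSize[d] + effectiveIndex[d];
        }
        return offset;
    }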
+ + GetEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg, + genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); + inst_RV_RV(INS_imul, tmpReg, offsetReg); + + if (tmpReg == tgtReg) + { + inst_RV_RV(INS_add, tmpReg, indexReg); + } + else + { + if (indexReg != tgtReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL); + } + inst_RV_RV(INS_add, tgtReg, tmpReg); + } + } + else + { + if (indexReg != tgtReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); + } + } + genProduceReg(arrOffset); +} + +instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) +{ + instruction ins; + + // Operations on SIMD vectors shouldn't come this path + assert(!varTypeIsSIMD(type)); + if (varTypeIsFloating(type)) + { + return ins_MathOp(oper, type); + } + + switch (oper) + { + case GT_ADD: + ins = INS_add; + break; + case GT_AND: + ins = INS_and; + break; + case GT_LSH: + ins = INS_shl; + break; + case GT_MUL: + ins = INS_imul; + break; + case GT_NEG: + ins = INS_neg; + break; + case GT_NOT: + ins = INS_not; + break; + case GT_OR: + ins = INS_or; + break; + case GT_ROL: + ins = INS_rol; + break; + case GT_ROR: + ins = INS_ror; + break; + case GT_RSH: + ins = INS_sar; + break; + case GT_RSZ: + ins = INS_shr; + break; + case GT_SUB: + ins = INS_sub; + break; + case GT_XOR: + ins = INS_xor; + break; +#if !defined(TARGET_64BIT) + case GT_ADD_LO: + ins = INS_add; + break; + case GT_ADD_HI: + ins = INS_adc; + break; + case GT_SUB_LO: + ins = INS_sub; + break; + case GT_SUB_HI: + ins = INS_sbb; + break; + case GT_LSH_HI: + ins = INS_shld; + break; + case GT_RSH_LO: + ins = INS_shrd; + break; +#endif // !defined(TARGET_64BIT) + default: + unreached(); + break; + } + return ins; +} + +//------------------------------------------------------------------------ +// genCodeForShift: Generates the code sequence for a GenTree node that +// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. +// b) The shift-by-amount in tree->AsOp()->gtOp2 is either a contained constant or +// it's a register-allocated expression. If it is in a register that is +// not RCX, it will be moved to RCX (so RCX better not be in use!). +// +void CodeGen::genCodeForShift(GenTree* tree) +{ + // Only the non-RMW case here. 
+ assert(tree->OperIsShiftOrRotate()); + assert(tree->AsOp()->gtOp1->isUsedFromReg()); + assert(tree->GetRegNum() != REG_NA); + + genConsumeOperands(tree->AsOp()); + + var_types targetType = tree->TypeGet(); + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + + GenTree* operand = tree->gtGetOp1(); + regNumber operandReg = operand->GetRegNum(); + + GenTree* shiftBy = tree->gtGetOp2(); + + if (shiftBy->isContainedIntOrIImmed()) + { + emitAttr size = emitTypeSize(tree); + + // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg" + ssize_t intCon = shiftBy->AsIntConCommon()->IconValue(); + if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && (intCon == 1 || intCon == 2 || intCon == 3)) + { + switch (intCon) + { + case 1: + if (tree->GetRegNum() == operandReg) + { + GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg); + } + else + { + GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0); + } + break; + case 2: + GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 4, 0); + break; + case 3: + GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 8, 0); + break; + } + } + else + { + int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); + +#if defined(TARGET_64BIT) + // Try to emit rorx if BMI2 is available instead of mov+rol + // it makes sense only for 64bit integers + if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) && + compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) && + (shiftByValue > 0) && (shiftByValue < 64)) + { + const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue; + GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value); + genProduceReg(tree); + return; + } +#endif + // First, move the operand to the destination register and + // later on perform the shift in-place. + // (LSRA will try to avoid this situation through preferencing.) + if (tree->GetRegNum() != operandReg) + { + inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); + } + inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue); + } + } + else + { + // We must have the number of bits to shift stored in ECX, since we constrained this node to + // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single + // register destination requirement. + genCopyRegIfNeeded(shiftBy, REG_RCX); + + // The operand to be shifted must not be in ECX + noway_assert(operandReg != REG_RCX); + + if (tree->GetRegNum() != operandReg) + { + inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); + } + inst_RV_CL(ins, tree->GetRegNum(), targetType); + } + + genProduceReg(tree); +} + +#ifdef TARGET_X86 +//------------------------------------------------------------------------ +// genCodeForShiftLong: Generates the code sequence for a GenTree node that +// represents a three operand bit shift or rotate operation (<>Lo). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. +// b) The shift-by-amount in tree->AsOp()->gtOp2 is a contained constant +// +// TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't +// need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to +// targetReg if sourceHi is a memory operand). 
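The rorx path above rewrites a left rotate as a right rotate by the complementary amount, relying on rol(x, n) == ror(x, 64 - n) for 0 < n < 64. A standalone statement of that identity (illustrative only):

    #include <cstdint>

    // rol(x, n) == ror(x, 64 - n) for 0 < n < 64, which is why a GT_ROL by n
    // can be emitted as a single rorx by (64 - n) when BMI2 is available.
    uint64_t rotateLeftSketch(uint64_t x, unsigned n)  // 0 < n < 64
    {
        return (x << n) | (x >> (64 - n));
    }

    uint64_t rotateRightSketch(uint64_t x, unsigned n) // 0 < n < 64
    {
        return (x >> n) | (x << (64 - n));
    }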
Similarly for GT_RSH_LO, sourceLo could be marked as +// contained memory-op. Even if not a memory-op, we could mark it as reg-optional. +// +void CodeGen::genCodeForShiftLong(GenTree* tree) +{ + // Only the non-RMW case here. + genTreeOps oper = tree->OperGet(); + assert(oper == GT_LSH_HI || oper == GT_RSH_LO); + + GenTree* operand = tree->AsOp()->gtOp1; + assert(operand->OperGet() == GT_LONG); + assert(operand->AsOp()->gtOp1->isUsedFromReg()); + assert(operand->AsOp()->gtOp2->isUsedFromReg()); + + GenTree* operandLo = operand->gtGetOp1(); + GenTree* operandHi = operand->gtGetOp2(); + + regNumber regLo = operandLo->GetRegNum(); + regNumber regHi = operandHi->GetRegNum(); + + genConsumeOperands(tree->AsOp()); + + var_types targetType = tree->TypeGet(); + instruction ins = genGetInsForOper(oper, targetType); + + GenTree* shiftBy = tree->gtGetOp2(); + + assert(shiftBy->isContainedIntOrIImmed()); + + unsigned int count = (unsigned int)shiftBy->AsIntConCommon()->IconValue(); + + regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo; + + if (regResult != tree->GetRegNum()) + { + inst_RV_RV(INS_mov, tree->GetRegNum(), regResult, targetType); + } + + if (oper == GT_LSH_HI) + { + inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->GetRegNum(), regLo, count); + } + else + { + assert(oper == GT_RSH_LO); + inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->GetRegNum(), regHi, count); + } + + genProduceReg(tree); +} +#endif + +//------------------------------------------------------------------------ +// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that +// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example: +// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) ) +// +// Arguments: +// storeIndNode: the GT_STOREIND node. +// +void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd) +{ + GenTree* data = storeInd->Data(); + + assert(data->OperIsShift() || data->OperIsRotate()); + + // This function only handles the RMW case. + assert(data->AsOp()->gtOp1->isUsedFromMemory()); + assert(data->AsOp()->gtOp1->isIndir()); + assert(Lowering::IndirsAreEquivalent(data->AsOp()->gtOp1, storeInd)); + assert(data->GetRegNum() == REG_NA); + + var_types targetType = data->TypeGet(); + genTreeOps oper = data->OperGet(); + instruction ins = genGetInsForOper(oper, targetType); + emitAttr attr = EA_ATTR(genTypeSize(targetType)); + + GenTree* shiftBy = data->AsOp()->gtOp2; + if (shiftBy->isContainedIntOrIImmed()) + { + int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); + ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue); + if (shiftByValue == 1) + { + // There is no source in this case, as the shift by count is embedded in the instruction opcode itself. + GetEmitter()->emitInsRMW(ins, attr, storeInd); + } + else + { + GetEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy); + } + } + else + { + // We must have the number of bits to shift stored in ECX, since we constrained this node to + // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single + // register destination requirement. + genCopyRegIfNeeded(shiftBy, REG_RCX); + + // The shiftBy operand is implicit, so call the unary version of emitInsRMW. + GetEmitter()->emitInsRMW(ins, attr, storeInd); + } +} + +//------------------------------------------------------------------------ +// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. +// +// Arguments: +// tree - the node. 
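genCodeForShiftLong above leans on shld/shrd to shift a 64-bit value held in two 32-bit registers. Assuming 0 < count < 32, the bit-level effect is (illustrative C++, not emitter code):

    #include <cstdint>

    // GT_LSH_HI via shld: the high half takes the bits shifted out of the low half.
    uint32_t shiftLeftHiSketch(uint32_t hi, uint32_t lo, unsigned count)  // 0 < count < 32
    {
        return (hi << count) | (lo >> (32 - count)); // shld hi, lo, count
    }

    // GT_RSH_LO via shrd: the low half takes the bits shifted out of the high half.
    uint32_t shiftRightLoSketch(uint32_t hi, uint32_t lo, unsigned count) // 0 < count < 32
    {
        return (lo >> count) | (hi << (32 - count)); // shrd lo, hi, count
    }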
+// +void CodeGen::genCodeForLclAddr(GenTree* tree) +{ + assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + // Address of a local var. + noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); + + emitAttr size = emitTypeSize(targetType); + + inst_RV_TT(INS_lea, targetReg, tree, 0, size); + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclFld: Produce code for a GT_LCL_FLD node. +// +// Arguments: +// tree - the GT_LCL_FLD node +// +void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) +{ + assert(tree->OperIs(GT_LCL_FLD)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + noway_assert(targetReg != REG_NA); + +#ifdef FEATURE_SIMD + // Loading of TYP_SIMD12 (i.e. Vector3) field + if (targetType == TYP_SIMD12) + { + genLoadLclTypeSIMD12(tree); + return; + } +#endif + + noway_assert(targetType != TYP_STRUCT); + + emitAttr size = emitTypeSize(targetType); + unsigned offs = tree->GetLclOffs(); + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + + GetEmitter()->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. +// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + assert(tree->OperIs(GT_LCL_VAR)); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + LclVarDsc* varDsc = compiler->lvaGetDesc(tree); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. + + if (!isRegCandidate && !tree->IsMultiReg() && !(tree->gtFlags & GTF_SPILLED)) + { +#if defined(FEATURE_SIMD) && defined(TARGET_X86) + // Loading of TYP_SIMD12 (i.e. Vector3) variable + if (tree->TypeGet() == TYP_SIMD12) + { + genLoadLclTypeSIMD12(tree); + return; + } +#endif // defined(FEATURE_SIMD) && defined(TARGET_X86) + + var_types type = varDsc->GetRegisterType(tree); + GetEmitter()->emitIns_R_S(ins_Load(type, compiler->isSIMDTypeLocalAligned(tree->GetLclNum())), + emitTypeSize(type), tree->GetRegNum(), tree->GetLclNum(), 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + assert(tree->OperIs(GT_STORE_LCL_FLD)); + + var_types targetType = tree->TypeGet(); + GenTree* op1 = tree->gtGetOp1(); + + noway_assert(targetType != TYP_STRUCT); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. 
Vector3) field + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + assert(varTypeUsesFloatReg(targetType) == varTypeUsesFloatReg(op1)); + assert(genTypeSize(genActualType(targetType)) == genTypeSize(genActualType(op1->TypeGet()))); + + genConsumeRegs(op1); + GetEmitter()->emitInsBinary(ins_Store(targetType), emitTypeSize(tree), tree, op1); + + // Updating variable liveness after instruction was emitted + genUpdateLife(tree); +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. +// +// Arguments: +// lclNode - the GT_STORE_LCL_VAR node +// +void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) +{ + assert(lclNode->OperIs(GT_STORE_LCL_VAR)); + + regNumber targetReg = lclNode->GetRegNum(); + emitter* emit = GetEmitter(); + + GenTree* op1 = lclNode->gtGetOp1(); + + // Stores from a multi-reg source are handled separately. + if (op1->gtSkipReloadOrCopy()->IsMultiRegNode()) + { + genMultiRegStoreToLocal(lclNode); + } + else + { + unsigned lclNum = lclNode->GetLclNum(); + LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); + + var_types targetType = varDsc->GetRegisterType(lclNode); + +#ifdef DEBUG + var_types op1Type = op1->TypeGet(); + if (op1Type == TYP_STRUCT) + { + assert(op1->IsLocal()); + GenTreeLclVar* op1LclVar = op1->AsLclVar(); + unsigned op1lclNum = op1LclVar->GetLclNum(); + LclVarDsc* op1VarDsc = compiler->lvaGetDesc(op1lclNum); + op1Type = op1VarDsc->GetRegisterType(op1LclVar); + } + assert(varTypeUsesFloatReg(targetType) == varTypeUsesFloatReg(op1Type)); + assert(!varTypeUsesFloatReg(targetType) || (emitTypeSize(targetType) == emitTypeSize(op1Type))); +#endif + +#if !defined(TARGET_64BIT) + if (targetType == TYP_LONG) + { + genStoreLongLclVar(lclNode); + return; + } +#endif // !defined(TARGET_64BIT) + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (targetType == TYP_SIMD12) + { + genStoreLclTypeSIMD12(lclNode); + return; + } +#endif // FEATURE_SIMD + + genConsumeRegs(op1); + + if (op1->OperIs(GT_BITCAST) && op1->isContained()) + { + GenTree* bitCastSrc = op1->gtGetOp1(); + var_types srcType = bitCastSrc->TypeGet(); + noway_assert(!bitCastSrc->isContained()); + if (targetReg == REG_NA) + { + emit->emitIns_S_R(ins_Store(srcType, compiler->isSIMDTypeLocalAligned(lclNum)), + emitTypeSize(targetType), bitCastSrc->GetRegNum(), lclNum, 0); + genUpdateLife(lclNode); + varDsc->SetRegNum(REG_STK); + } + else + { + genBitCast(targetType, targetReg, srcType, bitCastSrc->GetRegNum()); + } + } + else if (targetReg == REG_NA) + { + // stack store + emit->emitInsStoreLcl(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), + emitTypeSize(targetType), lclNode); + varDsc->SetRegNum(REG_STK); + } + else + { + // Look for the case where we have a constant zero which we've marked for reuse, + // but which isn't actually in the register we want. In that case, it's better to create + // zero in the target register, because an xor is smaller than a copy. Note that we could + // potentially handle this in the register allocator, but we can't always catch it there + // because the target may not have a register allocated for it yet. 
+            if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) && (op1->IsIntegralConst(0) || op1->IsFPZero()))
+            {
+                op1->SetRegNum(REG_NA);
+                op1->ResetReuseRegVal();
+                op1->SetContained();
+            }
+
+            if (!op1->isUsedFromReg())
+            {
+                // Currently, we assume that the non-reg source of a GT_STORE_LCL_VAR writing to a register
+                // must be a constant. However, in the future we might want to support an operand used from
+                // memory. This is a bit tricky because we have to decide it can be used from memory before
+                // register allocation,
+                // and this would be a case where, once that's done, we need to mark that node as always
+                // requiring a register - which we always assume now anyway, but once we "optimize" that
+                // we'll have to take cases like this into account.
+                assert((op1->GetRegNum() == REG_NA) && op1->OperIsConst());
+                genSetRegToConst(targetReg, targetType, op1);
+            }
+            else if (op1->GetRegNum() != targetReg)
+            {
+                assert(op1->GetRegNum() != REG_NA);
+                emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(lclNode), lclNode, op1);
+            }
+        }
+        if (targetReg != REG_NA)
+        {
+            genProduceReg(lclNode);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
+//
+// Arguments:
+//    tree - the GT_INDEX_ADDR node
+//
+void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
+{
+    GenTree* const base = node->Arr();
+    GenTree* const index = node->Index();
+
+    const regNumber baseReg = genConsumeReg(base);
+    regNumber indexReg = genConsumeReg(index);
+    const regNumber dstReg = node->GetRegNum();
+
+    // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers
+    // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the
+    // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until
+    // we are finished generating the code for this node.
+
+    gcInfo.gcMarkRegPtrVal(baseReg, base->TypeGet());
+    assert(varTypeIsIntegral(index->TypeGet()));
+
+    regNumber tmpReg = REG_NA;
+#ifdef TARGET_64BIT
+    tmpReg = node->GetSingleTempReg();
+#endif
+
+    // Generate the bounds check if necessary.
+    if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
+    {
+#ifdef TARGET_64BIT
+        // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index
+        // is a native int on a 64-bit platform, we will need to widen the array length and then compare.
+        if (index->TypeGet() == TYP_I_IMPL)
+        {
+            GetEmitter()->emitIns_R_AR(INS_mov, EA_4BYTE, tmpReg, baseReg, static_cast<int>(node->gtLenOffset));
+            GetEmitter()->emitIns_R_R(INS_cmp, EA_8BYTE, indexReg, tmpReg);
+        }
+        else
+#endif // TARGET_64BIT
+        {
+            GetEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, indexReg, baseReg, static_cast<int>(node->gtLenOffset));
+        }
+
+        genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
+    }
+
+#ifdef TARGET_64BIT
+    if (index->TypeGet() != TYP_I_IMPL)
+    {
+        // LEA needs 64-bit operands so we need to widen the index if it's TYP_INT.
+        GetEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, tmpReg, indexReg);
+        indexReg = tmpReg;
+    }
+#endif // TARGET_64BIT
+
+    // Compute the address of the array element.
+    unsigned scale = node->gtElemSize;
+
+    switch (scale)
+    {
+        case 1:
+        case 2:
+        case 4:
+        case 8:
+            tmpReg = indexReg;
+            break;
+
+        default:
+#ifdef TARGET_64BIT
+            // IMUL treats its immediate operand as signed so scale can't be larger than INT32_MAX.
+            // The VM doesn't allow such large array elements but let's be sure.
+            noway_assert(scale <= INT32_MAX);
+#else  // !TARGET_64BIT
+            tmpReg = node->GetSingleTempReg();
+#endif // !TARGET_64BIT
+
+            GetEmitter()->emitIns_R_I(emitter::inst3opImulForReg(tmpReg), EA_PTRSIZE, indexReg,
+                                      static_cast<ssize_t>(scale));
+            scale = 1;
+            break;
+    }
+
+    GetEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node->TypeGet()), dstReg, baseReg, tmpReg, scale,
+                                static_cast<int>(node->gtElemOffset));
+
+    gcInfo.gcMarkRegSetNpt(base->gtGetRegMask());
+
+    genProduceReg(node);
+}
+
+//------------------------------------------------------------------------
+// genCodeForIndir: Produce code for a GT_IND node.
+//
+// Arguments:
+//    tree - the GT_IND node
+//
+void CodeGen::genCodeForIndir(GenTreeIndir* tree)
+{
+    assert(tree->OperIs(GT_IND));
+
+#ifdef FEATURE_SIMD
+    // Handling of Vector3 type values loaded through indirection.
+    if (tree->TypeGet() == TYP_SIMD12)
+    {
+        genLoadIndTypeSIMD12(tree);
+        return;
+    }
+#endif // FEATURE_SIMD
+
+    var_types targetType = tree->TypeGet();
+    emitter* emit = GetEmitter();
+
+    GenTree* addr = tree->Addr();
+    if (addr->IsCnsIntOrI() && addr->IsIconHandle(GTF_ICON_TLS_HDL))
+    {
+        noway_assert(EA_ATTR(genTypeSize(targetType)) == EA_PTRSIZE);
+        emit->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tree->GetRegNum(), FLD_GLOBAL_FS,
+                          (int)addr->AsIntCon()->gtIconVal);
+    }
+    else
+    {
+        genConsumeAddress(addr);
+        emit->emitInsLoadInd(ins_Load(targetType), emitTypeSize(tree), tree->GetRegNum(), tree);
+    }
+
+    genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreInd: Produce code for a GT_STOREIND node.
+//
+// Arguments:
+//    tree - the GT_STOREIND node
+//
+void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
+{
+    assert(tree->OperIs(GT_STOREIND));
+
+#ifdef FEATURE_SIMD
+    // Storing Vector3 of size 12 bytes through indirection
+    if (tree->TypeGet() == TYP_SIMD12)
+    {
+        genStoreIndTypeSIMD12(tree);
+        return;
+    }
+#endif // FEATURE_SIMD
+
+    GenTree* data = tree->Data();
+    GenTree* addr = tree->Addr();
+    var_types targetType = tree->TypeGet();
+
+    assert(!varTypeIsFloating(targetType) || (genTypeSize(targetType) == genTypeSize(data->TypeGet())));
+
+    GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
+    if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+    {
+        // data and addr must be in registers.
+        // Consume both registers so that any copies of interfering registers are taken care of.
+        genConsumeOperands(tree);
+
+        if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
+        {
+            return;
+        }
+
+        // At this point, we should not have any interference.
+        // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
+        noway_assert(data->GetRegNum() != REG_ARG_0);
+
+        // addr goes in REG_ARG_0
+        genCopyRegIfNeeded(addr, REG_ARG_0);
+
+        // data goes in REG_ARG_1
+        genCopyRegIfNeeded(data, REG_ARG_1);
+
+        genGCWriteBarrier(tree, writeBarrierForm);
+    }
+    else
+    {
+        bool dataIsUnary = false;
+        bool isRMWMemoryOp = tree->IsRMWMemoryOp();
+        GenTree* rmwSrc = nullptr;
+
+        // We must consume the operands in the proper execution order, so that liveness is
+        // updated appropriately.
+        genConsumeAddress(addr);
+
+        // If tree represents a RMW memory op then its data is a non-leaf node marked as contained
+        // and non-indir operand of data is the source of RMW memory op.
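The RMW memory op recognized below is the "[addr] = [addr] op src" shape that x86 can fold into one instruction with a memory operand. In source terms it is simply:

    #include <cstdint>

    // Binary RMW form: folded into "add dword ptr [p], v" when v is contained.
    void rmwAddSketch(int32_t* p, int32_t v)
    {
        *p = *p + v;
    }

    // Unary RMW form: folded into "not dword ptr [p]".
    void rmwNotSketch(int32_t* p)
    {
        *p = ~*p;
    }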
+ if (isRMWMemoryOp) + { + assert(data->isContained() && !data->OperIsLeaf()); + + GenTree* rmwDst = nullptr; + + dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); + if (!dataIsUnary) + { + if (tree->IsRMWDstOp1()) + { + rmwDst = data->gtGetOp1(); + rmwSrc = data->gtGetOp2(); + } + else + { + assert(tree->IsRMWDstOp2()); + rmwDst = data->gtGetOp2(); + rmwSrc = data->gtGetOp1(); + } + + genConsumeRegs(rmwSrc); + } + else + { + // *(p) = oper *(p): Here addr = p, rmwsrc=rmwDst = *(p) i.e. GT_IND(p) + // For unary RMW ops, src and dst of RMW memory op is the same. Lower + // clears operand counts on rmwSrc and we don't need to perform a + // genConsumeReg() on it. + assert(tree->IsRMWDstOp1()); + rmwSrc = data->gtGetOp1(); + rmwDst = data->gtGetOp1(); + assert(rmwSrc->isUsedFromMemory()); + } + + assert(rmwSrc != nullptr); + assert(rmwDst != nullptr); + assert(Lowering::IndirsAreEquivalent(rmwDst, tree)); + } + else + { + genConsumeRegs(data); + } + + if (isRMWMemoryOp) + { + if (dataIsUnary) + { + // generate code for unary RMW memory ops like neg/not + GetEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(tree), tree); + } + else + { + if (data->OperIsShiftOrRotate()) + { + // Generate code for shift RMW memory ops. + // The data address needs to be op1 (it must be [addr] = [addr] , not [addr] = + // [addr]). + assert(tree->IsRMWDstOp1()); + assert(rmwSrc == data->gtGetOp2()); + genCodeForShiftRMW(tree); + } + else if (data->OperGet() == GT_ADD && (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1))) + { + // Generate "inc/dec [mem]" instead of "add/sub [mem], 1". + // + // Notes: + // 1) Global morph transforms GT_SUB(x, +/-1) into GT_ADD(x, -/+1). + // 2) TODO-AMD64: Debugger routine NativeWalker::Decode() runs into + // an assert while decoding ModR/M byte of "inc dword ptr [rax]". + // It is not clear whether Decode() can handle all possible + // addr modes with inc/dec. For this reason, inc/dec [mem] + // is not generated while generating debuggable code. Update + // the above if condition once Decode() routine is fixed. + assert(rmwSrc->isContainedIntOrIImmed()); + instruction ins = rmwSrc->IsIntegralConst(1) ? INS_inc : INS_dec; + GetEmitter()->emitInsRMW(ins, emitTypeSize(tree), tree); + } + else + { + // generate code for remaining binary RMW memory ops like add/sub/and/or/xor + GetEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(tree), + tree, rmwSrc); + } + } + } + else + { + GetEmitter()->emitInsStoreInd(ins_Store(data->TypeGet()), emitTypeSize(tree), tree); + } + } +} + +//------------------------------------------------------------------------ +// genCodeForSwap: Produce code for a GT_SWAP node. +// +// Arguments: +// tree - the GT_SWAP node +// +void CodeGen::genCodeForSwap(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_SWAP)); + + // Swap is only supported for lclVar operands that are enregistered + // We do not consume or produce any registers. Both operands remain enregistered. + // However, the gc-ness may change. 
+ assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2)); + + GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->GetLclNum()]); + var_types type1 = varDsc1->TypeGet(); + GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon(); + LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->GetLclNum()]); + var_types type2 = varDsc2->TypeGet(); + + // We must have both int or both fp regs + assert(!varTypeUsesFloatReg(type1) || varTypeUsesFloatReg(type2)); + + // FP swap is not yet implemented (and should have NYI'd in LSRA) + assert(!varTypeUsesFloatReg(type1)); + + regNumber oldOp1Reg = lcl1->GetRegNum(); + regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); + regNumber oldOp2Reg = lcl2->GetRegNum(); + regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); + + // We don't call genUpdateVarReg because we don't have a tree node with the new register. + varDsc1->SetRegNum(oldOp2Reg); + varDsc2->SetRegNum(oldOp1Reg); + + // Do the xchg + emitAttr size = EA_PTRSIZE; + if (varTypeGCtype(type1) != varTypeGCtype(type2)) + { + // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. + // Otherwise it will leave them alone, which is correct if they have the same GC-ness. + size = EA_GCREF; + } + inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); + + // Update the gcInfo. + // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) + gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + + // gcMarkRegPtrVal will do the appropriate thing for non-gc types. + // It will also dump the updates. + gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); + gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); +} + +//------------------------------------------------------------------------ +// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized +// helper functions. +// +// Arguments: +// writeBarrierForm - the write barrier form to use +// addr - the address at which to do the store +// data - the data to store +// +// Return Value: +// true if an optimized write barrier form was used, false if not. If this +// function returns false, the caller must emit a "standard" write barrier. 
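For background: the helpers this function selects come in an unchecked form (the JIT has proved the destination is in the GC heap) and a checked form. Conceptually they perform the usual card-marking write barrier; the sketch below is generic and is not CoreCLR's actual helper (the card granularity is made up):

    #include <cstddef>
    #include <cstdint>

    // Generic card-marking write barrier -- NOT CoreCLR's helper, just the idea.
    // The unchecked form can skip the heap-range test entirely.
    void checkedWriteBarrierSketch(void** dst, void* ref, uint8_t* heapLow, uint8_t* heapHigh,
                                   uint8_t* cardTable, unsigned cardShift /* card granularity, e.g. 2KB */)
    {
        *dst = ref; // the store itself
        uint8_t* d = reinterpret_cast<uint8_t*>(dst);
        if (d >= heapLow && d < heapHigh) // the "checked" part
        {
            cardTable[static_cast<size_t>(d - heapLow) >> cardShift] = 0xFF; // remember this region was written
        }
    }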
+ +bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data) +{ + assert(writeBarrierForm != GCInfo::WBF_NoBarrier); + +#if defined(TARGET_X86) && NOGC_WRITE_BARRIERS + if (!genUseOptimizedWriteBarriers(writeBarrierForm)) + { + return false; + } + + const static int regToHelper[2][8] = { + // If the target is known to be in managed memory + { + CORINFO_HELP_ASSIGN_REF_EAX, // EAX + CORINFO_HELP_ASSIGN_REF_ECX, // ECX + -1, // EDX (always the target address) + CORINFO_HELP_ASSIGN_REF_EBX, // EBX + -1, // ESP + CORINFO_HELP_ASSIGN_REF_EBP, // EBP + CORINFO_HELP_ASSIGN_REF_ESI, // ESI + CORINFO_HELP_ASSIGN_REF_EDI, // EDI + }, + + // Don't know if the target is in managed memory + { + CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, // EAX + CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, // ECX + -1, // EDX (always the target address) + CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, // EBX + -1, // ESP + CORINFO_HELP_CHECKED_ASSIGN_REF_EBP, // EBP + CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, // ESI + CORINFO_HELP_CHECKED_ASSIGN_REF_EDI, // EDI + }, + }; + + noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX); + noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX); + noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX); + noway_assert(regToHelper[0][REG_ESP] == -1); + noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP); + noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI); + noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI); + + noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX); + noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX); + noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX); + noway_assert(regToHelper[1][REG_ESP] == -1); + noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP); + noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI); + noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI); + + regNumber reg = data->GetRegNum(); + noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER)); + + // Generate the following code: + // lea edx, addr + // call write_barrier_helper_reg + + // addr goes in REG_ARG_0 + genCopyRegIfNeeded(addr, REG_WRITE_BARRIER); + + unsigned tgtAnywhere = 0; + if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked) + { + tgtAnywhere = 1; + } + + // We might want to call a modified version of genGCWriteBarrier() to get the benefit of + // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works + // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here. 
+ + genEmitHelperCall(regToHelper[tgtAnywhere][reg], + 0, // argSize + EA_PTRSIZE); // retSize + + return true; +#else // !defined(TARGET_X86) || !NOGC_WRITE_BARRIERS + return false; +#endif // !defined(TARGET_X86) || !NOGC_WRITE_BARRIERS +} + +// Produce code for a GT_CALL node +void CodeGen::genCallInstruction(GenTreeCall* call) +{ + genAlignStackBeforeCall(call); + + gtCallTypes callType = (gtCallTypes)call->gtCallType; + + IL_OFFSETX ilOffset = BAD_IL_OFFSET; + + // all virtuals should have been expanded into a control expression + assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); + + // Insert a GS check if necessary + if (call->IsTailCallViaJitHelper()) + { + if (compiler->getNeedsGSSecurityCookie()) + { +#if FEATURE_FIXED_OUT_ARGS + // If either of the conditions below is true, we will need a temporary register in order to perform the GS + // cookie check. When FEATURE_FIXED_OUT_ARGS is disabled, we save and restore the temporary register using + // push/pop. When FEATURE_FIXED_OUT_ARGS is enabled, however, we need an alternative solution. For now, + // though, the tail prefix is ignored on all platforms that use fixed out args, so we should never hit this + // case. + assert(compiler->gsGlobalSecurityCookieAddr == nullptr); + assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal); +#endif + genEmitGSCookieCheck(true); + } + } + + // Consume all the arg regs + for (GenTreeCall::Use& use : call->LateArgs()) + { + GenTree* argNode = use.GetNode(); + + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); + assert(curArgTabEntry); + + if (curArgTabEntry->GetRegNum() == REG_STK) + { + continue; + } + +#ifdef UNIX_AMD64_ABI + // Deal with multi register passed struct args. + if (argNode->OperGet() == GT_FIELD_LIST) + { + unsigned regIndex = 0; + for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) + { + GenTree* putArgRegNode = use.GetNode(); + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + regNumber argReg = curArgTabEntry->GetRegNum(regIndex++); + + genConsumeReg(putArgRegNode); + + // Validate the putArgRegNode has the right type. + assert(varTypeUsesFloatReg(putArgRegNode->TypeGet()) == genIsValidFloatReg(argReg)); + if (putArgRegNode->GetRegNum() != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), false), argReg, putArgRegNode->GetRegNum()); + } + } + } + else +#endif // UNIX_AMD64_ABI + { + regNumber argReg = curArgTabEntry->GetRegNum(); + genConsumeReg(argNode); + if (argNode->GetRegNum() != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), false), argReg, argNode->GetRegNum()); + } + } + +#if FEATURE_VARARG + // In the case of a varargs call, + // the ABI dictates that if we have floating point args, + // we must pass the enregistered arguments in both the + // integer and floating point registers so, let's do that. + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + regNumber srcReg = argNode->GetRegNum(); + regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum()); + inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg); + } +#endif // FEATURE_VARARG + } + +#if defined(TARGET_X86) || defined(UNIX_AMD64_ABI) + // The call will pop its arguments. 
+ // for each putarg_stk: + target_ssize_t stackArgBytes = 0; + for (GenTreeCall::Use& use : call->Args()) + { + GenTree* arg = use.GetNode(); + if (arg->OperIs(GT_PUTARG_STK) && ((arg->gtFlags & GTF_LATE_ARG) == 0)) + { + GenTree* source = arg->AsPutArgStk()->gtGetOp1(); + unsigned size = arg->AsPutArgStk()->GetStackByteSize(); + stackArgBytes += size; +#ifdef DEBUG + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry != nullptr); + assert(size == (curArgTabEntry->numSlots * TARGET_POINTER_SIZE)); +#ifdef FEATURE_PUT_STRUCT_ARG_STK + if (!source->OperIs(GT_FIELD_LIST) && (source->TypeGet() == TYP_STRUCT)) + { + GenTreeObj* obj = source->AsObj(); + unsigned argBytes = roundUp(obj->GetLayout()->GetSize(), TARGET_POINTER_SIZE); +#ifdef TARGET_X86 + // If we have an OBJ, we must have created a copy if the original arg was not a + // local and was not a multiple of TARGET_POINTER_SIZE. + // Note that on x64/ux this will be handled by unrolling in genStructPutArgUnroll. + assert((argBytes == obj->GetLayout()->GetSize()) || obj->Addr()->IsLocalAddrExpr()); +#endif // TARGET_X86 + assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes); + } +#endif // FEATURE_PUT_STRUCT_ARG_STK +#endif // DEBUG + } + } +#endif // defined(TARGET_X86) || defined(UNIX_AMD64_ABI) + + // Insert a null check on "this" pointer if asked. + if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + GetEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0); + } + + // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = call->gtControlExpr; + if (callType == CT_INDIRECT) + { + assert(target == nullptr); + target = call->gtCallAddr; + methHnd = nullptr; + } + else + { + methHnd = call->gtCallMethHnd; + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (callType != CT_HELPER) + { + sigInfo = call->callSig; + } +#endif // DEBUG + + // If fast tail call, then we are done. In this case we setup the args (both reg args + // and stack args in incoming arg area) and call target in rax. Epilog sequence would + // generate "jmp rax". + if (call->IsFastTailCall()) + { + // Don't support fast tail calling JIT helpers + assert(callType != CT_HELPER); + + // If this is indirect then we go through RAX with epilog sequence + // generating "jmp rax". Otherwise epilog will try to generate a + // rip-relative jump. + if (target != nullptr) + { + genConsumeReg(target); + genCopyRegIfNeeded(target, REG_RAX); + } + + return; + } + + // For a pinvoke to unmanged code we emit a label to clear + // the GC pointer state before the callsite. + // We can't utilize the typical lazy killing of GC pointers + // at (or inside) the callsite. + if (compiler->killGCRefs(call)) + { + genDefineTempLabel(genCreateTempLabel()); + } + + // Determine return value size(s). 
+ const ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + emitAttr retSize = EA_PTRSIZE; + emitAttr secondRetSize = EA_UNKNOWN; + + if (call->HasMultiRegRetVal()) + { + retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0)); + secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1)); + } + else + { + assert(!varTypeIsStruct(call)); + + if (call->gtType == TYP_REF) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + +#if defined(DEBUG) && defined(TARGET_X86) + // Store the stack pointer so we can check it after the call. + if (compiler->opts.compStackCheckOnCall && call->gtCallType == CT_USER_FUNC) + { + noway_assert(compiler->lvaCallSpCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaCallSpCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaCallSpCheck].lvOnFrame); + GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallSpCheck, 0); + } +#endif // defined(DEBUG) && defined(TARGET_X86) + + bool fPossibleSyncHelperCall = false; + CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; + + // We need to propagate the IL offset information to the call instruction, so we can emit + // an IL to native mapping record for the call, to support managed return value debugging. + // We don't want tail call helper calls that were converted from normal calls to get a record, + // so we skip this hash table lookup logic in that case. + if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) + { + (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); + } + +#if defined(TARGET_X86) + bool fCallerPop = call->CallerPop(); + + // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will + // adjust its stack level accordingly. + // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the + // pop when we're done. + target_ssize_t argSizeForEmitter = stackArgBytes; + if (fCallerPop) + { + argSizeForEmitter = -stackArgBytes; + } +#endif // defined(TARGET_X86) + + // When it's a PInvoke call and the call type is USER function, we issue VZEROUPPER here + // if the function contains 256bit AVX instructions, this is to avoid AVX-256 to Legacy SSE + // transition penalty, assuming the user function contains legacy SSE instruction. + // To limit code size increase impact: we only issue VZEROUPPER before PInvoke call, not issue + // VZEROUPPER after PInvoke call because transition penalty from legacy SSE to AVX only happens + // when there's preceding 256-bit AVX to legacy SSE transition penalty. + if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && GetEmitter()->Contains256bitAVX()) + { + assert(compiler->canUseVexEncoding()); + instGen(INS_vzeroupper); + } + + if (callType == CT_HELPER && compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + fPossibleSyncHelperCall = true; + helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + } + + if (target != nullptr) + { +#ifdef TARGET_X86 + if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) + { + // On x86, we need to generate a very specific pattern for indirect VSD calls: + // + // 3-byte nop + // call dword ptr [eax] + // + // Where EAX is also used as an argument to the stub dispatch helper. Make + // sure that the call target address is computed into EAX in this case. 
+ + assert(compiler->virtualStubParamInfo->GetReg() == REG_VIRTUAL_STUB_TARGET); + + assert(target->isContainedIndir()); + assert(target->OperGet() == GT_IND); + + GenTree* addr = target->AsIndir()->Addr(); + assert(addr->isUsedFromReg()); + + genConsumeReg(addr); + genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET); + + GetEmitter()->emitIns_Nop(3); + + // clang-format off + GetEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD), + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, + argSizeForEmitter, + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0); + // clang-format on + } + else +#endif + if (target->isContainedIndir()) + { + if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()) + { + // Note that if gtControlExpr is an indir of an absolute address, we mark it as + // contained only if it can be encoded as PC-relative offset. + assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler)); + + // clang-format off + genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue() + X86_ARG(argSizeForEmitter), + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + ilOffset); + // clang-format on + } + else + { + // clang-format off + genEmitCall(emitter::EC_INDIR_ARD, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + target->AsIndir() + X86_ARG(argSizeForEmitter), + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + ilOffset); + // clang-format on + } + } + else + { + // We have already generated code for gtControlExpr evaluating it into a register. + // We just need to emit "call reg" in this case. + assert(genIsValidIntReg(target->GetRegNum())); + + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr // addr + X86_ARG(argSizeForEmitter), + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + ilOffset, + genConsumeReg(target)); + // clang-format on + } + } +#ifdef FEATURE_READYTORUN_COMPILER + else if (call->gtEntryPoint.addr != nullptr) + { + // clang-format off + genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN + : emitter::EC_FUNC_TOKEN_INDIR, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + (void*) call->gtEntryPoint.addr + X86_ARG(argSizeForEmitter), + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + ilOffset); + // clang-format on + } +#endif + else + { + // Generate a direct call to a non-virtual user defined or helper method + assert(callType == CT_HELPER || callType == CT_USER_FUNC); + + void* addr = nullptr; + if (callType == CT_HELPER) + { + // Direct call to a helper method. + helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + assert(pAddr == nullptr); + } + else + { + // Direct call to a non-virtual user function. 
+ addr = call->gtDirectCallAddress; + } + + assert(addr != nullptr); + + // Non-virtual direct calls to known addresses + + // clang-format off + genEmitCall(emitter::EC_FUNC_TOKEN, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + addr + X86_ARG(argSizeForEmitter), + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + ilOffset); + // clang-format on + } + + // if it was a pinvoke or intrinsic we may have needed to get the address of a label + if (genPendingCallLabel) + { + genDefineInlineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + + // Update GC info: + // All Callee arg registers are trashed and no longer contain any GC pointers. + // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? + // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other + // registers from RBM_CALLEE_TRASH. + assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; + gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { +#ifdef TARGET_X86 + if (varTypeIsFloating(returnType)) + { + // Spill the value from the fp stack. + // Then, load it into the target register. + call->gtFlags |= GTF_SPILL; + regSet.rsSpillFPStack(call); + call->gtFlags |= GTF_SPILLED; + call->gtFlags &= ~GTF_SPILL; + } + else +#endif // TARGET_X86 + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + assert(retTypeDesc != nullptr); + const unsigned regCount = retTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = retTypeDesc->GetReturnRegType(i); + returnReg = retTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + if (returnReg != allocatedReg) + { + inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); + } + } + +#ifdef FEATURE_SIMD + // A Vector3 return value is stored in xmm0 and xmm1. + // RyuJIT assumes that the upper unused bits of xmm1 are cleared but + // the native compiler doesn't guarantee it. + if (returnType == TYP_SIMD12) + { + returnReg = retTypeDesc->GetABIReturnReg(1); + // Clear the upper 32 bits by two shift instructions. + // retReg = retReg << 96 + // retReg = retReg >> 96 + GetEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12); + GetEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12); + } +#endif // FEATURE_SIMD + } + else + { +#ifdef TARGET_X86 + if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) + { + // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with + // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the + // correct argument registers. 
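The pslldq/psrldq pair emitted above for a TYP_SIMD12 return keeps only the low 4 bytes of xmm1 and zeroes the rest. The same effect written with SSE2 intrinsics, for illustration:

    #include <emmintrin.h>

    // pslldq xmm, 12 followed by psrldq xmm, 12 preserves only the low 4 bytes
    // (the single float held in xmm1 for a Vector3 return) and zeroes the rest.
    __m128i clearUpperBytesSketch(__m128i v)
    {
        v = _mm_slli_si128(v, 12); // pslldq
        v = _mm_srli_si128(v, 12); // psrldq
        return v;
    }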
+ returnReg = REG_PINVOKE_TCB; + } + else +#endif // TARGET_X86 + if (varTypeIsFloating(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->GetRegNum() != returnReg) + { + inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, returnType); + } + } + + genProduceReg(call); + } + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && compiler->opts.OptimizationEnabled()) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } + +#if defined(DEBUG) && defined(TARGET_X86) + if (compiler->opts.compStackCheckOnCall && call->gtCallType == CT_USER_FUNC) + { + noway_assert(compiler->lvaCallSpCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaCallSpCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaCallSpCheck].lvOnFrame); + if (!fCallerPop && (stackArgBytes != 0)) + { + // ECX is trashed, so can be used to compute the expected SP. We saved the value of SP + // after pushing all the stack arguments, but the caller popped the arguments, so we need + // to do some math to figure a good comparison. + GetEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE); + GetEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, stackArgBytes); + GetEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallSpCheck, 0); + } + else + { + GetEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallSpCheck, 0); + } + + BasicBlock* sp_check = genCreateTempLabel(); + GetEmitter()->emitIns_J(INS_je, sp_check); + instGen(INS_BREAKPOINT); + genDefineTempLabel(sp_check); + } +#endif // defined(DEBUG) && defined(TARGET_X86) + +#if !defined(FEATURE_EH_FUNCLETS) + //------------------------------------------------------------------------- + // Create a label for tracking of region protected by the monitor in synchronized methods. + // This needs to be here, rather than above where fPossibleSyncHelperCall is set, + // so the GC state vars have been updated before creating the label. + + if (fPossibleSyncHelperCall) + { + switch (helperNum) + { + case CORINFO_HELP_MON_ENTER: + case CORINFO_HELP_MON_ENTER_STATIC: + noway_assert(compiler->syncStartEmitCookie == NULL); + compiler->syncStartEmitCookie = + GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncStartEmitCookie != NULL); + break; + case CORINFO_HELP_MON_EXIT: + case CORINFO_HELP_MON_EXIT_STATIC: + noway_assert(compiler->syncEndEmitCookie == NULL); + compiler->syncEndEmitCookie = + GetEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); + noway_assert(compiler->syncEndEmitCookie != NULL); + break; + default: + break; + } + } +#endif // !FEATURE_EH_FUNCLETS + + unsigned stackAdjustBias = 0; + +#if defined(TARGET_X86) + // Is the caller supposed to pop the arguments? + if (fCallerPop && (stackArgBytes != 0)) + { + stackAdjustBias = stackArgBytes; + } + + SubtractStackLevel(stackArgBytes); +#endif // TARGET_X86 + + genRemoveAlignmentAfterCall(call, stackAdjustBias); +} + +// Produce code for a GT_JMP node. +// The arguments of the caller needs to be transferred to the callee before exiting caller. +// The actual jump to callee is generated as part of caller epilog sequence. +// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. 
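genJmpMethod below first spills register arguments to their stack homes and only then reloads each fixed argument register, so the moves never form a cycle that would need a scratch register. A minimal sketch of why going through memory makes the order irrelevant:

    // Two values that must end up in each other's registers form a cycle; a direct
    // register shuffle would need a temporary or explicit cycle detection. Spilling
    // everything to its stack home first makes the reload order irrelevant.
    void shuffleViaMemorySketch(int& regA, int& regB)
    {
        int homeA = regA; // spill each arg to its stack home
        int homeB = regB;
        regA = homeB;     // then reload every target register from memory
        regB = homeA;
    }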
+void CodeGen::genJmpMethod(GenTree* jmp) +{ + assert(jmp->OperGet() == GT_JMP); + assert(compiler->compJmpOpUsed); + + // If no arguments, nothing to do + if (compiler->info.compArgsCount == 0) + { + return; + } + + // Make sure register arguments are in their initial registers + // and stack arguments are put back as well. + unsigned varNum; + LclVarDsc* varDsc; + + // First move any en-registered stack arguments back to the stack. + // At the same time any reg arg not in correct reg is moved back to its stack location. + // + // We are not strictly required to spill reg args that are not in the desired reg for a jmp call + // But that would require us to deal with circularity while moving values around. Spilling + // to stack makes the implementation simple, which is not a bad trade off given Jmp calls + // are not frequent. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK)) + { + // Skip reg args which are already in its right register for jmp call. + // If not, we will spill such args to their stack locations. + // + // If we need to generate a tail call profiler hook, then spill all + // arg regs to free them up for the callback. + if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg())) + { + continue; + } + } + else if (varDsc->GetRegNum() == REG_STK) + { + // Skip args which are currently living in stack. + continue; + } + + // If we came here it means either a reg argument not in the right register or + // a stack argument currently living in a register. In either case the following + // assert should hold. + assert(varDsc->GetRegNum() != REG_STK); + + assert(!varDsc->lvIsStructField || (compiler->lvaTable[varDsc->lvParentLcl].lvFieldCnt == 1)); + var_types storeType = genActualType(varDsc->lvaArgType()); // We own the memory and can use the full move. + GetEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), varDsc->GetRegNum(), varNum, 0); + + // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. + // Therefore manually update life of varDsc->GetRegNum(). + regMaskTP tempMask = varDsc->lvRegMask(); + regSet.RemoveMaskVars(tempMask); + gcInfo.gcMarkRegSetNpt(tempMask); + if (compiler->lvaIsGCTracked(varDsc)) + { +#ifdef DEBUG + if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); + } +#endif // DEBUG + + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + +#ifdef PROFILING_SUPPORTED + // At this point all arg regs are free. + // Emit tail call profiler callback. + genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif + + // Next move any un-enregistered register arguments back to their register. + regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. 
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) + { + varDsc = compiler->lvaTable + varNum; + if (varDsc->lvPromoted) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; + } + noway_assert(varDsc->lvIsParam); + + // Skip if arg not passed in a register. + if (!varDsc->lvIsRegArg) + { + continue; + } + +#if defined(UNIX_AMD64_ABI) + if (varTypeIsStruct(varDsc)) + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->GetStructHnd(); + assert(typeHnd != nullptr); + + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + assert(structDesc.passedInRegisters); + + unsigned __int8 offset0 = 0; + unsigned __int8 offset1 = 0; + var_types type0 = TYP_UNKNOWN; + var_types type1 = TYP_UNKNOWN; + + // Get the eightbyte data + compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1); + + // Move the values into the right registers. + // + + // Update varDsc->GetArgReg() and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and + // argReg is going live. Note that we cannot modify varDsc->GetRegNum() and lvOtherArgReg here + // because another basic block may not be expecting it. + // Therefore manually update life of argReg. Note that GT_JMP marks + // the end of the basic block and after which reg life and gc info will be recomputed for the new block in + // genCodeForBBList(). + if (type0 != TYP_UNKNOWN) + { + GetEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->GetArgReg(), varNum, offset0); + regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetArgReg())); + gcInfo.gcMarkRegPtrVal(varDsc->GetArgReg(), type0); + } + + if (type1 != TYP_UNKNOWN) + { + GetEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->GetOtherArgReg(), varNum, + offset1); + regSet.SetMaskVars(regSet.GetMaskVars() | genRegMask(varDsc->GetOtherArgReg())); + gcInfo.gcMarkRegPtrVal(varDsc->GetOtherArgReg(), type1); + } + + if (varDsc->lvTracked) + { + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + else +#endif // !defined(UNIX_AMD64_ABI) + { + // Register argument + CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef TARGET_X86 + noway_assert( + isRegParamType(genActualType(varDsc->TypeGet())) || + (varTypeIsStruct(varDsc->TypeGet()) && compiler->isTrivialPointerSizedStruct(varDsc->GetStructHnd()))); +#else + noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); +#endif // TARGET_X86 + + // Is register argument already in the right register? + // If not load it from its stack location. + var_types loadType = varDsc->lvaArgType(); + +#ifdef TARGET_X86 + if (varTypeIsStruct(varDsc->TypeGet())) + { + // Treat trivial pointer-sized structs as a pointer sized primitive + // for the purposes of registers. + loadType = TYP_I_IMPL; + } +#endif + + regNumber argReg = varDsc->GetArgReg(); // incoming arg register + + if (varDsc->GetRegNum() != argReg) + { + assert(genIsValidReg(argReg)); + GetEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); + + // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be + // expecting it. 
Therefore manually update life of argReg. Note that GT_JMP marks the end of the + // basic block and after which reg life and gc info will be recomputed for the new block in + // genCodeForBBList(). + regSet.AddMaskVars(genRegMask(argReg)); + gcInfo.gcMarkRegPtrVal(argReg, loadType); + if (compiler->lvaIsGCTracked(varDsc)) + { +#ifdef DEBUG + if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum); + } +#endif // DEBUG + + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + } + +#if FEATURE_VARARG && defined(TARGET_AMD64) + // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg + // register. This is due to the AMD64 ABI which requires floating point values passed to varargs functions to + // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point + // values on the stack. + if (compiler->info.compIsVarArgs) + { + regNumber intArgReg; + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->GetArgReg(); // incoming arg register + + if (varTypeIsFloating(loadType)) + { + intArgReg = compiler->getCallArgIntRegister(argReg); + inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); + } + else + { + intArgReg = argReg; + } + + fixedIntArgMask |= genRegMask(intArgReg); + + if (intArgReg == REG_ARG_0) + { + assert(firstArgVarNum == BAD_VAR_NUM); + firstArgVarNum = varNum; + } + } +#endif // FEATURE_VARARG + } + +#if FEATURE_VARARG && defined(TARGET_AMD64) + // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments, + // load the remaining arg registers (both int and float) from the corresponding + // shadow stack slots. This is for the reason that we don't know the number and type + // of non-fixed params passed by the caller, therefore we have to assume the worst case + // of caller passing float/double args both in int and float arg regs. + // + // This doesn't apply to x86, which doesn't pass floating point values in floating + // point registers. + // + // The caller could have passed gc-ref/byref type var args. Since these are var args + // the callee no way of knowing their gc-ness. Therefore, mark the region that loads + // remaining arg registers from shadow stack slots as non-gc interruptible. 
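The "remaining arg registers" computation the code below performs with regMaskTP can be pictured with plain integer masks. The sketch uses made-up bit values for the registers; it is not the JIT's register-mask machinery.

    #include <cstdio>

    int main()
    {
        const unsigned RCX = 1u << 0, RDX = 1u << 1, R8 = 1u << 2, R9 = 1u << 3;
        const unsigned allArgRegs   = RCX | RDX | R8 | R9; // stand-in for RBM_ARG_REGS
        const unsigned fixedArgRegs = RCX | RDX;           // regs already holding fixed args

        unsigned remaining = allArgRegs & ~fixedArgRegs;   // regs to reload from shadow slots
        std::printf("remaining mask = 0x%x\n", remaining); // prints 0xc (R8 | R9)
        return 0;
    }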
+ if (fixedIntArgMask != RBM_NONE) + { + assert(compiler->info.compIsVarArgs); + assert(firstArgVarNum != BAD_VAR_NUM); + + regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; + if (remainingIntArgMask != RBM_NONE) + { + GetEmitter()->emitDisableGC(); + for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) + { + regNumber argReg = intArgRegs[argNum]; + regMaskTP argRegMask = genRegMask(argReg); + + if ((remainingIntArgMask & argRegMask) != 0) + { + remainingIntArgMask &= ~argRegMask; + GetEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset); + + // also load it in corresponding float arg reg + regNumber floatReg = compiler->getCallArgFloatRegister(argReg); + inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg); + } + + argOffset += REGSIZE_BYTES; + } + GetEmitter()->emitEnableGC(); + } + } +#endif // FEATURE_VARARG +} + +// produce code for a GT_LEA subnode +void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) +{ + emitAttr size = emitTypeSize(lea); + genConsumeOperands(lea); + + if (lea->Base() && lea->Index()) + { + regNumber baseReg = lea->Base()->GetRegNum(); + regNumber indexReg = lea->Index()->GetRegNum(); + GetEmitter()->emitIns_R_ARX(INS_lea, size, lea->GetRegNum(), baseReg, indexReg, lea->gtScale, lea->Offset()); + } + else if (lea->Base()) + { + GetEmitter()->emitIns_R_AR(INS_lea, size, lea->GetRegNum(), lea->Base()->GetRegNum(), lea->Offset()); + } + else if (lea->Index()) + { + GetEmitter()->emitIns_R_ARX(INS_lea, size, lea->GetRegNum(), REG_NA, lea->Index()->GetRegNum(), lea->gtScale, + lea->Offset()); + } + + genProduceReg(lea); +} + +//------------------------------------------------------------------------ +// genCompareFloat: Generate code for comparing two floating point values +// +// Arguments: +// treeNode - the compare tree +// +void CodeGen::genCompareFloat(GenTree* treeNode) +{ + assert(treeNode->OperIsCompare()); + + GenTreeOp* tree = treeNode->AsOp(); + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + genConsumeOperands(tree); + + assert(varTypeIsFloating(op1Type)); + assert(op1Type == op2Type); + + regNumber targetReg = treeNode->GetRegNum(); + instruction ins; + emitAttr cmpAttr; + + GenCondition condition = GenCondition::FromFloatRelop(treeNode); + + if (condition.PreferSwap()) + { + condition = GenCondition::Swap(condition); + std::swap(op1, op2); + } + + ins = ins_FloatCompare(op1Type); + cmpAttr = emitTypeSize(op1Type); + + GetEmitter()->emitInsBinary(ins, cmpAttr, op1, op2); + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + if ((condition.GetCode() == GenCondition::FNEU) && (op1->GetRegNum() == op2->GetRegNum())) + { + // For floating point, `x != x` is a common way of + // checking for NaN. So, in the case where both + // operands are the same, we can optimize codegen + // to only do a single check. + + condition = GenCondition(GenCondition::P); + } + + inst_SETCC(condition, treeNode->TypeGet(), targetReg); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCompareInt: Generate code for comparing ints or, on amd64, longs. +// +// Arguments: +// treeNode - the compare tree +// +// Return Value: +// None. 
+void CodeGen::genCompareInt(GenTree* treeNode) +{ + assert(treeNode->OperIsCompare() || treeNode->OperIs(GT_CMP)); + + GenTreeOp* tree = treeNode->AsOp(); + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + bool canReuseFlags = false; + + genConsumeOperands(tree); + + assert(!op1->isContainedIntOrIImmed()); + assert(!varTypeIsFloating(op2Type)); + + instruction ins; + var_types type = TYP_UNKNOWN; + + if (tree->OperIs(GT_TEST_EQ, GT_TEST_NE)) + { + ins = INS_test; + + // Unlike many xarch instructions TEST doesn't have a form with a 16/32/64 bit first operand and + // an 8 bit immediate second operand. But if the immediate value fits in 8 bits then we can simply + // emit a 8 bit TEST instruction, unless we're targeting x86 and the first operand is a non-byteable + // register. + // Note that lowering does something similar but its main purpose is to allow memory operands to be + // contained so it doesn't handle other kind of operands. It could do more but on x86 that results + // in additional register constrains and that may be worse than wasting 3 bytes on an immediate. + if ( +#ifdef TARGET_X86 + (!op1->isUsedFromReg() || isByteReg(op1->GetRegNum())) && +#endif + (op2->IsCnsIntOrI() && genSmallTypeCanRepresentValue(TYP_UBYTE, op2->AsIntCon()->IconValue()))) + { + type = TYP_UBYTE; + } + } + else if (op1->isUsedFromReg() && op2->IsIntegralConst(0)) + { + if (compiler->opts.OptimizationEnabled()) + { + emitAttr op1Size = emitActualTypeSize(op1->TypeGet()); + assert((int)op1Size >= 4); + + // Optimize "x<0" and "x>=0" to "x>>31" if "x" is not a jump condition and in a reg. + // Morph/Lowering are responsible to rotate "00" so we won't handle it here. + if ((targetReg != REG_NA) && tree->OperIs(GT_LT, GT_GE) && !tree->IsUnsigned()) + { + if (targetReg != op1->GetRegNum()) + { + inst_RV_RV(INS_mov, targetReg, op1->GetRegNum(), op1->TypeGet()); + } + if (tree->OperIs(GT_GE)) + { + // emit "not" for "x>=0" case + inst_RV(INS_not, targetReg, op1->TypeGet()); + } + inst_RV_IV(INS_shr_N, targetReg, (int)op1Size * 8 - 1, op1Size); + genProduceReg(tree); + return; + } + canReuseFlags = true; + } + + // We're comparing a register to 0 so we can generate "test reg1, reg1" + // instead of the longer "cmp reg1, 0" + ins = INS_test; + op2 = op1; + } + else + { + ins = INS_cmp; + } + + if (type == TYP_UNKNOWN) + { + if (op1Type == op2Type) + { + type = op1Type; + } + else if (genTypeSize(op1Type) == genTypeSize(op2Type)) + { + // If the types are different but have the same size then we'll use TYP_INT or TYP_LONG. + // This primarily deals with small type mixes (e.g. byte/ubyte) that need to be widened + // and compared as int. We should not get long type mixes here but handle that as well + // just in case. + type = genTypeSize(op1Type) == 8 ? TYP_LONG : TYP_INT; + } + else + { + // In the types are different simply use TYP_INT. This deals with small type/int type + // mixes (e.g. byte/short ubyte/int) that need to be widened and compared as int. + // Lowering is expected to handle any mixes that involve long types (e.g. int/long). 
+ type = TYP_INT; + } + + // The common type cannot be smaller than any of the operand types, we're probably mixing int/long + assert(genTypeSize(type) >= max(genTypeSize(op1Type), genTypeSize(op2Type))); + // Small unsigned int types (TYP_BOOL can use anything) should use unsigned comparisons + assert(!(varTypeIsSmallInt(type) && varTypeIsUnsigned(type)) || ((tree->gtFlags & GTF_UNSIGNED) != 0)); + // If op1 is smaller then it cannot be in memory, we're probably missing a cast + assert((genTypeSize(op1Type) >= genTypeSize(type)) || !op1->isUsedFromMemory()); + // If op2 is smaller then it cannot be in memory, we're probably missing a cast + assert((genTypeSize(op2Type) >= genTypeSize(type)) || !op2->isUsedFromMemory()); + // If we ended up with a small type and op2 is a constant then make sure we don't lose constant bits + assert(!op2->IsCnsIntOrI() || !varTypeIsSmall(type) || + genSmallTypeCanRepresentValue(type, op2->AsIntCon()->IconValue())); + } + + // The type cannot be larger than the machine word size + assert(genTypeSize(type) <= genTypeSize(TYP_I_IMPL)); + // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned + assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type)); + + bool needsOCFlags = !tree->OperIs(GT_EQ, GT_NE); + if (canReuseFlags && emit->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(type), needsOCFlags)) + { + JITDUMP("Not emitting compare due to flags being already set\n"); + } + else + { + emit->emitInsBinary(ins, emitTypeSize(type), op1, op2); + } + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + inst_SETCC(GenCondition::FromIntegralRelop(tree), tree->TypeGet(), targetReg); + genProduceReg(tree); + } +} + +#if !defined(TARGET_64BIT) +//------------------------------------------------------------------------ +// genLongToIntCast: Generate code for long to int casts on x86. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// The cast node and its sources (via GT_LONG) must have been assigned registers. +// The destination cannot be a floating point type or a small integer type. +// +void CodeGen::genLongToIntCast(GenTree* cast) +{ + assert(cast->OperGet() == GT_CAST); + + GenTree* src = cast->gtGetOp1(); + noway_assert(src->OperGet() == GT_LONG); + + genConsumeRegs(src); + + var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? 
TYP_ULONG : TYP_LONG; + var_types dstType = cast->CastToType(); + regNumber loSrcReg = src->gtGetOp1()->GetRegNum(); + regNumber hiSrcReg = src->gtGetOp2()->GetRegNum(); + regNumber dstReg = cast->GetRegNum(); + + assert((dstType == TYP_INT) || (dstType == TYP_UINT)); + assert(genIsValidIntReg(loSrcReg)); + assert(genIsValidIntReg(hiSrcReg)); + assert(genIsValidIntReg(dstReg)); + + if (cast->gtOverflow()) + { + // + // Generate an overflow check for [u]long to [u]int casts: + // + // long -> int - check if the upper 33 bits are all 0 or all 1 + // + // ulong -> int - check if the upper 33 bits are all 0 + // + // long -> uint - check if the upper 32 bits are all 0 + // ulong -> uint - check if the upper 32 bits are all 0 + // + + if ((srcType == TYP_LONG) && (dstType == TYP_INT)) + { + BasicBlock* allOne = genCreateTempLabel(); + BasicBlock* success = genCreateTempLabel(); + + inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE); + inst_JMP(EJ_js, allOne); + + inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + inst_JMP(EJ_jmp, success); + + genDefineTempLabel(allOne); + inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + + genDefineTempLabel(success); + } + else + { + if ((srcType == TYP_ULONG) && (dstType == TYP_INT)) + { + inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW); + } + + inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + } + } + + if (dstReg != loSrcReg) + { + inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE); + } + + genProduceReg(cast); +} +#endif + +//------------------------------------------------------------------------ +// genIntCastOverflowCheck: Generate overflow checking code for an integer cast. +// +// Arguments: +// cast - The GT_CAST node +// desc - The cast description +// reg - The register containing the value to check +// +void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg) +{ + switch (desc.CheckKind()) + { + case GenIntCastDesc::CHECK_POSITIVE: + GetEmitter()->emitIns_R_R(INS_test, EA_SIZE(desc.CheckSrcSize()), reg, reg); + genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); + break; + +#ifdef TARGET_64BIT + case GenIntCastDesc::CHECK_UINT_RANGE: + { + // We need to check if the value is not greater than 0xFFFFFFFF but this value + // cannot be encoded in an immediate operand. Use a right shift to test if the + // upper 32 bits are zero. This requires a temporary register. 
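Expressed as plain C++, the range test that the emitted mov/shr/jne sequence below performs is simply the following; FitsInUInt32 is an illustrative name, not a JIT helper.

    #include <cstdint>

    // True when a 64-bit value is representable as an unsigned 32-bit integer.
    // Mirrors the emitted sequence: copy to a temp, shift right by 32, compare to zero.
    bool FitsInUInt32(uint64_t value)
    {
        return (value >> 32) == 0;
    }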
+ const regNumber tempReg = cast->GetSingleTempReg(); + assert(tempReg != reg); + GetEmitter()->emitIns_R_R(INS_mov, EA_8BYTE, tempReg, reg); + GetEmitter()->emitIns_R_I(INS_shr_N, EA_8BYTE, tempReg, 32); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + } + break; + + case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE: + GetEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MAX); + genJumpToThrowHlpBlk(EJ_ja, SCK_OVERFLOW); + break; + + case GenIntCastDesc::CHECK_INT_RANGE: + GetEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MAX); + genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW); + GetEmitter()->emitIns_R_I(INS_cmp, EA_8BYTE, reg, INT32_MIN); + genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); + break; +#endif + + default: + { + assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); + const int castMaxValue = desc.CheckSmallIntMax(); + const int castMinValue = desc.CheckSmallIntMin(); + + GetEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(desc.CheckSrcSize()), reg, castMaxValue); + genJumpToThrowHlpBlk((castMinValue == 0) ? EJ_ja : EJ_jg, SCK_OVERFLOW); + + if (castMinValue != 0) + { + GetEmitter()->emitIns_R_I(INS_cmp, EA_SIZE(desc.CheckSrcSize()), reg, castMinValue); + genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); + } + } + break; + } +} + +//------------------------------------------------------------------------ +// genIntToIntCast: Generate code for an integer cast, with or without overflow check. +// +// Arguments: +// cast - The GT_CAST node +// +// Assumptions: +// The cast node is not a contained node and must have an assigned register. +// Neither the source nor target type can be a floating point type. +// On x86 casts to (U)BYTE require that the source be in a byte register. +// +// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register. 
+// +void CodeGen::genIntToIntCast(GenTreeCast* cast) +{ + genConsumeRegs(cast->gtGetOp1()); + + const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + const regNumber dstReg = cast->GetRegNum(); + emitter* emit = GetEmitter(); + + assert(genIsValidIntReg(srcReg)); + assert(genIsValidIntReg(dstReg)); + + GenIntCastDesc desc(cast); + + if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) + { + genIntCastOverflowCheck(cast, desc, srcReg); + } + + if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) + { + instruction ins; + unsigned insSize; + bool canSkip = false; + + switch (desc.ExtendKind()) + { + case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + ins = INS_movzx; + insSize = desc.ExtendSrcSize(); + break; + case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + ins = INS_movsx; + insSize = desc.ExtendSrcSize(); + break; +#ifdef TARGET_64BIT + case GenIntCastDesc::ZERO_EXTEND_INT: + // We can skip emitting this zero extending move if the previous instruction zero extended implicitly + if ((srcReg == dstReg) && compiler->opts.OptimizationEnabled()) + { + canSkip = emit->AreUpper32BitsZero(srcReg); + } + + ins = INS_mov; + insSize = 4; + break; + case GenIntCastDesc::SIGN_EXTEND_INT: + ins = INS_movsxd; + insSize = 4; + break; +#endif + default: + assert(desc.ExtendKind() == GenIntCastDesc::COPY); + assert(srcReg != dstReg); + ins = INS_mov; + insSize = desc.ExtendSrcSize(); + break; + } + + if (canSkip) + { + JITDUMP("\n -- suppressing emission as previous instruction already properly extends.\n"); + } + else + { + emit->emitIns_R_R(ins, EA_ATTR(insSize), dstReg, srcReg); + } + } + + genProduceReg(cast); +} + +//------------------------------------------------------------------------ +// genFloatToFloatCast: Generate code for a cast between float and double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// The cast is between float and double or vice versa. +// +void CodeGen::genFloatToFloatCast(GenTree* treeNode) +{ + // float <--> double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidFloatReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; +#ifdef DEBUG + // If not contained, must be a valid float reg. + if (op1->isUsedFromReg()) + { + assert(genIsValidFloatReg(op1->GetRegNum())); + } +#endif + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + + genConsumeOperands(treeNode->AsOp()); + if (srcType == dstType && (op1->isUsedFromReg() && (targetReg == op1->GetRegNum()))) + { + // source and destinations types are the same and also reside in the same register. + // we just need to consume and produce the reg in this case. + ; + } + else + { + instruction ins = ins_FloatConv(dstType, srcType); + GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genIntToFloatCast: Generate code to cast an int/long to float/double +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. 
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double. +// +void CodeGen::genIntToFloatCast(GenTree* treeNode) +{ + // int type --> float/double conversions are always non-overflow ones + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidFloatReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; +#ifdef DEBUG + if (op1->isUsedFromReg()) + { + assert(genIsValidIntReg(op1->GetRegNum())); + } +#endif + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); + +#if !defined(TARGET_64BIT) + // We expect morph to replace long to float/double casts with helper calls + noway_assert(!varTypeIsLong(srcType)); +#endif // !defined(TARGET_64BIT) + + // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we + // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except + // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered + // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack, + // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type + // temp and using temp as operand of cast operation. + if (srcType == TYP_BYREF) + { + noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR); + srcType = TYP_I_IMPL; + } + + // force the srcType to unsigned if GT_UNSIGNED flag is set + if (treeNode->gtFlags & GTF_UNSIGNED) + { + srcType = genUnsignedType(srcType); + } + + noway_assert(!varTypeIsGC(srcType)); + + // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long). + // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect + // either the front-end or lowering phase to have generated two levels of cast. + // The first one is for widening smaller int type to int32 and the second one is + // to the float/double. + emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); + noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG)))); + + // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions + // here since they should have been lowered apropriately. + noway_assert(srcType != TYP_UINT); + noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); + + // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used + // which does a partial write to lower 4/8 bytes of xmm register keeping the other + // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop, + // the partial write could introduce a false dependency and could cause a stall + // if there are further uses of xmmReg. We have such a case occurring with a + // customer reported version of SpectralNorm benchmark, resulting in 2x perf + // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before + // cvtsi2ss/sd instruction. + + genConsumeOperands(treeNode->AsOp()); + GetEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->GetRegNum(), treeNode->GetRegNum()); + + // Note that here we need to specify srcType that will determine + // the size of source reg/mem operand and rex.w prefix. + instruction ins = ins_FloatConv(dstType, TYP_INT); + GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); + + // Handle the case of srcType = TYP_ULONG. 
SSE2 conversion instruction + // will interpret ULONG value as LONG. Hence we need to adjust the + // result if sign-bit of srcType is set. + if (srcType == TYP_ULONG) + { + // The instruction sequence below is less accurate than what clang + // and gcc generate. However, we keep the current sequence for backward compatibility. + // If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule + // should be also updated for consistent conversion result. + assert(dstType == TYP_DOUBLE); + assert(op1->isUsedFromReg()); + + // Set the flags without modifying op1. + // test op1Reg, op1Reg + inst_RV_RV(INS_test, op1->GetRegNum(), op1->GetRegNum(), srcType); + + // No need to adjust result if op1 >= 0 i.e. positive + // Jge label + BasicBlock* label = genCreateTempLabel(); + inst_JMP(EJ_jge, label); + + // Adjust the result + // result = result + 0x43f00000 00000000 + // addsd resultReg, 0x43f00000 00000000 + CORINFO_FIELD_HANDLE* cns = &u8ToDblBitmask; + if (*cns == nullptr) + { + double d; + static_assert_no_msg(sizeof(double) == sizeof(__int64)); + *((__int64*)&d) = 0x43f0000000000000LL; + + *cns = GetEmitter()->emitFltOrDblConst(d, EA_8BYTE); + } + GetEmitter()->emitIns_R_C(INS_addsd, EA_8BYTE, treeNode->GetRegNum(), *cns, 0); + + genDefineTempLabel(label); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genFloatToIntCast: Generate code to cast float/double to int/long +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// Cast is a non-overflow conversion. +// The treeNode must have an assigned register. +// SrcType=float/double and DstType= int32/uint32/int64/uint64 +// +// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64 +// +void CodeGen::genFloatToIntCast(GenTree* treeNode) +{ + // we don't expect to see overflow detecting float/double --> int type conversions here + // as they should have been converted into helper calls by front-end. + assert(treeNode->OperGet() == GT_CAST); + assert(!treeNode->gtOverflow()); + + regNumber targetReg = treeNode->GetRegNum(); + assert(genIsValidIntReg(targetReg)); + + GenTree* op1 = treeNode->AsOp()->gtOp1; +#ifdef DEBUG + if (op1->isUsedFromReg()) + { + assert(genIsValidFloatReg(op1->GetRegNum())); + } +#endif + + var_types dstType = treeNode->CastToType(); + var_types srcType = op1->TypeGet(); + assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); + + // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG). + // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the + // front-end or lowering phase to have generated two levels of cast. The first one is + // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to + // the required smaller int type. + emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); + noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); + + // We shouldn't be seeing uint64 here as it should have been converted + // into a helper call by either front-end or lowering phase. + noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); + + // If the dstType is TYP_UINT, we have 32-bits to encode the + // float number. Any of 33rd or above bits can be the sign bit. + // To achieve it we pretend as if we are converting it to a long. 
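The "pretend it is a long" trick just described can be written out in plain C++: converting through a signed 64-bit integer means values above INT32_MAX never hit the signed 32-bit overflow case, and the low 32 bits are then exactly the unsigned result. DoubleToUInt32 is an illustrative name, not a JIT helper, and the sketch assumes the input is in the uint32 range.

    #include <cstdint>

    uint32_t DoubleToUInt32(double d)
    {
        // A truncating conversion with a 64-bit destination (cvttsd2si with rex.w):
        // the 33rd and higher bits absorb what would otherwise be a 32-bit sign bit.
        return static_cast<uint32_t>(static_cast<int64_t>(d));
    }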
+ if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT)))) + { + dstType = TYP_LONG; + } + + // Note that we need to specify dstType here so that it will determine + // the size of destination integer register and also the rex.w prefix. + genConsumeOperands(treeNode->AsOp()); + instruction ins = ins_FloatConv(TYP_INT, srcType); + GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCkfinite: Generate code for ckfinite opcode. +// +// Arguments: +// treeNode - The GT_CKFINITE node +// +// Return Value: +// None. +// +// Assumptions: +// GT_CKFINITE node has reserved an internal register. +// +// TODO-XArch-CQ - mark the operand as contained if known to be in +// memory (e.g. field or an array element). +// +void CodeGen::genCkfinite(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_CKFINITE); + + GenTree* op1 = treeNode->AsOp()->gtOp1; + var_types targetType = treeNode->TypeGet(); + int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent. + regNumber targetReg = treeNode->GetRegNum(); + + // Extract exponent into a register. + regNumber tmpReg = treeNode->GetSingleTempReg(); + + genConsumeReg(op1); + +#ifdef TARGET_64BIT + + // Copy the floating-point value to an integer register. If we copied a float to a long, then + // right-shift the value so the high 32 bits of the floating-point value sit in the low 32 + // bits of the integer register. + regNumber srcReg = op1->GetRegNum(); + var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG); + inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType); + if (targetType == TYP_DOUBLE) + { + // right shift by 32 bits to get to exponent. + inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32); + } + + // Mask exponent with all 1's and check if the exponent is all 1's + inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); + inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); + + // If exponent is all 1's, throw ArithmeticException + genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); + + // if it is a finite value copy it to targetReg + if (targetReg != op1->GetRegNum()) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); + } + +#else // !TARGET_64BIT + + // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register. + // There is no easy way to do this. To not require an extra register, we'll use shuffles + // to move the high 32 bits into the low 32 bits, then shuffle it back, since we + // need to produce the value into the target register. 
+ // + // For TYP_DOUBLE, we'll generate (for targetReg != op1->GetRegNum()): + // movaps targetReg, op1->GetRegNum() + // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY + // mov_xmm2i tmpReg, targetReg // tmpReg <= Y + // and tmpReg, + // cmp tmpReg, + // je + // movaps targetReg, op1->GetRegNum() // copy the value again, instead of un-shuffling it + // + // For TYP_DOUBLE with (targetReg == op1->GetRegNum()): + // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY + // mov_xmm2i tmpReg, targetReg // tmpReg <= Y + // and tmpReg, + // cmp tmpReg, + // je + // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX + // + // For TYP_FLOAT, it's the same as TARGET_64BIT: + // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits + // and tmpReg, + // cmp tmpReg, + // je + // movaps targetReg, op1->GetRegNum() // only if targetReg != op1->GetRegNum() + + regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp. + + if (targetType == TYP_DOUBLE) + { + if (targetReg != op1->GetRegNum()) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); + } + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); + copyToTmpSrcReg = targetReg; + } + else + { + copyToTmpSrcReg = op1->GetRegNum(); + } + + // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point + // value, no matter the floating-point type. + inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT); + + // Mask exponent with all 1's and check if the exponent is all 1's + inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); + inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); + + // If exponent is all 1's, throw ArithmeticException + genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN); + + if (targetReg != op1->GetRegNum()) + { + // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched, + // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE + // than re-shuffling the targetReg. + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); + } + else if (targetType == TYP_DOUBLE) + { + // We need to re-shuffle the targetReg to get the correct result. + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); + } + +#endif // !TARGET_64BIT + + genProduceReg(treeNode); +} + +#ifdef TARGET_AMD64 +int CodeGenInterface::genSPtoFPdelta() const +{ + int delta; + +#ifdef UNIX_AMD64_ABI + + // We require frame chaining on Unix to support native tool unwinding (such as + // unwinding by the native debugger). We have a CLR-only extension to the + // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240. + // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated. + delta = genTotalFrameSize(); + +#else // !UNIX_AMD64_ABI + + // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if + // RBP needs to be reported in unwind codes. This case would arise for methods + // with localloc. + if (compiler->compLocallocUsed) + { + // We cannot base delta computation on compLclFrameSize since it changes from + // tentative to final frame layout and hence there is a possibility of + // under-estimating offset of vars from FP, which in turn results in under- + // estimating instruction size. + // + // To be predictive and so as never to under-estimate offset of vars from FP + // we will always position FP at min(240, outgoing arg area size). 
+ delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize); + } + else if (compiler->opts.compDbgEnC) + { + // vm assumption on EnC methods is that rsp and rbp are equal + delta = 0; + } + else + { + delta = genTotalFrameSize(); + } + +#endif // !UNIX_AMD64_ABI + + return delta; +} + +//--------------------------------------------------------------------- +// genTotalFrameSize - return the total size of the stack frame, including local size, +// callee-saved register size, etc. For AMD64, this does not include the caller-pushed +// return address. +// +// Return value: +// Total frame size +// + +int CodeGenInterface::genTotalFrameSize() const +{ + assert(!IsUninitialized(compiler->compCalleeRegsPushed)); + + int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + + assert(totalFrameSize >= 0); + return totalFrameSize; +} + +//--------------------------------------------------------------------- +// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. +// This number is going to be negative, since the Caller-SP is at a higher +// address than the frame pointer. +// +// There must be a frame pointer to call this function! +// +// We can't compute this directly from the Caller-SP, since the frame pointer +// is based on a maximum delta from Initial-SP, so first we find SP, then +// compute the FP offset. + +int CodeGenInterface::genCallerSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + int callerSPtoFPdelta; + + callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); + + assert(callerSPtoFPdelta <= 0); + return callerSPtoFPdelta; +} + +//--------------------------------------------------------------------- +// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. +// +// This number will be negative. + +int CodeGenInterface::genCallerSPtoInitialSPdelta() const +{ + int callerSPtoSPdelta = 0; + + callerSPtoSPdelta -= genTotalFrameSize(); + callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address + + // compCalleeRegsPushed does not account for the frame pointer + // TODO-Cleanup: shouldn't this be part of genTotalFrameSize? + if (isFramePointerUsed()) + { + callerSPtoSPdelta -= REGSIZE_BYTES; + } + + assert(callerSPtoSPdelta <= 0); + return callerSPtoSPdelta; +} +#endif // TARGET_AMD64 + +//----------------------------------------------------------------------------------------- +// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask" +// +// Arguments: +// treeNode - tree node +// +// Return value: +// None +// +// Assumptions: +// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs() +// ii) tree type is floating point type. +// iii) caller of this routine needs to call genProduceReg() +void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + regNumber operandReg = genConsumeReg(treeNode->gtGetOp1()); + emitAttr size = emitTypeSize(treeNode); + + assert(varTypeIsFloating(treeNode->TypeGet())); + assert(treeNode->gtGetOp1()->isUsedFromReg()); + + CORINFO_FIELD_HANDLE* maskFld = nullptr; + UINT64 mask = 0; + instruction ins = INS_invalid; + + if (treeNode->OperIs(GT_NEG)) + { + // Neg(x) = flip the sign bit. + // Neg(f) = f ^ 0x80000000 x4 (packed) + // Neg(d) = d ^ 0x8000000000000000 x2 (packed) + ins = INS_xorps; + mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL; + maskFld = treeNode->TypeIs(TYP_FLOAT) ? 
&negBitmaskFlt : &negBitmaskDbl; + } + else if (treeNode->OperIs(GT_INTRINSIC)) + { + assert(treeNode->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs); + // Abs(x) = set sign-bit to zero + // Abs(f) = f & 0x7fffffff x4 (packed) + // Abs(d) = d & 0x7fffffffffffffff x2 (packed) + ins = INS_andps; + mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7fffffff7fffffffUL : 0x7fffffffffffffffUL; + maskFld = treeNode->TypeIs(TYP_FLOAT) ? &absBitmaskFlt : &absBitmaskDbl; + } + else + { + assert(!"genSSE2BitwiseOp: unsupported oper"); + } + + if (*maskFld == nullptr) + { + UINT64 maskPack[] = {mask, mask}; + *maskFld = GetEmitter()->emitBlkConst(&maskPack, 16, 16, treeNode->TypeGet()); + } + + GetEmitter()->emitIns_SIMD_R_R_C(ins, size, targetReg, operandReg, *maskFld, 0); +} + +//----------------------------------------------------------------------------------------- +// genSSE41RoundOp - generate SSE41 code for the given tree as a round operation +// +// Arguments: +// treeNode - tree node +// +// Return value: +// None +// +// Assumptions: +// i) SSE4.1 is supported by the underlying hardware +// ii) treeNode oper is a GT_INTRINSIC +// iii) treeNode type is a floating point type +// iv) treeNode is not used from memory +// v) tree oper is NI_System_Math{F}_Round, _Ceiling, or _Floor +// vi) caller of this routine needs to call genProduceReg() +void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) +{ + // i) SSE4.1 is supported by the underlying hardware + assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE41)); + + // ii) treeNode oper is a GT_INTRINSIC + assert(treeNode->OperGet() == GT_INTRINSIC); + + GenTree* srcNode = treeNode->gtGetOp1(); + + // iii) treeNode type is floating point type + assert(varTypeIsFloating(srcNode)); + assert(srcNode->TypeGet() == treeNode->TypeGet()); + + // iv) treeNode is not used from memory + assert(!treeNode->isUsedFromMemory()); + + genConsumeOperands(treeNode); + + instruction ins = (treeNode->TypeGet() == TYP_FLOAT) ? INS_roundss : INS_roundsd; + emitAttr size = emitTypeSize(treeNode); + + regNumber dstReg = treeNode->GetRegNum(); + + unsigned ival = 0; + + // v) tree oper is NI_System_Math{F}_Round, _Ceiling, or _Floor + switch (treeNode->AsIntrinsic()->gtIntrinsicName) + { + case NI_System_Math_Round: + ival = 4; + break; + + case NI_System_Math_Ceiling: + ival = 10; + break; + + case NI_System_Math_Floor: + ival = 9; + break; + + default: + ins = INS_invalid; + assert(!"genSSE41RoundOp: unsupported intrinsic"); + unreached(); + } + + if (srcNode->isContained() || srcNode->isUsedFromSpillTemp()) + { + emitter* emit = GetEmitter(); + + TempDsc* tmpDsc = nullptr; + unsigned varNum = BAD_VAR_NUM; + unsigned offset = (unsigned)-1; + + if (srcNode->isUsedFromSpillTemp()) + { + assert(srcNode->IsRegOptional()); + + tmpDsc = getSpillTempDsc(srcNode); + varNum = tmpDsc->tdTempNum(); + offset = 0; + + regSet.tmpRlsTemp(tmpDsc); + } + else if (srcNode->isIndir()) + { + GenTreeIndir* memIndir = srcNode->AsIndir(); + GenTree* memBase = memIndir->gtOp1; + + switch (memBase->OperGet()) + { + case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: + { + assert(memBase->isContained()); + varNum = memBase->AsLclVarCommon()->GetLclNum(); + offset = memBase->AsLclVarCommon()->GetLclOffs(); + + // Ensure that all the GenTreeIndir values are set to their defaults. 
+ assert(memBase->GetRegNum() == REG_NA); + assert(!memIndir->HasIndex()); + assert(memIndir->Scale() == 1); + assert(memIndir->Offset() == 0); + + break; + } + + case GT_CLS_VAR_ADDR: + { + emit->emitIns_R_C_I(ins, size, dstReg, memBase->AsClsVar()->gtClsVarHnd, 0, ival); + return; + } + + default: + { + emit->emitIns_R_A_I(ins, size, dstReg, memIndir, ival); + return; + } + } + } + else + { + switch (srcNode->OperGet()) + { + case GT_CNS_DBL: + { + GenTreeDblCon* dblConst = srcNode->AsDblCon(); + CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst->gtDconVal, emitTypeSize(dblConst)); + + emit->emitIns_R_C_I(ins, size, dstReg, hnd, 0, ival); + return; + } + + case GT_LCL_FLD: + varNum = srcNode->AsLclFld()->GetLclNum(); + offset = srcNode->AsLclFld()->GetLclOffs(); + break; + + case GT_LCL_VAR: + { + assert(srcNode->IsRegOptional() || + !compiler->lvaTable[srcNode->AsLclVar()->GetLclNum()].lvIsRegCandidate()); + + varNum = srcNode->AsLclVar()->GetLclNum(); + offset = 0; + break; + } + + default: + unreached(); + break; + } + } + + // Ensure we got a good varNum and offset. + // We also need to check for `tmpDsc != nullptr` since spill temp numbers + // are negative and start with -1, which also happens to be BAD_VAR_NUM. + assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); + assert(offset != (unsigned)-1); + + emit->emitIns_R_S_I(ins, size, dstReg, varNum, offset, ival); + } + else + { + inst_RV_RV_IV(ins, size, dstReg, srcNode->GetRegNum(), ival); + } +} + +//--------------------------------------------------------------------- +// genIntrinsic - generate code for a given intrinsic +// +// Arguments +// treeNode - the GT_INTRINSIC node +// +// Return value: +// None +// +void CodeGen::genIntrinsic(GenTree* treeNode) +{ + // Handle intrinsics that can be implemented by target-specific instructions + switch (treeNode->AsIntrinsic()->gtIntrinsicName) + { + case NI_System_Math_Abs: + genSSE2BitwiseOp(treeNode); + break; + + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: + case NI_System_Math_Round: + genSSE41RoundOp(treeNode->AsOp()); + break; + + case NI_System_Math_Sqrt: + { + // Both operand and its result must be of the same floating point type. 
+ GenTree* srcNode = treeNode->AsOp()->gtOp1; + assert(varTypeIsFloating(srcNode)); + assert(srcNode->TypeGet() == treeNode->TypeGet()); + + genConsumeOperands(treeNode->AsOp()); + GetEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode, srcNode); + break; + } + + default: + assert(!"genIntrinsic: Unsupported intrinsic"); + unreached(); + } + + genProduceReg(treeNode); +} + +//---------------------------------------------------------------------- +// genBitCast - Generate the instruction to move a value between register files +// +// Arguments +// targetType - the destination type +// targetReg - the destination register +// srcType - the source type +// srcReg - the source register +// +void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types srcType, regNumber srcReg) +{ + const bool srcFltReg = varTypeUsesFloatReg(srcType) || varTypeIsSIMD(srcType); + assert(srcFltReg == genIsValidFloatReg(srcReg)); + const bool dstFltReg = varTypeUsesFloatReg(targetType) || varTypeIsSIMD(targetType); + assert(dstFltReg == genIsValidFloatReg(targetReg)); + if (srcFltReg != dstFltReg) + { + inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType); + } + else if (targetReg != srcReg) + { + inst_RV_RV(ins_Copy(targetType), targetReg, srcReg, targetType); + } +} + +//---------------------------------------------------------------------- +// genCodeForBitCast - Generate code for a GT_BITCAST that is not contained +// +// Arguments +// treeNode - the GT_BITCAST for which we're generating code +// +void CodeGen::genCodeForBitCast(GenTreeOp* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + GenTree* op1 = treeNode->gtGetOp1(); + genConsumeRegs(op1); + + if (op1->isContained()) + { + assert(op1->IsLocal() || op1->isIndir()); + if (genIsRegCandidateLocal(op1)) + { + unsigned lclNum = op1->AsLclVar()->GetLclNum(); + GetEmitter()->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lclNum)), + emitTypeSize(treeNode), targetReg, lclNum, 0); + } + else + { + op1->gtType = treeNode->TypeGet(); + op1->SetRegNum(targetReg); + op1->ClearContained(); + JITDUMP("Changing type of BITCAST source to load directly."); + genCodeForTreeNode(op1); + } + } + else + { + genBitCast(targetType, targetReg, op1->TypeGet(), op1->GetRegNum()); + } + genProduceReg(treeNode); +} + +//-------------------------------------------------------------------------- // +// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg. +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// The number of the base variable. +// +// Note: +// If tail call the outgoing args are placed in the caller's incoming arg stack space. +// Otherwise, they go in the outgoing arg area on the current frame. +// +// On Windows the caller always creates slots (homing space) in its frame for the +// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0. +// For System V systems there is no such calling convention requirement, and the code needs to find +// the first stack passed argument from the caller. This is done by iterating over +// all the lvParam variables and finding the first with GetArgReg() equals to REG_STK. +// +unsigned CodeGen::getBaseVarForPutArgStk(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_PUTARG_STK); + + unsigned baseVarNum; + + // Whether to setup stk arg in incoming or out-going arg area? 
+ // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. + // All other calls - stk arg is setup in out-going arg area. + if (treeNode->AsPutArgStk()->putInIncomingArgArea()) + { + // See the note in the function header re: finding the first stack passed argument. + baseVarNum = getFirstArgWithStackSlot(); + assert(baseVarNum != BAD_VAR_NUM); + +#ifdef DEBUG + // This must be a fast tail call. + assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); + + // Since it is a fast tail call, the existence of first incoming arg is guaranteed + // because fast tail call requires that in-coming arg area of caller is >= out-going + // arg area required for tail call. + LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]); + assert(varDsc != nullptr); + +#ifdef UNIX_AMD64_ABI + assert(!varDsc->lvIsRegArg && varDsc->GetArgReg() == REG_STK); +#else // !UNIX_AMD64_ABI + // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0. + assert(varDsc->lvIsRegArg && (varDsc->GetArgReg() == REG_ARG_0 || varDsc->GetArgReg() == REG_FLTARG_0)); +#endif // !UNIX_AMD64_ABI +#endif // !DEBUG + } + else + { +#if FEATURE_FIXED_OUT_ARGS + baseVarNum = compiler->lvaOutgoingArgSpaceVar; +#else // !FEATURE_FIXED_OUT_ARGS + assert(!"No BaseVarForPutArgStk on x86"); + baseVarNum = BAD_VAR_NUM; +#endif // !FEATURE_FIXED_OUT_ARGS + } + + return baseVarNum; +} + +//--------------------------------------------------------------------- +// genAlignStackBeforeCall: Align the stack if necessary before a call. +// +// Arguments: +// putArgStk - the putArgStk node. +// +void CodeGen::genAlignStackBeforeCall(GenTreePutArgStk* putArgStk) +{ +#if defined(UNIX_X86_ABI) + + genAlignStackBeforeCall(putArgStk->gtCall); + +#endif // UNIX_X86_ABI +} + +//--------------------------------------------------------------------- +// genAlignStackBeforeCall: Align the stack if necessary before a call. +// +// Arguments: +// call - the call node. +// +void CodeGen::genAlignStackBeforeCall(GenTreeCall* call) +{ +#if defined(UNIX_X86_ABI) + + // Have we aligned the stack yet? + if (!call->fgArgInfo->IsStkAlignmentDone()) + { + // We haven't done any stack alignment yet for this call. We might need to create + // an alignment adjustment, even if this function itself doesn't have any stack args. + // This can happen if this function call is part of a nested call sequence, and the outer + // call has already pushed some arguments. + + unsigned stkLevel = genStackLevel + call->fgArgInfo->GetStkSizeBytes(); + call->fgArgInfo->ComputeStackAlignment(stkLevel); + + unsigned padStkAlign = call->fgArgInfo->GetStkAlign(); + if (padStkAlign != 0) + { + // Now generate the alignment + inst_RV_IV(INS_sub, REG_SPBASE, padStkAlign, EA_PTRSIZE); + AddStackLevel(padStkAlign); + AddNestedAlignment(padStkAlign); + } + + call->fgArgInfo->SetStkAlignmentDone(); + } + +#endif // UNIX_X86_ABI +} + +//--------------------------------------------------------------------- +// genRemoveAlignmentAfterCall: After a call, remove the alignment +// added before the call, if any. +// +// Arguments: +// call - the call node. +// bias - additional stack adjustment +// +// Note: +// When bias > 0, caller should adjust stack level appropriately as +// bias is not considered when adjusting stack level. 
+// +void CodeGen::genRemoveAlignmentAfterCall(GenTreeCall* call, unsigned bias) +{ +#if defined(TARGET_X86) +#if defined(UNIX_X86_ABI) + // Put back the stack pointer if there was any padding for stack alignment + unsigned padStkAlign = call->fgArgInfo->GetStkAlign(); + unsigned padStkAdjust = padStkAlign + bias; + + if (padStkAdjust != 0) + { + inst_RV_IV(INS_add, REG_SPBASE, padStkAdjust, EA_PTRSIZE); + SubtractStackLevel(padStkAlign); + SubtractNestedAlignment(padStkAlign); + } +#else // UNIX_X86_ABI + if (bias != 0) + { + genAdjustSP(bias); + } +#endif // !UNIX_X86_ABI_ +#else // TARGET_X86 + assert(bias == 0); +#endif // !TARGET_X86 +} + +#ifdef TARGET_X86 + +//--------------------------------------------------------------------- +// genAdjustStackForPutArgStk: +// adjust the stack pointer for a putArgStk node if necessary. +// +// Arguments: +// putArgStk - the putArgStk node. +// +// Returns: true if the stack pointer was adjusted; false otherwise. +// +// Notes: +// Sets `m_pushStkArg` to true if the stack arg needs to be pushed, +// false if the stack arg needs to be stored at the current stack +// pointer address. This is exactly the opposite of the return value +// of this function. +// +bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) +{ + const unsigned argSize = putArgStk->GetStackByteSize(); + GenTree* source = putArgStk->gtGetOp1(); + +#ifdef FEATURE_SIMD + if (!source->OperIs(GT_FIELD_LIST) && varTypeIsSIMD(source)) + { + inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + AddStackLevel(argSize); + m_pushStkArg = false; + return true; + } +#endif // FEATURE_SIMD + + // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack. + // This is set in Lowering, and is true if and only if: + // - This argument contains any GC pointers OR + // - It is a GT_FIELD_LIST OR + // - It is less than 16 bytes in size. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + switch (putArgStk->gtPutArgStkKind) + { + case GenTreePutArgStk::Kind::RepInstr: + case GenTreePutArgStk::Kind::Unroll: + assert(!source->AsObj()->GetLayout()->HasGCPtr() && (argSize >= 16)); + break; + case GenTreePutArgStk::Kind::Push: + case GenTreePutArgStk::Kind::PushAllSlots: + assert(source->OperIs(GT_FIELD_LIST) || source->AsObj()->GetLayout()->HasGCPtr() || (argSize < 16)); + break; + case GenTreePutArgStk::Kind::Invalid: + default: + assert(!"Uninitialized GenTreePutArgStk::Kind"); + break; + } +#endif // DEBUG + + if (putArgStk->isPushKind()) + { + m_pushStkArg = true; + return false; + } + else + { + m_pushStkArg = false; + + // If argSize is large, we need to probe the stack like we do in the prolog (genAllocLclFrame) + // or for localloc (genLclHeap), to ensure we touch the stack pages sequentially, and don't miss + // the stack guard pages. The prolog probes, but we don't know at this point how much higher + // the last probed stack pointer value is. We default a threshold. Any size below this threshold + // we are guaranteed the stack has been probed. Above this threshold, we don't know. The threshold + // should be high enough to cover all common cases. Increasing the threshold means adding a few + // more "lowest address of stack" probes in the prolog. Since this is relatively rare, add it to + // stress modes. 
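The requirement described above, touching stack pages sequentially so the guard page is not skipped, can be modeled with a small stand-alone loop. This is a conceptual model only: the page size is assumed to be 4 KB, "base" stands in for the old stack pointer, and the JIT actually emits a decrement-and-store probe loop rather than C++.

    #include <cstddef>
    #include <cstdint>

    // Touch one byte in every page of the region [base - bytes, base), highest
    // address first, so each guard page is hit in order before being passed.
    void TouchPagesDescending(volatile std::uint8_t* base, std::size_t bytes)
    {
        const std::size_t pageSize = 4096; // assumed page size
        for (std::size_t offset = pageSize; offset <= bytes; offset += pageSize)
        {
            *(base - offset) = 0; // ordered touch, like the emitted probe loop
        }
    }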
+ + if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) || + compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5)) + { + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, REG_NA); + } + else + { + inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + } + + AddStackLevel(argSize); + return true; + } +} + +//--------------------------------------------------------------------- +// genPutArgStkFieldList - generate code for passing a GT_FIELD_LIST arg on the stack. +// +// Arguments +// treeNode - the GT_PUTARG_STK node whose op1 is a GT_FIELD_LIST +// +// Return value: +// None +// +void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) +{ + GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList(); + assert(fieldList != nullptr); + + // Set m_pushStkArg and pre-adjust the stack if necessary. + const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk); + + // For now, we only support the "push" case; we will push a full slot for the first field of each slot + // within the struct. + assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg); + + // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0. + // (Note that this mode is not currently being used.) + // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them + // in reverse order, so we start with the current field offset at the size of the struct arg (which must be + // a multiple of the target pointer size). + unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->GetStackByteSize(); + unsigned prevFieldOffset = currentOffset; + regNumber intTmpReg = REG_NA; + regNumber simdTmpReg = REG_NA; + if (putArgStk->AvailableTempRegCount() != 0) + { + regMaskTP rsvdRegs = putArgStk->gtRsvdRegs; + if ((rsvdRegs & RBM_ALLINT) != 0) + { + intTmpReg = putArgStk->GetSingleTempReg(RBM_ALLINT); + assert(genIsValidIntReg(intTmpReg)); + } + if ((rsvdRegs & RBM_ALLFLOAT) != 0) + { + simdTmpReg = putArgStk->GetSingleTempReg(RBM_ALLFLOAT); + assert(genIsValidFloatReg(simdTmpReg)); + } + assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1)); + } + + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + GenTree* const fieldNode = use.GetNode(); + const unsigned fieldOffset = use.GetOffset(); + var_types fieldType = use.GetType(); + + // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the + // field list in descending order by offset. + assert(!varTypeIsLong(fieldType)); + assert(fieldOffset <= prevFieldOffset); + + // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately + // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been + // assigned a register, and which is therefore contained. + // Unlike genConsumeReg(), it handles the case where no registers are being consumed. + genConsumeRegs(fieldNode); + regNumber argReg = fieldNode->isUsedFromSpillTemp() ? REG_NA : fieldNode->GetRegNum(); + + // If the field is slot-like, we can use a push instruction to store the entire register no matter the type. + // + // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up + // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must + // not require rounding. 
+ // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise + // able to detect stores into the outgoing argument area of the stack on x86. + const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4); + int adjustment = roundUp(currentOffset - fieldOffset, 4); + if (fieldIsSlot && !varTypeIsSIMD(fieldType)) + { + fieldType = genActualType(fieldType); + unsigned pushSize = genTypeSize(fieldType); + assert((pushSize % 4) == 0); + adjustment -= pushSize; + while (adjustment != 0) + { + inst_IV(INS_push, 0); + currentOffset -= pushSize; + AddStackLevel(pushSize); + adjustment -= pushSize; + } + m_pushStkArg = true; + } + else + { + m_pushStkArg = false; + + // We always "push" floating point fields (i.e. they are full slot values that don't + // require special handling). + assert(varTypeIsIntegralOrI(fieldNode) || varTypeIsSIMD(fieldNode)); + + // If we can't push this field, it needs to be in a register so that we can store + // it to the stack location. + if (adjustment != 0) + { + // This moves the stack pointer to fieldOffset. + // For this case, we must adjust the stack and generate stack-relative stores rather than pushes. + // Adjust the stack pointer to the next slot boundary. + inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE); + currentOffset -= adjustment; + AddStackLevel(adjustment); + } + + // Does it need to be in a byte register? + // If so, we'll use intTmpReg, which must have been allocated as a byte register. + // If it's already in a register, but not a byteable one, then move it. + if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0))) + { + assert(intTmpReg != REG_NA); + noway_assert((genRegMask(intTmpReg) & RBM_BYTE_REGS) != 0); + if (argReg != REG_NA) + { + inst_RV_RV(INS_mov, intTmpReg, argReg, fieldType); + argReg = intTmpReg; + } + } + } + + if (argReg == REG_NA) + { + if (m_pushStkArg) + { + if (fieldNode->isUsedFromSpillTemp()) + { + assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD? + assert(fieldNode->IsRegOptional()); + TempDsc* tmp = getSpillTempDsc(fieldNode); + GetEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0); + regSet.tmpRlsTemp(tmp); + } + else + { + assert(varTypeIsIntegralOrI(fieldNode)); + switch (fieldNode->OperGet()) + { + case GT_LCL_VAR: + inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet())); + break; + case GT_CNS_INT: + if (fieldNode->IsIconHandle()) + { + inst_IV_handle(INS_push, fieldNode->AsIntCon()->gtIconVal); + } + else + { + inst_IV(INS_push, fieldNode->AsIntCon()->gtIconVal); + } + break; + default: + unreached(); + } + } + currentOffset -= TARGET_POINTER_SIZE; + AddStackLevel(TARGET_POINTER_SIZE); + } + else + { + // The stack has been adjusted and we will load the field to intTmpReg and then store it on the stack. 
+ assert(varTypeIsIntegralOrI(fieldNode)); + switch (fieldNode->OperGet()) + { + case GT_LCL_VAR: + inst_RV_TT(INS_mov, intTmpReg, fieldNode); + break; + case GT_CNS_INT: + genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode); + break; + default: + unreached(); + } + genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset); + } + } + else + { +#if defined(FEATURE_SIMD) + if (fieldType == TYP_SIMD12) + { + assert(genIsValidFloatReg(simdTmpReg)); + genStoreSIMD12ToStack(argReg, simdTmpReg); + } + else +#endif // defined(FEATURE_SIMD) + { + genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset); + } + if (m_pushStkArg) + { + // We always push a slot-rounded size + currentOffset -= genTypeSize(fieldType); + } + } + + prevFieldOffset = fieldOffset; + } + if (currentOffset != 0) + { + // We don't expect padding at the beginning of a struct, but it could happen with explicit layout. + inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE); + AddStackLevel(currentOffset); + } +} +#endif // TARGET_X86 + +//--------------------------------------------------------------------- +// genPutArgStk - generate code for passing an arg on the stack. +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// targetType - the type of the treeNode +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) +{ + GenTree* data = putArgStk->gtOp1; + var_types targetType = genActualType(data->TypeGet()); + +#ifdef TARGET_X86 + + genAlignStackBeforeCall(putArgStk); + + if ((data->OperGet() != GT_FIELD_LIST) && varTypeIsStruct(targetType)) + { + (void)genAdjustStackForPutArgStk(putArgStk); + genPutStructArgStk(putArgStk); + return; + } + + // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LISTs of TYP_INT. + assert(targetType != TYP_LONG); + + const unsigned argSize = putArgStk->GetStackByteSize(); + assert((argSize % TARGET_POINTER_SIZE) == 0); + + if (data->isContainedIntOrIImmed()) + { + if (data->IsIconHandle()) + { + inst_IV_handle(INS_push, data->AsIntCon()->gtIconVal); + } + else + { + inst_IV(INS_push, data->AsIntCon()->gtIconVal); + } + AddStackLevel(argSize); + } + else if (data->OperGet() == GT_FIELD_LIST) + { + genPutArgStkFieldList(putArgStk); + } + else + { + // We should not see any contained nodes that are not immediates. + assert(data->isUsedFromReg()); + genConsumeReg(data); + genPushReg(targetType, data->GetRegNum()); + } +#else // !TARGET_X86 + { + unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk); + +#ifdef UNIX_AMD64_ABI + + if (data->OperIs(GT_FIELD_LIST)) + { + genPutArgStkFieldList(putArgStk, baseVarNum); + return; + } + else if (varTypeIsStruct(targetType)) + { + m_stkArgVarNum = baseVarNum; + m_stkArgOffset = putArgStk->getArgOffset(); + genPutStructArgStk(putArgStk); + m_stkArgVarNum = BAD_VAR_NUM; + return; + } +#endif // UNIX_AMD64_ABI + + noway_assert(targetType != TYP_STRUCT); + + // Get argument offset on stack. + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. 
+ unsigned argOffset = putArgStk->getArgOffset(); + +#ifdef DEBUG + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(putArgStk->gtCall, putArgStk); + assert(curArgTabEntry != nullptr); + assert(argOffset == curArgTabEntry->slotNum * TARGET_POINTER_SIZE); +#endif + + if (data->isContainedIntOrIImmed()) + { + GetEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset, + (int)data->AsIntConCommon()->IconValue()); + } + else + { + assert(data->isUsedFromReg()); + genConsumeReg(data); + GetEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->GetRegNum(), baseVarNum, + argOffset); + } + } +#endif // !TARGET_X86 +} + +//--------------------------------------------------------------------- +// genPutArgReg - generate code for a GT_PUTARG_REG node +// +// Arguments +// tree - the GT_PUTARG_REG node +// +// Return value: +// None +// +void CodeGen::genPutArgReg(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_PUTARG_REG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + +#ifndef UNIX_AMD64_ABI + assert(targetType != TYP_STRUCT); +#endif // !UNIX_AMD64_ABI + + GenTree* op1 = tree->gtOp1; + genConsumeReg(op1); + + // If child node is not already in the register we need, move it + if (targetReg != op1->GetRegNum()) + { + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); + } + + genProduceReg(tree); +} + +#ifdef TARGET_X86 +// genPushReg: Push a register value onto the stack and adjust the stack level +// +// Arguments: +// type - the type of value to be stored +// reg - the register containing the value +// +// Notes: +// For TYP_LONG, the srcReg must be a floating point register. +// Otherwise, the register type must be consistent with the given type. +// +void CodeGen::genPushReg(var_types type, regNumber srcReg) +{ + unsigned size = genTypeSize(type); + if (varTypeIsIntegralOrI(type) && type != TYP_LONG) + { + assert(genIsValidIntReg(srcReg)); + inst_RV(INS_push, srcReg, type); + } + else + { + instruction ins; + emitAttr attr = emitTypeSize(type); + if (type == TYP_LONG) + { + // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg. + // This is only used when we are pushing a struct from memory to memory, and basically is + // handling an 8-byte "chunk", as opposed to strictly a long type. + ins = INS_movq; + } + else + { + ins = ins_Store(type); + } + assert(genIsValidFloatReg(srcReg)); + inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE); + GetEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0); + } + AddStackLevel(size); +} +#endif // TARGET_X86 + +#if defined(FEATURE_PUT_STRUCT_ARG_STK) +// genStoreRegToStackArg: Store a register value into the stack argument area +// +// Arguments: +// type - the type of value to be stored +// reg - the register containing the value +// offset - the offset from the base (see Assumptions below) +// +// Notes: +// A type of TYP_STRUCT instructs this method to store a 16-byte chunk +// at the given offset (i.e. not the full struct). +// +// Assumptions: +// The caller must set the context appropriately before calling this method: +// - On x64, m_stkArgVarNum must be set according to whether this is a regular or tail call. +// - On x86, the caller must set m_pushStkArg if this method should push the argument. +// Otherwise, the argument is stored at the given offset from sp. +// +// TODO: In the below code the load and store instructions are for 16 bytes, but the +// type is EA_8BYTE. 
The movdqa/u are 16 byte instructions, so it works, but +// this probably needs to be changed. +// +void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset) +{ + assert(srcReg != REG_NA); + instruction ins; + emitAttr attr; + unsigned size; + + if (type == TYP_STRUCT) + { + ins = INS_movdqu; + // This should be changed! + attr = EA_8BYTE; + size = 16; + } + else + { +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(type)) + { + assert(genIsValidFloatReg(srcReg)); + ins = ins_Store(type); // TODO-CQ: pass 'aligned' correctly + } + else +#endif // FEATURE_SIMD +#ifdef TARGET_X86 + if (type == TYP_LONG) + { + assert(genIsValidFloatReg(srcReg)); + ins = INS_movq; + } + else +#endif // TARGET_X86 + { + assert((varTypeUsesFloatReg(type) && genIsValidFloatReg(srcReg)) || + (varTypeIsIntegralOrI(type) && genIsValidIntReg(srcReg))); + ins = ins_Store(type); + } + attr = emitTypeSize(type); + size = genTypeSize(type); + } + +#ifdef TARGET_X86 + if (m_pushStkArg) + { + genPushReg(type, srcReg); + } + else + { + GetEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, offset); + } +#else // !TARGET_X86 + assert(m_stkArgVarNum != BAD_VAR_NUM); + GetEmitter()->emitIns_S_R(ins, attr, srcReg, m_stkArgVarNum, m_stkArgOffset + offset); +#endif // !TARGET_X86 +} + +//--------------------------------------------------------------------- +// genPutStructArgStk - generate code for copying a struct arg on the stack by value. +// In case there are references to heap object in the struct, +// it generates the gcinfo as well. +// +// Arguments +// putArgStk - the GT_PUTARG_STK node +// +// Notes: +// In the case of fixed out args, the caller must have set m_stkArgVarNum to the variable number +// corresponding to the argument area (where we will put the argument on the stack). +// For tail calls this is the baseVarNum = 0. +// For non tail calls this is the outgoingArgSpace. +void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) +{ + GenTree* source = putArgStk->gtGetOp1(); + var_types targetType = source->TypeGet(); + +#if defined(TARGET_X86) && defined(FEATURE_SIMD) + if (putArgStk->isSIMD12()) + { + genPutArgStkSIMD12(putArgStk); + return; + } +#endif // defined(TARGET_X86) && defined(FEATURE_SIMD) + + if (varTypeIsSIMD(targetType)) + { + regNumber srcReg = genConsumeReg(source); + assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg))); + genStoreRegToStackArg(targetType, srcReg, 0); + return; + } + + assert(targetType == TYP_STRUCT); + + ClassLayout* layout = source->AsObj()->GetLayout(); + + if (!layout->HasGCPtr()) + { + switch (putArgStk->gtPutArgStkKind) + { + case GenTreePutArgStk::Kind::RepInstr: + genStructPutArgRepMovs(putArgStk); + break; + case GenTreePutArgStk::Kind::Unroll: + genStructPutArgUnroll(putArgStk); + break; + case GenTreePutArgStk::Kind::Push: + genStructPutArgUnroll(putArgStk); + break; + default: + unreached(); + } + } + else + { + // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef TARGET_X86 + // On x86, any struct that has contains GC references must be stored to the stack using `push` instructions so + // that the emitter properly detects the need to update the method's GC information. + // + // Strictly speaking, it is only necessary to use `push` to store the GC references themselves, so for structs + // with large numbers of consecutive non-GC-ref-typed fields, we may be able to improve the code size in the + // future. 
+ assert(m_pushStkArg); + + GenTree* srcAddr = source->gtGetOp1(); + const unsigned byteSize = putArgStk->GetStackByteSize(); + assert(byteSize % TARGET_POINTER_SIZE == 0); + const unsigned numSlots = byteSize / TARGET_POINTER_SIZE; + assert(putArgStk->gtNumSlots == numSlots); + + regNumber srcRegNum = srcAddr->GetRegNum(); + const bool srcAddrInReg = srcRegNum != REG_NA; + + unsigned srcLclNum = 0; + unsigned srcLclOffset = 0; + if (srcAddrInReg) + { + genConsumeReg(srcAddr); + } + else + { + assert(srcAddr->OperIsLocalAddr()); + + srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); + srcLclOffset = srcAddr->AsLclVarCommon()->GetLclOffs(); + } + + for (int i = numSlots - 1; i >= 0; --i) + { + emitAttr slotAttr = emitTypeSize(layout->GetGCPtrType(i)); + const unsigned byteOffset = i * TARGET_POINTER_SIZE; + if (srcAddrInReg) + { + GetEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, byteOffset); + } + else + { + GetEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + byteOffset); + } + AddStackLevel(TARGET_POINTER_SIZE); + } +#else // !defined(TARGET_X86) + + // Consume these registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA); + + const bool srcIsLocal = putArgStk->gtOp1->AsObj()->gtOp1->OperIsLocalAddr(); + const emitAttr srcAddrAttr = srcIsLocal ? EA_PTRSIZE : EA_BYREF; + +#if DEBUG + unsigned numGCSlotsCopied = 0; +#endif // DEBUG + + const unsigned byteSize = putArgStk->GetStackByteSize(); + assert(byteSize % TARGET_POINTER_SIZE == 0); + const unsigned numSlots = byteSize / TARGET_POINTER_SIZE; + assert(putArgStk->gtNumSlots == numSlots); + for (unsigned i = 0; i < numSlots;) + { + if (!layout->IsGCPtr(i)) + { + // Let's see if we can use rep movsp (alias for movsd or movsq for 32 and 64 bits respectively) + // instead of a sequence of movsp instructions to save cycles and code size. + unsigned adjacentNonGCSlotCount = 0; + do + { + adjacentNonGCSlotCount++; + i++; + } while ((i < numSlots) && !layout->IsGCPtr(i)); + + // If we have a very small contiguous non-ref region, it's better just to + // emit a sequence of movsp instructions + if (adjacentNonGCSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) + { + for (; adjacentNonGCSlotCount > 0; adjacentNonGCSlotCount--) + { + instGen(INS_movsp); + } + } + else + { + GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, adjacentNonGCSlotCount); + instGen(INS_r_movsp); + } + } + else + { + // We have a GC (byref or ref) pointer + // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsp instruction, + // but the logic for emitting a GC info record is not available (it is internal for the emitter + // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do + // instGen(INS_movsp); and emission of gc info. + + var_types memType = layout->GetGCPtrType(i); + GetEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0); + genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE); +#ifdef DEBUG + numGCSlotsCopied++; +#endif // DEBUG + + i++; + if (i < numSlots) + { + // Source for the copy operation. + // If a LocalAddr, use EA_PTRSIZE - copy from stack. + // If not a LocalAddr, use EA_BYREF - the source location is not on the stack. 
+ GetEmitter()->emitIns_R_I(INS_add, srcAddrAttr, REG_RSI, TARGET_POINTER_SIZE); + + // Always copying to the stack - outgoing arg area + // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE. + GetEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE); + } + } + } + + assert(numGCSlotsCopied == layout->GetGCPtrCount()); +#endif // TARGET_X86 + } +} +#endif // defined(FEATURE_PUT_STRUCT_ARG_STK) + +/***************************************************************************** + * + * Create and record GC Info for the function. + */ +#ifndef JIT32_GCENCODER +void +#else // !JIT32_GCENCODER +void* +#endif // !JIT32_GCENCODER +CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr)) +{ +#ifdef JIT32_GCENCODER + return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr)); +#else // !JIT32_GCENCODER + genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); +#endif // !JIT32_GCENCODER +} + +#ifdef JIT32_GCENCODER +void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + BYTE headerBuf[64]; + InfoHdr header; + + int s_cached; + +#ifdef FEATURE_EH_FUNCLETS + // We should do this before gcInfoBlockHdrSave since varPtrTableSize must be finalized before it + if (compiler->ehAnyFunclets()) + { + gcInfo.gcMarkFilterVarsPinned(); + } +#endif + +#ifdef DEBUG + size_t headerSize = +#endif + compiler->compInfoBlkSize = + gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached); + + size_t argTabOffset = 0; + size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset); + +#if DISPLAY_SIZES + + if (GetInterruptible()) + { + gcHeaderISize += compiler->compInfoBlkSize; + gcPtrMapISize += ptrMapSize; + } + else + { + gcHeaderNSize += compiler->compInfoBlkSize; + gcPtrMapNSize += ptrMapSize; + } + +#endif // DISPLAY_SIZES + + compiler->compInfoBlkSize += ptrMapSize; + + /* Allocate the info block for the method */ + + compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize); + +#if 0 // VERBOSE_SIZES + // TODO-X86-Cleanup: 'dataSize', below, is not defined + +// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100) + { + printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n", + compiler->info.compILCodeSize, + compiler->compInfoBlkSize, + codeSize + dataSize, + codeSize + dataSize - prologSize - epilogSize, + 100 * (codeSize + dataSize) / compiler->info.compILCodeSize, + 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize, + compiler->info.compClassName, + compiler->info.compMethodName); +} + +#endif + + /* Fill in the info block and return it to the caller */ + + void* infoPtr = compiler->compInfoBlkAddr; + + /* Create the method info block: header followed by GC tracking tables */ + + compiler->compInfoBlkAddr += + gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached); + + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize); + compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset); + assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize); + +#ifdef DEBUG + + if (0) + { + BYTE* temp = (BYTE*)infoPtr; + size_t size = compiler->compInfoBlkAddr - temp; + BYTE* ptab = temp + headerSize; + + 
noway_assert(size == headerSize + ptrMapSize); + + printf("Method info block - header [%zu bytes]:", headerSize); + + for (unsigned i = 0; i < size; i++) + { + if (temp == ptab) + { + printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize); + printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' '); + } + else + { + if (!(i % 16)) + printf("\n %04X: ", i); + } + + printf("%02X ", *temp++); + } + + printf("\n"); + } + +#endif // DEBUG + +#if DUMP_GC_TABLES + + if (compiler->opts.dspGCtbls) + { + const BYTE* base = (BYTE*)infoPtr; + size_t size; + unsigned methodSize; + InfoHdr dumpHeader; + + printf("GC Info for method %s\n", compiler->info.compFullName); + printf("GC info size = %3u\n", compiler->compInfoBlkSize); + + size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize); + // printf("size of header encoding is %3u\n", size); + printf("\n"); + + if (compiler->opts.dspGCtbls) + { + base += size; + size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize); + // printf("size of pointer table is %3u\n", size); + printf("\n"); + noway_assert(compiler->compInfoBlkAddr == (base + size)); + } + } + +#endif // DUMP_GC_TABLES + + /* Make sure we ended up generating the expected number of bytes */ + + noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize); + + return infoPtr; +} + +#else // !JIT32_GCENCODER +void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // We keep the call count for the second call to gcMakeRegPtrTable() below. + unsigned callCnt = 0; + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + // Now we can actually use those slot ID's to declare live ranges. 
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); + + if (compiler->opts.compDbgEnC) + { + // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) + // which is: + // -return address + // -saved off RBP + // -saved 'this' pointer and bool for synchronized methods + + // 4 slots for RBP + return address + RSI + RDI + int preservedAreaSize = 4 * REGSIZE_BYTES; + + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + { + preservedAreaSize += REGSIZE_BYTES; + } + + // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack) + preservedAreaSize += 4; + } + + // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the + // frame + gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); + } + + if (compiler->opts.IsReversePInvoke()) + { + unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; + assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM && reversePInvokeFrameVarNumber < compiler->lvaRefCount); + LclVarDsc& reversePInvokeFrameVar = compiler->lvaTable[reversePInvokeFrameVarNumber]; + gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar.GetStackOffset()); + } + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} +#endif // !JIT32_GCENCODER + +/***************************************************************************** + * Emit a call to a helper function. + * + */ + +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg) +{ + void* addr = nullptr; + void* pAddr = nullptr; + + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + + if (!addr) + { + assert(pAddr != nullptr); + + // Absolute indirect call addr + // Note: Order of checks is important. First always check for pc-relative and next + // zero-relative. Because the former encoding is 1-byte smaller than the latter. + if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) || + genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr)) + { + // generate call whose target is specified by 32-bit offset relative to PC or zero. + callType = emitter::EC_FUNC_TOKEN_INDIR; + addr = pAddr; + } + else + { +#ifdef TARGET_AMD64 + // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero, + // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to + // make the call. + // mov reg, addr + // call [reg] + + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. 
+                callTargetReg            = REG_DEFAULT_HELPER_CALL_TARGET;
+                regMaskTP callTargetMask = genRegMask(callTargetReg);
+                noway_assert((callTargetMask & killMask) == callTargetMask);
+            }
+            else
+            {
+                // The call target must not overwrite any live variable, though it may not be in the
+                // kill set for the call.
+                regMaskTP callTargetMask = genRegMask(callTargetReg);
+                noway_assert((callTargetMask & regSet.GetMaskVars()) == RBM_NONE);
+            }
+#endif
+
+            callTarget = callTargetReg;
+            CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
+            callType = emitter::EC_INDIR_ARD;
+        }
+    }
+
+    // clang-format off
+    GetEmitter()->emitIns_Call(callType,
+                               compiler->eeFindHelper(helper),
+                               INDEBUG_LDISASM_COMMA(nullptr) addr,
+                               argSize,
+                               retSize
+                               MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),
+                               gcInfo.gcVarPtrSetCur,
+                               gcInfo.gcRegGCrefSetCur,
+                               gcInfo.gcRegByrefSetCur,
+                               BAD_IL_OFFSET, // IL offset
+                               callTarget,    // ireg
+                               REG_NA, 0, 0,  // xreg, xmul, disp
+                               false          // isJump
+                               );
+    // clang-format on
+
+    regSet.verifyRegistersUsed(killMask);
+}
+
+/*****************************************************************************
+* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
+* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
+* disassembler thinks the instructions are the same as we do.
+*/
+
+// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
+// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
+//#define ALL_XARCH_EMITTER_UNIT_TESTS
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64)
+void CodeGen::genAmd64EmitterUnitTests()
+{
+    if (!verbose)
+    {
+        return;
+    }
+
+    if (!compiler->opts.altJit)
+    {
+        // No point doing this in a "real" JIT.
+        return;
+    }
+
+    // Mark the "fake" instructions in the output.
+    printf("*************** In genAmd64EmitterUnitTests()\n");
+
+    // We use this:
+    //      genDefineTempLabel(genCreateTempLabel());
+    // to create artificial labels to help separate groups of tests.
+ + // + // Loads + // + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef ALL_XARCH_EMITTER_UNIT_TESTS + genDefineTempLabel(genCreateTempLabel()); + + // vhaddpd ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddss xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddsd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddps xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddps ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddpd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vaddpd ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubss xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubsd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubps ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubps ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubpd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vsubpd ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulss xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulsd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulps xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulpd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulps ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vmulpd ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandps xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandpd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandps ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vandpd ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorps xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorpd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorps ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vorpd ymm0,ymm1,ymm2 + GetEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivss xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivsd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivss xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivsd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + + // vdivss 
xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2); + // vdivsd xmm0,xmm1,xmm2 + GetEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2); +#endif // ALL_XARCH_EMITTER_UNIT_TESTS + printf("*************** End of genAmd64EmitterUnitTests()\n"); +} + +#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) + +#ifdef PROFILING_SUPPORTED + +#ifdef TARGET_X86 + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. This variable remains unchanged. +// +// Return Value: +// None +// +// Notes: +// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in +// VM\i386\asmhelpers.asm for details): +// 1. The calling sequence for calling the helper is: +// push FunctionIDOrClientID +// call ProfileEnterHelper +// 2. The calling function has an EBP frame. +// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, +// the following prolog is assumed: +// push ESP +// mov EBP, ESP +// 4. All registers are preserved. +// 5. The helper pops the FunctionIDOrClientID argument from the stack. +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + unsigned saveStackLvl2 = genStackLevel; + +// Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK() +// for x86 stack unwinding + +#if defined(UNIX_X86_ABI) + // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() + GetEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); +#endif // UNIX_X86_ABI + + // Push the profilerHandle + if (compiler->compProfilerMethHndIndirected) + { + GetEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + + // This will emit either + // "call ip-relative 32-bit offset" or + // "mov rax, helper addr; call rax" + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, + 0, // argSize. Again, we have to lie about it + EA_UNKNOWN); // retSize + + // Check that we have place for the push. + assert(compiler->fgGetPtrArgCntMax() >= 1); + +#if defined(UNIX_X86_ABI) + // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall + GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); +#endif // UNIX_X86_ABI + + /* Restore the stack level */ + + SetStackLevel(saveStackLvl2); +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +// Notes: +// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and +// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details): +// 1. 
The calling sequence for calling the helper is: +// push FunctionIDOrClientID +// call ProfileLeaveHelper or ProfileTailcallHelper +// 2. The calling function has an EBP frame. +// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, +// the following prolog is assumed: +// push ESP +// mov EBP, ESP +// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved. +// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved. +// 5. The helper pops the FunctionIDOrClientID argument from the stack. +// +void CodeGen::genProfilingLeaveCallback(unsigned helper) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // Need to save on to the stack level, since the helper call will pop the argument + unsigned saveStackLvl2 = genStackLevel; + +#if defined(UNIX_X86_ABI) + // Manually align the stack to be 16-byte aligned. This is similar to CodeGen::genAlignStackBeforeCall() + GetEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_SPBASE, 0xC); + AddStackLevel(0xC); + AddNestedAlignment(0xC); +#endif // UNIX_X86_ABI + + // + // Push the profilerHandle + // + + if (compiler->compProfilerMethHndIndirected) + { + GetEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); + } + genSinglePush(); + +#if defined(UNIX_X86_ABI) + int argSize = -REGSIZE_BYTES; // negative means caller-pop (cdecl) +#else + int argSize = REGSIZE_BYTES; +#endif + genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */); + + // Check that we have place for the push. + assert(compiler->fgGetPtrArgCntMax() >= 1); + +#if defined(UNIX_X86_ABI) + // Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall + GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10); + SubtractStackLevel(0x10); + SubtractNestedAlignment(0xC); +#endif // UNIX_X86_ABI + + /* Restore the stack level */ + SetStackLevel(saveStackLvl2); +} + +#endif // TARGET_X86 + +#ifdef TARGET_AMD64 + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + +#if !defined(UNIX_AMD64_ABI) + + unsigned varNum; + LclVarDsc* varDsc; + + // Since the method needs to make a profiler callback, it should have out-going arg space allocated. + noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); + + // Home all arguments passed in arg registers (RCX, RDX, R8 and R9). + // In case of vararg methods, arg regs are already homed. + // + // Note: Here we don't need to worry about updating gc'info since enter + // callback is generated as part of prolog which is non-gc interruptible. 
+ // Moreover GC cannot kick while executing inside profiler callback which is a + // profiler requirement so it can examine arguments which could be obj refs. + if (!compiler->info.compIsVarArgs) + { + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + noway_assert(varDsc->lvIsParam); + + if (!varDsc->lvIsRegArg) + { + continue; + } + + var_types storeType = varDsc->lvaArgType(); + regNumber argReg = varDsc->GetArgReg(); + + instruction store_ins = ins_Store(storeType); + +#ifdef FEATURE_SIMD + if ((storeType == TYP_SIMD8) && genIsValidIntReg(argReg)) + { + store_ins = INS_mov; + } +#endif // FEATURE_SIMD + + GetEmitter()->emitIns_S_R(store_ins, emitTypeSize(storeType), argReg, varNum, 0); + } + } + + // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) + // RCX = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + // Profiler hooks enabled during Ngen time. + // Profiler handle needs to be accessed through an indirection of a pointer. + GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + // No need to record relocations, if we are generating ELT hooks under the influence + // of COMPlus_JitELTHookEnabled=1 + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // RDX = caller's SP + // Notes + // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. + // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value + // of that offset to FramePointer to obtain caller's SP value. + assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); + + // This will emit either + // "call ip-relative 32-bit offset" or + // "mov rax, helper addr; call rax" + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN); + + // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog + // generation logic that moves args around as required by first BB entry point conditions + // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs() + // and genEnregisterIncomingStackArgs(). + // + // Now reload arg registers from home locations. + // Vararg methods: + // - we need to reload only known (i.e. fixed) reg args. 
+ // - if floating point type, also reload it into corresponding integer reg + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++) + { + noway_assert(varDsc->lvIsParam); + + if (!varDsc->lvIsRegArg) + { + continue; + } + + var_types loadType = varDsc->lvaArgType(); + regNumber argReg = varDsc->GetArgReg(); + + instruction load_ins = ins_Load(loadType); + +#ifdef FEATURE_SIMD + if ((loadType == TYP_SIMD8) && genIsValidIntReg(argReg)) + { + load_ins = INS_mov; + } +#endif // FEATURE_SIMD + + GetEmitter()->emitIns_R_S(load_ins, emitTypeSize(loadType), argReg, varNum, 0); + +#if FEATURE_VARARG + if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) + { + regNumber intArgReg = compiler->getCallArgIntRegister(argReg); + inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); + } +#endif // FEATURE_VARARG + } + + // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. + if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) + { + *pInitRegZeroed = false; + } + +#else // !defined(UNIX_AMD64_ABI) + + // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP) + // R14 = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + // Profiler hooks enabled during Ngen time. + // Profiler handle needs to be accessed through an indirection of a pointer. + GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_PROFILER_ENTER_ARG_0, + (ssize_t)compiler->compProfilerMethHnd); + } + else + { + // No need to record relocations, if we are generating ELT hooks under the influence + // of COMPlus_JitELTHookEnabled=1 + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_PROFILER_ENTER_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // R15 = caller's SP + // Notes + // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout. + // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value + // of that offset to FramePointer to obtain caller's SP value. + assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_PROFILER_ENTER_ARG_1, genFramePointerReg(), -callerSPOffset); + + // We can use any callee trash register (other than RAX, RDI, RSI) for call target. + // We use R11 here. This will emit either + // "call ip-relative 32-bit offset" or + // "mov r11, helper addr; call r11" + genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); + + // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. + if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) + { + *pInitRegZeroed = false; + } + +#endif // !defined(UNIX_AMD64_ABI) +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. 
Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + +#if !defined(UNIX_AMD64_ABI) + + // Since the method needs to make a profiler callback, it should have out-going arg space allocated. + noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); + noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); + + // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash + // registers that profiler callback kills. + if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg()) + { + regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].GetRegNum()); + noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0); + } + + // At this point return value is computed and stored in RAX or XMM0. + // On Amd64, Leave callback preserves the return register. We keep + // RAX alive by not reporting as trashed by helper call. Also note + // that GC cannot kick-in while executing inside profiler callback, + // which is a requirement of profiler as well since it needs to examine + // return value which could be an obj ref. + + // RCX = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + // Profiler hooks enabled during Ngen time. + // Profiler handle needs to be accessed through an indirection of an address. + GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + // Don't record relocations, if we are generating ELT hooks under the influence + // of COMPlus_JitELTHookEnabled=1 + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // RDX = caller's SP + // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion + // of the stmnts to execute unconditionally and clean-up rest. + if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) + { + // Caller's SP relative offset to FramePointer will be negative. We need to add absolute + // value of that offset to FramePointer to obtain caller's SP value. + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); + } + else + { + // If we are here means that it is a tentative frame layout during which we + // cannot use caller's SP offset since it is an estimate. For now we require the + // method to have at least a single arg so that we can use it to obtain caller's + // SP. + LclVarDsc* varDsc = compiler->lvaTable; + NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); + + // lea rdx, [FramePointer + Arg0's offset] + GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); + } + + // We can use any callee trash register (other than RAX, RCX, RDX) for call target. + // We use R8 here. 
This will emit either + // "call ip-relative 32-bit offset" or + // "mov r8, helper addr; call r8" + genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2); + +#else // !defined(UNIX_AMD64_ABI) + + // RDI = ProfilerMethHnd + if (compiler->compProfilerMethHndIndirected) + { + GetEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + else + { + if (compiler->opts.compJitELTHookEnabled) + { + genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); + } + else + { + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd); + } + } + + // RSI = caller's SP + if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) + { + int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed()); + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset); + } + else + { + LclVarDsc* varDsc = compiler->lvaTable; + NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params"); + + // lea rdx, [FramePointer + Arg0's offset] + GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0); + } + + // We can use any callee trash register (other than RAX, RDI, RSI) for call target. + // We use R11 here. This will emit either + // "call ip-relative 32-bit offset" or + // "mov r11, helper addr; call r11" + genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_DEFAULT_PROFILER_CALL_TARGET); + +#endif // !defined(UNIX_AMD64_ABI) +} + +#endif // TARGET_AMD64 + +#endif // PROFILING_SUPPORTED + +//------------------------------------------------------------------------ +// genPushCalleeSavedRegisters: Push any callee-saved registers we have used. +// +void CodeGen::genPushCalleeSavedRegisters() +{ + assert(compiler->compGeneratingProlog); + + // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto stack + // here. Space for float registers to be preserved is stack allocated and saved as part of prolog sequence and not + // here. + regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED; + +#if ETW_EBP_FRAMED + if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) + { + noway_assert(!"Used register RBM_FPBASE as a scratch register!"); + } +#endif + + // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method + if (isFramePointerUsed()) + { + rsPushRegs &= ~RBM_FPBASE; + } + +#ifdef DEBUG + if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) + { + printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", + compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); + dspRegMask(rsPushRegs); + printf("\n"); + assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); + } +#endif // DEBUG + + // Push backwards so we match the order we will pop them in the epilog + // and all the other code that expects it to be in this order. 
+ for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg)) + { + regMaskTP regBit = genRegMask(reg); + + if ((regBit & rsPushRegs) != 0) + { + inst_RV(INS_push, reg, TYP_REF); + compiler->unwindPush(reg); +#ifdef USING_SCOPE_INFO + if (!doubleAlignOrFramePointerUsed()) + { + psiAdjustStackLevel(REGSIZE_BYTES); + } +#endif // USING_SCOPE_INFO + rsPushRegs &= ~regBit; + } + } +} + +#endif // TARGET_XARCH diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7ce35872d1bc0..85bb66d23abde 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1,19415 +1,19489 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XX XX -XX Morph XX -XX XX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -*/ - -#include "jitpch.h" -#ifdef _MSC_VER -#pragma hdrstop -#endif - -#include "allocacheck.h" // for alloca - -// Convert the given node into a call to the specified helper passing -// the given argument list. -// -// Tries to fold constants and also adds an edge for overflow exception -// returns the morphed tree -GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper) -{ - GenTree* result; - - /* If the operand is a constant, we'll try to fold it */ - if (oper->OperIsConst()) - { - GenTree* oldTree = tree; - - tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) - - if (tree != oldTree) - { - return fgMorphTree(tree); - } - else if (tree->OperKind() & GTK_CONST) - { - return fgMorphConst(tree); - } - - // assert that oper is unchanged and that it is still a GT_CAST node - noway_assert(tree->AsCast()->CastOp() == oper); - noway_assert(tree->gtOper == GT_CAST); - } - result = fgMorphIntoHelperCall(tree, helper, gtNewCallArgs(oper)); - assert(result == tree); - return result; -} - -/***************************************************************************** - * - * Convert the given node into a call to the specified helper passing - * the given argument list. - */ - -GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeCall::Use* args, bool morphArgs) -{ - // The helper call ought to be semantically equivalent to the original node, so preserve its VN. - tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN); - - GenTreeCall* call = tree->AsCall(); - - call->gtCallType = CT_HELPER; - call->gtCallMethHnd = eeFindHelper(helper); - call->gtCallThisArg = nullptr; - call->gtCallArgs = args; - call->gtCallLateArgs = nullptr; - call->fgArgInfo = nullptr; - call->gtRetClsHnd = nullptr; - call->gtCallMoreFlags = 0; - call->gtInlineCandidateInfo = nullptr; - call->gtControlExpr = nullptr; - -#if DEBUG - // Helper calls are never candidates. 
- call->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; - - call->callSig = nullptr; - -#endif // DEBUG - -#ifdef FEATURE_READYTORUN_COMPILER - call->gtEntryPoint.addr = nullptr; - call->gtEntryPoint.accessType = IAT_VALUE; -#endif - -#if FEATURE_MULTIREG_RET - call->ResetReturnType(); - call->ClearOtherRegs(); - call->ClearOtherRegFlags(); -#ifndef TARGET_64BIT - if (varTypeIsLong(tree)) - { - call->InitializeLongReturnType(); - } -#endif // !TARGET_64BIT -#endif // FEATURE_MULTIREG_RET - - if (tree->OperMayThrow(this)) - { - tree->gtFlags |= GTF_EXCEPT; - } - else - { - tree->gtFlags &= ~GTF_EXCEPT; - } - tree->gtFlags |= GTF_CALL; - - for (GenTreeCall::Use& use : GenTreeCall::UseList(args)) - { - tree->gtFlags |= (use.GetNode()->gtFlags & GTF_ALL_EFFECT); - } - - /* Perform the morphing */ - - if (morphArgs) - { - tree = fgMorphArgs(call); - } - - return tree; -} - -/***************************************************************************** - * - * Morph a cast node (we perform some very simple transformations here). - */ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif -GenTree* Compiler::fgMorphCast(GenTree* tree) -{ - noway_assert(tree->gtOper == GT_CAST); - noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE); - - /* The first sub-operand is the thing being cast */ - - GenTree* oper = tree->AsCast()->CastOp(); - - if (fgGlobalMorph && (oper->gtOper == GT_ADDR)) - { - // Make sure we've checked if 'oper' is an address of an implicit-byref parameter. - // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast - // morphing code to see that type. - fgMorphImplicitByRefArgs(oper); - } - - var_types srcType = genActualType(oper->TypeGet()); - - var_types dstType = tree->CastToType(); - unsigned dstSize = genTypeSize(dstType); - - // See if the cast has to be done in two steps. R -> I - if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) - { - if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) - // Arm64: src = float, dst is overflow conversion. - // This goes through helper and hence src needs to be converted to double. - && tree->gtOverflow() -#elif defined(TARGET_AMD64) - // Amd64: src = float, dst = uint64 or overflow conversion. - // This goes through helper and hence src needs to be converted to double. - && (tree->gtOverflow() || (dstType == TYP_ULONG)) -#elif defined(TARGET_ARM) - // Arm: src = float, dst = int64/uint64 or overflow conversion. - && (tree->gtOverflow() || varTypeIsLong(dstType)) -#else - // x86: src = float, dst = uint32/int64/uint64 or overflow conversion. 
- && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT)) -#endif - ) - { - oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); - } - - // do we need to do it in two steps R -> I, '-> smallType - CLANG_FORMAT_COMMENT_ANCHOR; - -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) - if (dstSize < genTypeSize(TYP_INT)) - { - oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT); - oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->gtFlags &= ~GTF_UNSIGNED; - } -#else - if (dstSize < TARGET_POINTER_SIZE) - { - oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL); - oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - } -#endif - else - { - /* Note that if we need to use a helper call then we can not morph oper */ - if (!tree->gtOverflow()) - { -#ifdef TARGET_ARM64 // On ARM64 All non-overflow checking conversions can be optimized - goto OPTIMIZECAST; -#else - switch (dstType) - { - case TYP_INT: - goto OPTIMIZECAST; - - case TYP_UINT: -#if defined(TARGET_ARM) || defined(TARGET_AMD64) - goto OPTIMIZECAST; -#else // TARGET_X86 - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); -#endif // TARGET_X86 - - case TYP_LONG: -#ifdef TARGET_AMD64 - // SSE2 has instructions to convert a float/double directly to a long - goto OPTIMIZECAST; -#else // !TARGET_AMD64 - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); -#endif // !TARGET_AMD64 - - case TYP_ULONG: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); - default: - break; - } -#endif // TARGET_ARM64 - } - else - { - switch (dstType) - { - case TYP_INT: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper); - case TYP_UINT: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper); - case TYP_LONG: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper); - case TYP_ULONG: - return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper); - default: - break; - } - } - noway_assert(!"Unexpected dstType"); - } - } -#ifndef TARGET_64BIT - // The code generation phase (for x86 & ARM32) does not handle casts - // directly from [u]long to anything other than [u]int. Insert an - // intermediate cast to native int. - else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType)) - { - oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL); - oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->gtFlags &= ~GTF_UNSIGNED; - } -#endif //! TARGET_64BIT - -#ifdef TARGET_ARM - else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) && - !varTypeIsLong(oper->AsCast()->CastOp())) - { - // optimization: conv.r4(conv.r8(?)) -> conv.r4(d) - // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step. - // This happens semi-frequently because there is no IL 'conv.r4.un' - oper->gtType = TYP_FLOAT; - oper->CastToType() = TYP_FLOAT; - return fgMorphTree(oper); - } - // converts long/ulong --> float/double casts into helper calls. 
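A small standalone C++ check (ordinary conversions, no JIT types) of why the cast morphing above may widen a float source to double before handing the conversion to a double-based helper: every float value is exactly representable as a double, so the resulting integer is unchanged.

#include <cassert>
#include <cstdint>

// convertDirect/convertViaDouble are local names for this sketch only.
int64_t convertDirect(float f)    { return (int64_t)f; }
int64_t convertViaDouble(float f) { return (int64_t)(double)f; }

int main()
{
    const float samples[] = {0.0f, 1.5f, -2.75f, 123456.0f, -8388608.0f};
    for (float f : samples)
    {
        // Widening to double first cannot change the truncated result.
        assert(convertDirect(f) == convertViaDouble(f));
    }
    return 0;
}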
- else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType)) - { - if (dstType == TYP_FLOAT) - { - // there is only a double helper, so we - // - change the dsttype to double - // - insert a cast from double to float - // - recurse into the resulting tree - tree->CastToType() = TYP_DOUBLE; - tree->gtType = TYP_DOUBLE; - - tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); - - return fgMorphTree(tree); - } - if (tree->gtFlags & GTF_UNSIGNED) - return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); - return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); - } -#endif // TARGET_ARM - -#ifdef TARGET_AMD64 - // Do we have to do two step U4/8 -> R4/8 ? - // Codegen supports the following conversion as one-step operation - // a) Long -> R4/R8 - // b) U8 -> R8 - // - // The following conversions are performed as two-step operations using above. - // U4 -> R4/8 = U4-> Long -> R4/8 - // U8 -> R4 = U8 -> R8 -> R4 - else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) - { - srcType = genUnsignedType(srcType); - - if (srcType == TYP_ULONG) - { - if (dstType == TYP_FLOAT) - { - // Codegen can handle U8 -> R8 conversion. - // U8 -> R4 = U8 -> R8 -> R4 - // - change the dsttype to double - // - insert a cast from double to float - // - recurse into the resulting tree - tree->CastToType() = TYP_DOUBLE; - tree->gtType = TYP_DOUBLE; - tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); - return fgMorphTree(tree); - } - } - else if (srcType == TYP_UINT) - { - oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); - oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->gtFlags &= ~GTF_UNSIGNED; - } - } -#endif // TARGET_AMD64 - -#ifdef TARGET_X86 - // Do we have to do two step U4/8 -> R4/8 ? - else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) - { - srcType = genUnsignedType(srcType); - - if (srcType == TYP_ULONG) - { - return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); - } - else if (srcType == TYP_UINT) - { - oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); - oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - tree->gtFlags &= ~GTF_UNSIGNED; - return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); - } - } - else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) - { - oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); - - // Since we don't have a Jit Helper that converts to a TYP_FLOAT - // we just use the one that converts to a TYP_DOUBLE - // and then add a cast to TYP_FLOAT - // - if ((dstType == TYP_FLOAT) && (oper->OperGet() == GT_CALL)) - { - // Fix the return type to be TYP_DOUBLE - // - oper->gtType = TYP_DOUBLE; - - // Add a Cast to TYP_FLOAT - // - tree = gtNewCastNode(TYP_FLOAT, oper, false, TYP_FLOAT); - INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - - return tree; - } - else - { - return oper; - } - } -#endif // TARGET_X86 - else if (varTypeIsGC(srcType) != varTypeIsGC(dstType)) - { - // We are casting away GC information. we would like to just - // change the type to int, however this gives the emitter fits because - // it believes the variable is a GC variable at the beginning of the - // instruction group, but is not turned non-gc by the code generator - // we fix this by copying the GC pointer to a non-gc pointer temp. - noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?"); - - // We generate an assignment to an int and then do the cast from an int. 
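The same idea applies to the unsigned-source cases listed above; a plain C++ sketch of the U4 -> R8 step (viaSignedLong/direct are local names for illustration): a u4 source is widened to a signed 64-bit value, which is always exact, and the signed long-to-double conversion is then used. The u8-to-float case chains through double in the same two-step fashion.

#include <cassert>
#include <cstdint>

double viaSignedLong(uint32_t u) { return (double)(int64_t)u; }
double direct(uint32_t u)        { return (double)u; }

int main()
{
    const uint32_t samples[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
    for (uint32_t u : samples)
    {
        // Every uint32 fits in int64, and every int64 below 2^53 converts to
        // double exactly, so the two-step conversion matches the direct one.
        assert(viaSignedLong(u) == direct(u));
    }
    return 0;
}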
With this we avoid - // the gc problem and we allow casts to bytes, longs, etc... - unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC")); - oper->gtType = TYP_I_IMPL; - GenTree* asg = gtNewTempAssign(lclNum, oper); - oper->gtType = srcType; - - // do the real cast - GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType); - - // Generate the comma tree - oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast); - - return fgMorphTree(oper); - } - - // Look for narrowing casts ([u]long -> [u]int) and try to push them - // down into the operand before morphing it. - // - // It doesn't matter if this is cast is from ulong or long (i.e. if - // GTF_UNSIGNED is set) because the transformation is only applied to - // overflow-insensitive narrowing casts, which always silently truncate. - // - // Note that casts from [u]long to small integer types are handled above. - if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT))) - { - // As a special case, look for overflow-sensitive casts of an AND - // expression, and see if the second operand is a small constant. Since - // the result of an AND is bound by its smaller operand, it may be - // possible to prove that the cast won't overflow, which will in turn - // allow the cast's operand to be transformed. - if (tree->gtOverflow() && (oper->OperGet() == GT_AND)) - { - GenTree* andOp2 = oper->AsOp()->gtOp2; - - // Special case to the special case: AND with a casted int. - if ((andOp2->OperGet() == GT_CAST) && (andOp2->AsCast()->CastOp()->OperGet() == GT_CNS_INT)) - { - // gtFoldExprConst will deal with whether the cast is signed or - // unsigned, or overflow-sensitive. - andOp2 = gtFoldExprConst(andOp2); - oper->AsOp()->gtOp2 = andOp2; - } - - // Look for a constant less than 2^{32} for a cast to uint, or less - // than 2^{31} for a cast to int. - int maxWidth = (dstType == TYP_UINT) ? 32 : 31; - - if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->AsIntConCommon()->LngValue() >> maxWidth) == 0)) - { - // This cast can't overflow. - tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT); - } - } - - // Only apply this transformation during global morph, - // when neither the cast node nor the oper node may throw an exception - // based on the upper 32 bits. - // - if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx()) - { - // For these operations the lower 32 bits of the result only depends - // upon the lower 32 bits of the operands. - // - bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG); - - // For long LSH cast to int, there is a discontinuity in behavior - // when the shift amount is 32 or larger. - // - // CAST(INT, LSH(1LL, 31)) == LSH(1, 31) - // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31) - // - // CAST(INT, LSH(1LL, 32)) == 0 - // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1 - // - // So some extra validation is needed. - // - if (oper->OperIs(GT_LSH)) - { - GenTree* shiftAmount = oper->AsOp()->gtOp2; - - // Expose constant value for shift, if possible, to maximize the number - // of cases we can handle. - shiftAmount = gtFoldExpr(shiftAmount); - oper->AsOp()->gtOp2 = shiftAmount; - -#if DEBUG - // We may remorph the shift amount tree again later, so clear any morphed flag. 
- shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - - if (shiftAmount->IsIntegralConst()) - { - const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue(); - - if ((shiftAmountValue >= 64) || (shiftAmountValue < 0)) - { - // Shift amount is large enough or negative so result is undefined. - // Don't try to optimize. - assert(!canPushCast); - } - else if (shiftAmountValue >= 32) - { - // We know that we have a narrowing cast ([u]long -> [u]int) - // and that we are casting to a 32-bit value, which will result in zero. - // - // Check to see if we have any side-effects that we must keep - // - if ((tree->gtFlags & GTF_ALL_EFFECT) == 0) - { - // Result of the shift is zero. - DEBUG_DESTROY_NODE(tree); - GenTree* zero = gtNewZeroConNode(TYP_INT); - return fgMorphTree(zero); - } - else // We do have a side-effect - { - // We could create a GT_COMMA node here to keep the side-effect and return a zero - // Instead we just don't try to optimize this case. - canPushCast = false; - } - } - else - { - // Shift amount is positive and small enough that we can push the cast through. - canPushCast = true; - } - } - else - { - // Shift amount is unknown. We can't optimize this case. - assert(!canPushCast); - } - } - - if (canPushCast) - { - DEBUG_DESTROY_NODE(tree); - - // Insert narrowing casts for op1 and op2. - oper->AsOp()->gtOp1 = gtNewCastNode(TYP_INT, oper->AsOp()->gtOp1, false, dstType); - if (oper->AsOp()->gtOp2 != nullptr) - { - oper->AsOp()->gtOp2 = gtNewCastNode(TYP_INT, oper->AsOp()->gtOp2, false, dstType); - } - - // Clear the GT_MUL_64RSLT if it is set. - if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT)) - { - oper->gtFlags &= ~GTF_MUL_64RSLT; - } - - // The operation now produces a 32-bit result. - oper->gtType = TYP_INT; - - // Remorph the new tree as the casts that we added may be folded away. - return fgMorphTree(oper); - } - } - } - -OPTIMIZECAST: - noway_assert(tree->gtOper == GT_CAST); - - /* Morph the operand */ - tree->AsCast()->CastOp() = oper = fgMorphTree(oper); - - /* Reset the call flag */ - tree->gtFlags &= ~GTF_CALL; - - /* Reset the assignment flag */ - tree->gtFlags &= ~GTF_ASG; - - /* unless we have an overflow cast, reset the except flag */ - if (!tree->gtOverflow()) - { - tree->gtFlags &= ~GTF_EXCEPT; - } - - /* Just in case new side effects were introduced */ - tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT); - - if (!gtIsActiveCSE_Candidate(tree) && !gtIsActiveCSE_Candidate(oper)) - { - srcType = oper->TypeGet(); - - /* See if we can discard the cast */ - if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType)) - { - if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType)) - { - if (varTypeIsSmall(srcType)) - { - // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the - // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType - // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is - // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion - // from u4 to i2. 
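A standalone C++ illustration of the narrowing-cast push-down rule and the left-shift caveat discussed above. Unsigned 64-bit arithmetic is used so the C++ itself stays well defined while modeling the same low-bit behavior as the JIT's [u]long values; low32 is a local helper for this sketch, not a JIT function.

#include <cassert>
#include <cstdint>

static uint32_t low32(uint64_t v)
{
    return (uint32_t)v;
}

int main()
{
    const uint64_t a = 0x123456789ABCDEF0ULL;
    const uint64_t b = 0xF0123456789ABCDEULL;

    // For these operators the low 32 bits of the 64-bit result depend only on
    // the low 32 bits of the operands, so the narrowing cast can be pushed down.
    assert(low32(a + b) == (uint32_t)(low32(a) + low32(b)));
    assert(low32(a - b) == (uint32_t)(low32(a) - low32(b)));
    assert(low32(a * b) == (uint32_t)(low32(a) * low32(b)));
    assert(low32(a & b) == (low32(a) & low32(b)));
    assert(low32(a | b) == (low32(a) | low32(b)));
    assert(low32(a ^ b) == (low32(a) ^ low32(b)));

    // The shift discontinuity: a 64-bit shift by 32 truncates to zero, while a
    // 32-bit shift uses the amount mod 32 and produces 1, so the cast must not
    // be pushed below a shift of 32 or more.
    assert(low32(1ULL << 31) == (1u << 31)); // shift < 32: safe to push
    assert(low32(1ULL << 32) == 0u);         // 64-bit result truncates to zero
    assert((1u << (32 & 31)) == 1u);         // what the narrowed 32-bit shift computes
    return 0;
}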
- srcType = genActualType(srcType); - } - - srcType = genUnsignedType(srcType); - } - - if (srcType == dstType) - { // Certainly if they are identical it is pointless - goto REMOVE_CAST; - } - - if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType)) - { - unsigned varNum = oper->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[varNum]; - if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore()) - { - goto REMOVE_CAST; - } - } - - bool unsignedSrc = varTypeIsUnsigned(srcType); - bool unsignedDst = varTypeIsUnsigned(dstType); - bool signsDiffer = (unsignedSrc != unsignedDst); - unsigned srcSize = genTypeSize(srcType); - - // For same sized casts with - // the same signs or non-overflow cast we discard them as well - if (srcSize == dstSize) - { - /* This should have been handled above */ - noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType)); - - if (!signsDiffer) - { - goto REMOVE_CAST; - } - - if (!tree->gtOverflow()) - { - /* For small type casts, when necessary we force - the src operand to the dstType and allow the - implied load from memory to perform the casting */ - if (varTypeIsSmall(srcType)) - { - switch (oper->gtOper) - { - case GT_IND: - case GT_CLS_VAR: - case GT_LCL_FLD: - case GT_ARR_ELEM: - oper->gtType = dstType; - // We're changing the type here so we need to update the VN; - // in other cases we discard the cast without modifying oper - // so the VN doesn't change. - oper->SetVNsFromNode(tree); - goto REMOVE_CAST; - default: - break; - } - } - else - { - goto REMOVE_CAST; - } - } - } - else if (srcSize < dstSize) // widening cast - { - // Keep any long casts - if (dstSize == sizeof(int)) - { - // Only keep signed to unsigned widening cast with overflow check - if (!tree->gtOverflow() || !unsignedDst || unsignedSrc) - { - goto REMOVE_CAST; - } - } - - // Widening casts from unsigned or to signed can never overflow - - if (unsignedSrc || !unsignedDst) - { - tree->gtFlags &= ~GTF_OVERFLOW; - if (!(oper->gtFlags & GTF_EXCEPT)) - { - tree->gtFlags &= ~GTF_EXCEPT; - } - } - } - else // if (srcSize > dstSize) - { - // Try to narrow the operand of the cast and discard the cast - // Note: Do not narrow a cast that is marked as a CSE - // And do not narrow if the oper is marked as a CSE either - // - if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) && - optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false)) - { - optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true); - - /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */ - if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType())) - { - oper = oper->AsCast()->CastOp(); - } - goto REMOVE_CAST; - } - } - } - - switch (oper->gtOper) - { - /* If the operand is a constant, we'll fold it */ - case GT_CNS_INT: - case GT_CNS_LNG: - case GT_CNS_DBL: - case GT_CNS_STR: - { - GenTree* oldTree = tree; - - tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) - - // Did we get a comma throw as a result of gtFoldExprConst? 
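A quick standalone check of the note above about unsigned casts of small signed sources: after the implicit sign extension to 32 bits, the value the cast actually sees is a u4, so conv.ovf.i2.un(i1(-1)) is a narrowing conversion that overflows rather than a widening i1-to-i2 one.

#include <cassert>
#include <cstdint>

int main()
{
    int8_t   i1 = -1;
    uint32_t u4 = (uint32_t)(int32_t)i1; // sign-extend to 32 bits, then treat as unsigned

    assert(u4 == 0xFFFFFFFFu);
    assert(u4 > 0x7FFFu); // does not fit in i2, so the checked cast must raise overflow
    return 0;
}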
- if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA)) - { - noway_assert(fgIsCommaThrow(tree)); - tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1); - fgMorphTreeDone(tree); - return tree; - } - else if (tree->gtOper != GT_CAST) - { - return tree; - } - - noway_assert(tree->AsCast()->CastOp() == oper); // unchanged - } - break; - - case GT_CAST: - /* Check for two consecutive casts into the same dstType */ - if (!tree->gtOverflow()) - { - var_types dstType2 = oper->CastToType(); - if (dstType == dstType2) - { - goto REMOVE_CAST; - } - } - break; - - case GT_COMMA: - // Check for cast of a GT_COMMA with a throw overflow - // Bug 110829: Since this optimization will bash the types - // neither oper or commaOp2 can be CSE candidates - if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate - { - GenTree* commaOp2 = oper->AsOp()->gtOp2; - - if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate - { - // need type of oper to be same as tree - if (tree->gtType == TYP_LONG) - { - commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); - commaOp2->AsIntConCommon()->SetLngValue(0); - /* Change the types of oper and commaOp2 to TYP_LONG */ - oper->gtType = commaOp2->gtType = TYP_LONG; - } - else if (varTypeIsFloating(tree->gtType)) - { - commaOp2->ChangeOperConst(GT_CNS_DBL); - commaOp2->AsDblCon()->gtDconVal = 0.0; - // Change the types of oper and commaOp2 - oper->gtType = commaOp2->gtType = tree->gtType; - } - else - { - commaOp2->ChangeOperConst(GT_CNS_INT); - commaOp2->AsIntCon()->gtIconVal = 0; - /* Change the types of oper and commaOp2 to TYP_INT */ - oper->gtType = commaOp2->gtType = TYP_INT; - } - } - - if (vnStore != nullptr) - { - fgValueNumberTreeConst(commaOp2); - } - - /* Return the GT_COMMA node as the new tree */ - return oper; - } - break; - - default: - break; - } /* end switch (oper->gtOper) */ - } - - if (tree->gtOverflow()) - { - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); - } - - return tree; - -REMOVE_CAST: - /* Here we've eliminated the cast, so just return it's operand */ - assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate - - DEBUG_DESTROY_NODE(tree); - return oper; -} -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - -#ifdef DEBUG -void fgArgTabEntry::Dump() const -{ - printf("fgArgTabEntry[arg %u", argNum); - printf(" %d.%s", GetNode()->gtTreeID, GenTree::OpName(GetNode()->OperGet())); - printf(" %s", varTypeName(argType)); - printf(" (%s)", passedByRef ? "By ref" : "By value"); - if (GetRegNum() != REG_STK) - { - printf(", %u reg%s:", numRegs, numRegs == 1 ? 
"" : "s"); - for (unsigned i = 0; i < numRegs; i++) - { - printf(" %s", getRegName(regNums[i])); - } - } - if (GetStackByteSize() > 0) - { -#if defined(DEBUG_ARG_SLOTS) - printf(", numSlots=%u, slotNum=%u, byteSize=%u, byteOffset=%u", numSlots, slotNum, m_byteSize, m_byteOffset); -#else - printf(", byteSize=%u, byteOffset=%u", m_byteSize, m_byteOffset); - -#endif - } - printf(", byteAlignment=%u", m_byteAlignment); - if (isLateArg()) - { - printf(", lateArgInx=%u", GetLateArgInx()); - } - if (IsSplit()) - { - printf(", isSplit"); - } - if (needTmp) - { - printf(", tmpNum=V%02u", tmpNum); - } - if (needPlace) - { - printf(", needPlace"); - } - if (isTmp) - { - printf(", isTmp"); - } - if (processed) - { - printf(", processed"); - } - if (IsHfaRegArg()) - { - printf(", isHfa(%s)", varTypeName(GetHfaType())); - } - if (isBackFilled) - { - printf(", isBackFilled"); - } - if (isNonStandard) - { - printf(", isNonStandard"); - } - if (isStruct) - { - printf(", isStruct"); - } - printf("]\n"); -} -#endif - -fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs) -{ - compiler = comp; - callTree = call; - argCount = 0; // filled in arg count, starts at zero - DEBUG_ARG_SLOTS_ONLY(nextSlotNum = INIT_ARG_STACK_SLOT;) - nextStackByteOffset = INIT_ARG_STACK_SLOT * TARGET_POINTER_SIZE; - stkLevel = 0; -#if defined(UNIX_X86_ABI) - alignmentDone = false; - stkSizeBytes = 0; - padStkAlign = 0; -#endif -#if FEATURE_FIXED_OUT_ARGS - outArgSize = 0; -#endif - - argTableSize = numArgs; // the allocated table size - - hasRegArgs = false; - hasStackArgs = false; - argsComplete = false; - argsSorted = false; - needsTemps = false; - - if (argTableSize == 0) - { - argTable = nullptr; - } - else - { - argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; - } -} - -/***************************************************************************** - * - * fgArgInfo Copy Constructor - * - * This method needs to act like a copy constructor for fgArgInfo. - * The newCall needs to have its fgArgInfo initialized such that - * we have newCall that is an exact copy of the oldCall. - * We have to take care since the argument information - * in the argTable contains pointers that must point to the - * new arguments and not the old arguments. - */ -fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall) -{ - fgArgInfo* oldArgInfo = oldCall->AsCall()->fgArgInfo; - - compiler = oldArgInfo->compiler; - callTree = newCall; - argCount = 0; // filled in arg count, starts at zero - DEBUG_ARG_SLOTS_ONLY(nextSlotNum = INIT_ARG_STACK_SLOT;) - nextStackByteOffset = INIT_ARG_STACK_SLOT * TARGET_POINTER_SIZE; - stkLevel = oldArgInfo->stkLevel; -#if defined(UNIX_X86_ABI) - alignmentDone = oldArgInfo->alignmentDone; - stkSizeBytes = oldArgInfo->stkSizeBytes; - padStkAlign = oldArgInfo->padStkAlign; -#endif -#if FEATURE_FIXED_OUT_ARGS - outArgSize = oldArgInfo->outArgSize; -#endif - argTableSize = oldArgInfo->argTableSize; - argsComplete = false; - argTable = nullptr; - - assert(oldArgInfo->argsComplete); - - if (argTableSize > 0) - { - argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; - - // Copy the old arg entries - for (unsigned i = 0; i < argTableSize; i++) - { - argTable[i] = new (compiler, CMK_fgArgInfo) fgArgTabEntry(*oldArgInfo->argTable[i]); - } - - // The copied arg entries contain pointers to old uses, they need - // to be updated to point to new uses. 
- if (newCall->gtCallThisArg != nullptr) - { - for (unsigned i = 0; i < argTableSize; i++) - { - if (argTable[i]->use == oldCall->gtCallThisArg) - { - argTable[i]->use = newCall->gtCallThisArg; - break; - } - } - } - - GenTreeCall::UseIterator newUse = newCall->Args().begin(); - GenTreeCall::UseIterator newUseEnd = newCall->Args().end(); - GenTreeCall::UseIterator oldUse = oldCall->Args().begin(); - GenTreeCall::UseIterator oldUseEnd = newCall->Args().end(); - - for (; newUse != newUseEnd; ++newUse, ++oldUse) - { - for (unsigned i = 0; i < argTableSize; i++) - { - if (argTable[i]->use == oldUse.GetUse()) - { - argTable[i]->use = newUse.GetUse(); - break; - } - } - } - - newUse = newCall->LateArgs().begin(); - newUseEnd = newCall->LateArgs().end(); - oldUse = oldCall->LateArgs().begin(); - oldUseEnd = newCall->LateArgs().end(); - - for (; newUse != newUseEnd; ++newUse, ++oldUse) - { - for (unsigned i = 0; i < argTableSize; i++) - { - if (argTable[i]->lateUse == oldUse.GetUse()) - { - argTable[i]->lateUse = newUse.GetUse(); - break; - } - } - } - } - - argCount = oldArgInfo->argCount; - DEBUG_ARG_SLOTS_ONLY(nextSlotNum = oldArgInfo->nextSlotNum;) - nextStackByteOffset = oldArgInfo->nextStackByteOffset; - - hasRegArgs = oldArgInfo->hasRegArgs; - hasStackArgs = oldArgInfo->hasStackArgs; - argsComplete = true; - argsSorted = true; -} - -void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry) -{ - assert(argCount < argTableSize); - argTable[argCount] = curArgTabEntry; - argCount++; -} - -fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, - GenTree* node, - GenTreeCall::Use* use, - regNumber regNum, - unsigned numRegs, - unsigned byteSize, - unsigned byteAlignment, - bool isStruct, - bool isFloatHfa, - bool isVararg /*=false*/) -{ - fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; - - // Any additional register numbers are set by the caller. - // This is primarily because on ARM we don't yet know if it - // will be split or if it is a double HFA, so the number of registers - // may actually be less. 
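The use-pointer fix-up in the fgArgInfo copy constructor above follows a simple pattern; here is a minimal plain-C++ sketch of it (toy Use/Entry types, not GenTreeCall::Use or fgArgTabEntry): entries copied from the old table still point into the old call's use list, and each one is patched to the positionally matching use of the new list.

#include <cassert>
#include <cstddef>
#include <vector>

struct Use   { int  value; };
struct Entry { Use* use; };

int main()
{
    std::vector<Use> oldUses = {{10}, {20}, {30}};
    std::vector<Use> newUses = oldUses; // the copied call owns its own use list

    // Entries still reference the old call's uses, as after a raw copy.
    std::vector<Entry> table = {{&oldUses[2]}, {&oldUses[0]}, {&oldUses[1]}};

    // Walk the old and new use lists in parallel and patch matching pointers.
    for (std::size_t i = 0; i < oldUses.size(); i++)
    {
        for (Entry& e : table)
        {
            if (e.use == &oldUses[i])
            {
                e.use = &newUses[i];
                break;
            }
        }
    }

    // Every entry now points into the new list, and the values still line up.
    for (const Entry& e : table)
    {
        assert(e.use >= &newUses[0] && e.use <= &newUses[newUses.size() - 1]);
    }
    assert(table[0].use->value == 30 && table[1].use->value == 10);
    return 0;
}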
- curArgTabEntry->setRegNum(0, regNum); - - curArgTabEntry->argNum = argNum; - curArgTabEntry->argType = node->TypeGet(); - curArgTabEntry->use = use; - curArgTabEntry->lateUse = nullptr; - curArgTabEntry->numRegs = numRegs; - -#if defined(DEBUG_ARG_SLOTS) - curArgTabEntry->slotNum = 0; - curArgTabEntry->numSlots = 0; -#endif - - curArgTabEntry->SetLateArgInx(UINT_MAX); - curArgTabEntry->tmpNum = BAD_VAR_NUM; - curArgTabEntry->SetSplit(false); - curArgTabEntry->isTmp = false; - curArgTabEntry->needTmp = false; - curArgTabEntry->needPlace = false; - curArgTabEntry->processed = false; - if (GlobalJitOptions::compFeatureHfa) - { - curArgTabEntry->SetHfaElemKind(CORINFO_HFA_ELEM_NONE); - } - curArgTabEntry->isBackFilled = false; - curArgTabEntry->isNonStandard = false; - curArgTabEntry->isStruct = isStruct; - curArgTabEntry->SetIsVararg(isVararg); - curArgTabEntry->SetByteAlignment(byteAlignment); - curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); - curArgTabEntry->SetByteOffset(0); - - hasRegArgs = true; - AddArg(curArgTabEntry); - return curArgTabEntry; -} - -#if defined(UNIX_AMD64_ABI) -fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, - GenTree* node, - GenTreeCall::Use* use, - regNumber regNum, - unsigned numRegs, - unsigned byteSize, - unsigned byteAlignment, - const bool isStruct, - const bool isFloatHfa, - const bool isVararg, - const regNumber otherRegNum, - const unsigned structIntRegs, - const unsigned structFloatRegs, - const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr) -{ - fgArgTabEntry* curArgTabEntry = - AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, isFloatHfa, isVararg); - assert(curArgTabEntry != nullptr); - - curArgTabEntry->isStruct = isStruct; // is this a struct arg - curArgTabEntry->structIntRegs = structIntRegs; - curArgTabEntry->structFloatRegs = structFloatRegs; - - INDEBUG(curArgTabEntry->checkIsStruct();) - assert(numRegs <= 2); - if (numRegs == 2) - { - curArgTabEntry->setRegNum(1, otherRegNum); - } - - if (isStruct && structDescPtr != nullptr) - { - curArgTabEntry->structDesc.CopyFrom(*structDescPtr); - } - - return curArgTabEntry; -} -#endif // defined(UNIX_AMD64_ABI) - -fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, - GenTree* node, - GenTreeCall::Use* use, - unsigned numSlots, - unsigned byteSize, - unsigned byteAlignment, - bool isStruct, - bool isFloatHfa, - bool isVararg /*=false*/) -{ - fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; - -#if defined(DEBUG_ARG_SLOTS) - nextSlotNum = roundUp(nextSlotNum, byteAlignment / TARGET_POINTER_SIZE); -#endif - - nextStackByteOffset = roundUp(nextStackByteOffset, byteAlignment); - DEBUG_ARG_SLOTS_ASSERT(nextStackByteOffset / TARGET_POINTER_SIZE == nextSlotNum); - - curArgTabEntry->setRegNum(0, REG_STK); - curArgTabEntry->argNum = argNum; - curArgTabEntry->argType = node->TypeGet(); - curArgTabEntry->use = use; - curArgTabEntry->lateUse = nullptr; -#if defined(DEBUG_ARG_SLOTS) - curArgTabEntry->numSlots = numSlots; - curArgTabEntry->slotNum = nextSlotNum; -#endif - - curArgTabEntry->numRegs = 0; -#if defined(UNIX_AMD64_ABI) - curArgTabEntry->structIntRegs = 0; - curArgTabEntry->structFloatRegs = 0; -#endif // defined(UNIX_AMD64_ABI) - curArgTabEntry->SetLateArgInx(UINT_MAX); - curArgTabEntry->tmpNum = BAD_VAR_NUM; - curArgTabEntry->SetSplit(false); - curArgTabEntry->isTmp = false; - curArgTabEntry->needTmp = false; - curArgTabEntry->needPlace = false; - curArgTabEntry->processed = false; - if 
(GlobalJitOptions::compFeatureHfa) - { - curArgTabEntry->SetHfaElemKind(CORINFO_HFA_ELEM_NONE); - } - curArgTabEntry->isBackFilled = false; - curArgTabEntry->isNonStandard = false; - curArgTabEntry->isStruct = isStruct; - curArgTabEntry->SetIsVararg(isVararg); - - curArgTabEntry->SetByteAlignment(byteAlignment); - curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); - curArgTabEntry->SetByteOffset(nextStackByteOffset); - - hasStackArgs = true; - AddArg(curArgTabEntry); - DEBUG_ARG_SLOTS_ONLY(nextSlotNum += numSlots;) - nextStackByteOffset += curArgTabEntry->GetByteSize(); - - return curArgTabEntry; -} - -void fgArgInfo::RemorphReset() -{ - DEBUG_ARG_SLOTS_ONLY(nextSlotNum = INIT_ARG_STACK_SLOT;) - nextStackByteOffset = INIT_ARG_STACK_SLOT * TARGET_POINTER_SIZE; -} - -//------------------------------------------------------------------------ -// UpdateRegArg: Update the given fgArgTabEntry while morphing. -// -// Arguments: -// curArgTabEntry - the fgArgTabEntry to update. -// node - the tree node that defines the argument -// reMorphing - a boolean value indicate whether we are remorphing the call -// -// Assumptions: -// This must have already been determined to be at least partially passed in registers. -// -void fgArgInfo::UpdateRegArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) -{ - bool isLateArg = curArgTabEntry->isLateArg(); - // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. - assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || - (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); - - assert(curArgTabEntry->numRegs != 0); - assert(curArgTabEntry->use->GetNode() == node); -} - -//------------------------------------------------------------------------ -// UpdateStkArg: Update the given fgArgTabEntry while morphing. -// -// Arguments: -// curArgTabEntry - the fgArgTabEntry to update. -// node - the tree node that defines the argument -// reMorphing - a boolean value indicate whether we are remorphing the call -// -// Assumptions: -// This must have already been determined to be passed on the stack. -// -void fgArgInfo::UpdateStkArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) -{ - bool isLateArg = curArgTabEntry->isLateArg(); - // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. 
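A small standalone sketch of the stack-offset bookkeeping in AddStkArg/UpdateStkArg above (the roundUp helper here is local to the sketch and assumes power-of-two alignments): each stack argument's offset is aligned first, then its byte size advances the running offset for the next argument.

#include <cassert>

static unsigned roundUp(unsigned value, unsigned alignment)
{
    return (value + alignment - 1) & ~(alignment - 1); // alignment must be a power of two
}

int main()
{
    unsigned nextStackByteOffset = 0;

    struct { unsigned byteSize; unsigned byteAlignment; } args[] = {
        {4, 4}, {1, 1}, {8, 8}, {2, 2},
    };

    unsigned offsets[4];
    for (int i = 0; i < 4; i++)
    {
        nextStackByteOffset = roundUp(nextStackByteOffset, args[i].byteAlignment);
        offsets[i]          = nextStackByteOffset;
        nextStackByteOffset += args[i].byteSize;
    }

    // 4-byte arg at 0, 1-byte arg at 4, 8-byte arg padded up to 8, 2-byte arg at 16.
    assert(offsets[0] == 0 && offsets[1] == 4 && offsets[2] == 8 && offsets[3] == 16);
    return 0;
}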
- assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || - (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); - - noway_assert(curArgTabEntry->use != callTree->gtCallThisArg); - assert((curArgTabEntry->GetRegNum() == REG_STK) || curArgTabEntry->IsSplit()); - assert(curArgTabEntry->use->GetNode() == node); -#if defined(DEBUG_ARG_SLOTS) - nextSlotNum = roundUp(nextSlotNum, curArgTabEntry->GetByteAlignment() / TARGET_POINTER_SIZE); - assert(curArgTabEntry->slotNum == nextSlotNum); - nextSlotNum += curArgTabEntry->numSlots; -#endif - - nextStackByteOffset = roundUp(nextStackByteOffset, curArgTabEntry->GetByteAlignment()); - assert(curArgTabEntry->GetByteOffset() == nextStackByteOffset); - nextStackByteOffset += curArgTabEntry->GetStackByteSize(); -} - -void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots) -{ - fgArgTabEntry* curArgTabEntry = nullptr; - assert(argNum < argCount); - for (unsigned inx = 0; inx < argCount; inx++) - { - curArgTabEntry = argTable[inx]; - if (curArgTabEntry->argNum == argNum) - { - break; - } - } - - assert(numRegs > 0); - assert(numSlots > 0); - - if (argsComplete) - { - assert(curArgTabEntry->IsSplit() == true); - assert(curArgTabEntry->numRegs == numRegs); - DEBUG_ARG_SLOTS_ONLY(assert(curArgTabEntry->numSlots == numSlots);) - assert(hasStackArgs == true); - } - else - { - curArgTabEntry->SetSplit(true); - curArgTabEntry->numRegs = numRegs; - DEBUG_ARG_SLOTS_ONLY(curArgTabEntry->numSlots = numSlots;) - curArgTabEntry->SetByteOffset(0); - hasStackArgs = true; - } - DEBUG_ARG_SLOTS_ONLY(nextSlotNum += numSlots;) - // TODO-Cleanup: structs are aligned to 8 bytes on arm64 apple, so it would work, but pass the precise size. - nextStackByteOffset += numSlots * TARGET_POINTER_SIZE; -} - -//------------------------------------------------------------------------ -// EvalToTmp: Replace the node in the given fgArgTabEntry with a temp -// -// Arguments: -// curArgTabEntry - the fgArgTabEntry for the argument -// tmpNum - the varNum for the temp -// newNode - the assignment of the argument value to the temp -// -// Notes: -// Although the name of this method is EvalToTmp, it doesn't actually create -// the temp or the copy. -// -void fgArgInfo::EvalToTmp(fgArgTabEntry* curArgTabEntry, unsigned tmpNum, GenTree* newNode) -{ - assert(curArgTabEntry->use != callTree->gtCallThisArg); - assert(curArgTabEntry->use->GetNode() == newNode); - - assert(curArgTabEntry->GetNode() == newNode); - curArgTabEntry->tmpNum = tmpNum; - curArgTabEntry->isTmp = true; -} - -void fgArgInfo::ArgsComplete() -{ - bool hasStructRegArg = false; - - for (unsigned curInx = 0; curInx < argCount; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - assert(curArgTabEntry != nullptr); - GenTree* argx = curArgTabEntry->GetNode(); - - if (curArgTabEntry->GetRegNum() == REG_STK) - { - assert(hasStackArgs == true); -#if !FEATURE_FIXED_OUT_ARGS - // On x86 we use push instructions to pass arguments: - // The non-register arguments are evaluated and pushed in order - // and they are never evaluated into temps - // - continue; -#endif - } -#if FEATURE_ARG_SPLIT - else if (curArgTabEntry->IsSplit()) - { - hasStructRegArg = true; - assert(hasStackArgs == true); - } -#endif // FEATURE_ARG_SPLIT - else // we have a register argument, next we look for a struct type. 
- { - if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct)) - { - hasStructRegArg = true; - } - } - - /* If the argument tree contains an assignment (GTF_ASG) then the argument and - and every earlier argument (except constants) must be evaluated into temps - since there may be other arguments that follow and they may use the value being assigned. - - EXAMPLE: ArgTab is "a, a=5, a" - -> when we see the second arg "a=5" - we know the first two arguments "a, a=5" have to be evaluated into temps - - For the case of an assignment, we only know that there exist some assignment someplace - in the tree. We don't know what is being assigned so we are very conservative here - and assume that any local variable could have been assigned. - */ - - if (argx->gtFlags & GTF_ASG) - { - // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to - // a tmp, then we need a temp in the late arg list. - if ((argCount > 1) || argx->OperIsCopyBlkOp() -#ifdef FEATURE_FIXED_OUT_ARGS - || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property - // that we only have late non-register args when that feature is on. -#endif // FEATURE_FIXED_OUT_ARGS - ) - { - curArgTabEntry->needTmp = true; - needsTemps = true; - } - - // For all previous arguments, unless they are a simple constant - // we require that they be evaluated into temps - for (unsigned prevInx = 0; prevInx < curInx; prevInx++) - { - fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; - assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); - - if (!prevArgTabEntry->GetNode()->IsInvariant()) - { - prevArgTabEntry->needTmp = true; - needsTemps = true; - } - } - } - - bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0); -#if FEATURE_FIXED_OUT_ARGS - // Like calls, if this argument has a tree that will do an inline throw, - // a call to a jit helper, then we need to treat it like a call (but only - // if there are/were any stack args). - // This means unnesting, sorting, etc. Technically this is overly - // conservative, but I want to avoid as much special-case debug-only code - // as possible, so leveraging the GTF_CALL flag is the easiest. - // - if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && compiler->opts.compDbgCode && - (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT)) - { - for (unsigned otherInx = 0; otherInx < argCount; otherInx++) - { - if (otherInx == curInx) - { - continue; - } - - if (argTable[otherInx]->GetRegNum() == REG_STK) - { - treatLikeCall = true; - break; - } - } - } -#endif // FEATURE_FIXED_OUT_ARGS - - /* If it contains a call (GTF_CALL) then itself and everything before the call - with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT - has to be kept in the right order since we will move the call to the first position) - - For calls we don't have to be quite as conservative as we are with an assignment - since the call won't be modifying any non-address taken LclVars. 
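The "a, a=5, a" situation described above can be reproduced directly in plain C++; the sketch below shows why the earlier argument has to be captured in a temp before the assignment runs (callee and the locals are illustrative names only).

#include <cassert>

static int callee(int x, int y, int z) { return x * 100 + y * 10 + z; }

int main()
{
    int a = 1;

    // Naive late evaluation of the first argument after the assignment:
    int wrong;
    {
        int a2     = 1;
        int second = (a2 = 5); // argument "a=5"
        int first  = a2;       // evaluated too late: observes the new value
        wrong = callee(first, second, a2);
    }

    // What the temp achieves: capture the first argument before the assignment.
    int tmp    = a;            // spill "a" to a temp
    int second = (a = 5);
    int result = callee(tmp, second, a);

    assert(result == 155); // left-to-right evaluation preserved: (1, 5, 5)
    assert(wrong  == 555); // without the temp all three arguments see 5
    return 0;
}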
- */ - - if (treatLikeCall) - { - if (argCount > 1) // If this is not the only argument - { - curArgTabEntry->needTmp = true; - needsTemps = true; - } - else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL)) - { - // Spill all arguments that are floating point calls - curArgTabEntry->needTmp = true; - needsTemps = true; - } - - // All previous arguments may need to be evaluated into temps - for (unsigned prevInx = 0; prevInx < curInx; prevInx++) - { - fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; - assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); - - // For all previous arguments, if they have any GTF_ALL_EFFECT - // we require that they be evaluated into a temp - if ((prevArgTabEntry->GetNode()->gtFlags & GTF_ALL_EFFECT) != 0) - { - prevArgTabEntry->needTmp = true; - needsTemps = true; - } -#if FEATURE_FIXED_OUT_ARGS - // Or, if they are stored into the FIXED_OUT_ARG area - // we require that they be moved to the gtCallLateArgs - // and replaced with a placeholder node - else if (prevArgTabEntry->GetRegNum() == REG_STK) - { - prevArgTabEntry->needPlace = true; - } -#if FEATURE_ARG_SPLIT - else if (prevArgTabEntry->IsSplit()) - { - prevArgTabEntry->needPlace = true; - } -#endif // TARGET_ARM -#endif - } - } - -#if FEATURE_MULTIREG_ARGS - // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST - // with multiple indirections, so here we consider spilling it into a tmp LclVar. - // - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARM - bool isMultiRegArg = - (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->GetStackSlotsNumber() > 1); -#else - bool isMultiRegArg = (curArgTabEntry->numRegs > 1); -#endif - - if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false)) - { - if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)) - { - // Spill multireg struct arguments that have Assignments or Calls embedded in them - curArgTabEntry->needTmp = true; - needsTemps = true; - } - else - { - // We call gtPrepareCost to measure the cost of evaluating this tree - compiler->gtPrepareCost(argx); - - if (isMultiRegArg && (argx->GetCostEx() > (6 * IND_COST_EX))) - { - // Spill multireg struct arguments that are expensive to evaluate twice - curArgTabEntry->needTmp = true; - needsTemps = true; - } -#if defined(FEATURE_SIMD) && defined(TARGET_ARM64) - else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet())) - { - // SIMD types do not need the optimization below due to their sizes - if (argx->OperIsSimdOrHWintrinsic() || - (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) && - argx->AsObj()->gtOp1->AsOp()->gtOp1->OperIsSimdOrHWintrinsic())) - { - curArgTabEntry->needTmp = true; - needsTemps = true; - } - } -#endif -#ifndef TARGET_ARM - // TODO-Arm: This optimization is not implemented for ARM32 - // so we skip this for ARM32 until it is ported to use RyuJIT backend - // - else if (argx->OperGet() == GT_OBJ) - { - GenTreeObj* argObj = argx->AsObj(); - unsigned structSize = argObj->GetLayout()->GetSize(); - switch (structSize) - { - case 3: - case 5: - case 6: - case 7: - // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes - // - if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar? - { - // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes - // For now we use a a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp. 
- // - curArgTabEntry->needTmp = true; - needsTemps = true; - } - break; - case 11: - case 13: - case 14: - case 15: - // Spill any GT_OBJ multireg structs that are difficult to extract - // - // When we have a GT_OBJ of a struct with the above sizes we would need - // to use 3 or 4 load instructions to load the exact size of this struct. - // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence - // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp. - // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing - // the argument. - // - curArgTabEntry->needTmp = true; - needsTemps = true; - break; - - default: - break; - } - } -#endif // !TARGET_ARM - } - } -#endif // FEATURE_MULTIREG_ARGS - } - - // We only care because we can't spill structs and qmarks involve a lot of spilling, but - // if we don't have qmarks, then it doesn't matter. - // So check for Qmark's globally once here, instead of inside the loop. - // - const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed); - -#if FEATURE_FIXED_OUT_ARGS - - // For Arm/x64 we only care because we can't reorder a register - // argument that uses GT_LCLHEAP. This is an optimization to - // save a check inside the below loop. - // - const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed); - -#else - - const bool hasStackArgsWeCareAbout = hasStackArgs; - -#endif // FEATURE_FIXED_OUT_ARGS - - // If we have any stack args we have to force the evaluation - // of any arguments passed in registers that might throw an exception - // - // Technically we only a required to handle the following two cases: - // a GT_IND with GTF_IND_RNGCHK (only on x86) or - // a GT_LCLHEAP node that allocates stuff on the stack - // - if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout) - { - for (unsigned curInx = 0; curInx < argCount; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - assert(curArgTabEntry != nullptr); - GenTree* argx = curArgTabEntry->GetNode(); - - // Examine the register args that are currently not marked needTmp - // - if (!curArgTabEntry->needTmp && (curArgTabEntry->GetRegNum() != REG_STK)) - { - if (hasStackArgsWeCareAbout) - { -#if !FEATURE_FIXED_OUT_ARGS - // On x86 we previously recorded a stack depth of zero when - // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag - // Thus we can not reorder the argument after any stack based argument - // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to - // check for it explicitly.) 
- // - if (argx->gtFlags & GTF_EXCEPT) - { - curArgTabEntry->needTmp = true; - needsTemps = true; - continue; - } -#else - // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP - // - if (argx->gtFlags & GTF_EXCEPT) - { - assert(compiler->compLocallocUsed); - - // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree - // - if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT) - { - curArgTabEntry->needTmp = true; - needsTemps = true; - continue; - } - } -#endif - } - if (hasStructRegArgWeCareAbout) - { - // Returns true if a GT_QMARK node is encountered in the argx tree - // - if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT) - { - curArgTabEntry->needTmp = true; - needsTemps = true; - continue; - } - } - } - } - } - - argsComplete = true; -} - -void fgArgInfo::SortArgs() -{ - assert(argsComplete == true); - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\nSorting the arguments:\n"); - } -#endif - - /* Shuffle the arguments around before we build the gtCallLateArgs list. - The idea is to move all "simple" arguments like constants and local vars - to the end of the table, and move the complex arguments towards the beginning - of the table. This will help prevent registers from being spilled by - allowing us to evaluate the more complex arguments before the simpler arguments. - The argTable ends up looking like: - +------------------------------------+ <--- argTable[argCount - 1] - | constants | - +------------------------------------+ - | local var / local field | - +------------------------------------+ - | remaining arguments sorted by cost | - +------------------------------------+ - | temps (argTable[].needTmp = true) | - +------------------------------------+ - | args with calls (GTF_CALL) | - +------------------------------------+ <--- argTable[0] - */ - - /* Set the beginning and end for the new argument table */ - unsigned curInx; - int regCount = 0; - unsigned begTab = 0; - unsigned endTab = argCount - 1; - unsigned argsRemaining = argCount; - - // First take care of arguments that are constants. - // [We use a backward iterator pattern] - // - curInx = argCount; - do - { - curInx--; - - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - if (curArgTabEntry->GetRegNum() != REG_STK) - { - regCount++; - } - - assert(curArgTabEntry->lateUse == nullptr); - - // Skip any already processed args - // - if (!curArgTabEntry->processed) - { - GenTree* argx = curArgTabEntry->GetNode(); - - // put constants at the end of the table - // - if (argx->gtOper == GT_CNS_INT) - { - noway_assert(curInx <= endTab); - - curArgTabEntry->processed = true; - - // place curArgTabEntry at the endTab position by performing a swap - // - if (curInx != endTab) - { - argTable[curInx] = argTable[endTab]; - argTable[endTab] = curArgTabEntry; - } - - endTab--; - argsRemaining--; - } - } - } while (curInx > 0); - - if (argsRemaining > 0) - { - // Next take care of arguments that are calls. 
- // [We use a forward iterator pattern] - // - for (curInx = begTab; curInx <= endTab; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - // Skip any already processed args - // - if (!curArgTabEntry->processed) - { - GenTree* argx = curArgTabEntry->GetNode(); - - // put calls at the beginning of the table - // - if (argx->gtFlags & GTF_CALL) - { - curArgTabEntry->processed = true; - - // place curArgTabEntry at the begTab position by performing a swap - // - if (curInx != begTab) - { - argTable[curInx] = argTable[begTab]; - argTable[begTab] = curArgTabEntry; - } - - begTab++; - argsRemaining--; - } - } - } - } - - if (argsRemaining > 0) - { - // Next take care arguments that are temps. - // These temps come before the arguments that are - // ordinary local vars or local fields - // since this will give them a better chance to become - // enregistered into their actual argument register. - // [We use a forward iterator pattern] - // - for (curInx = begTab; curInx <= endTab; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - // Skip any already processed args - // - if (!curArgTabEntry->processed) - { - if (curArgTabEntry->needTmp) - { - curArgTabEntry->processed = true; - - // place curArgTabEntry at the begTab position by performing a swap - // - if (curInx != begTab) - { - argTable[curInx] = argTable[begTab]; - argTable[begTab] = curArgTabEntry; - } - - begTab++; - argsRemaining--; - } - } - } - } - - if (argsRemaining > 0) - { - // Next take care of local var and local field arguments. - // These are moved towards the end of the argument evaluation. - // [We use a backward iterator pattern] - // - curInx = endTab + 1; - do - { - curInx--; - - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - // Skip any already processed args - // - if (!curArgTabEntry->processed) - { - GenTree* argx = curArgTabEntry->GetNode(); - - if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD)) - { - noway_assert(curInx <= endTab); - - curArgTabEntry->processed = true; - - // place curArgTabEntry at the endTab position by performing a swap - // - if (curInx != endTab) - { - argTable[curInx] = argTable[endTab]; - argTable[endTab] = curArgTabEntry; - } - - endTab--; - argsRemaining--; - } - } - } while (curInx > begTab); - } - - // Finally, take care of all the remaining arguments. - // Note that we fill in one arg at a time using a while loop. 
- bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop - while (argsRemaining > 0) - { - /* Find the most expensive arg remaining and evaluate it next */ - - fgArgTabEntry* expensiveArgTabEntry = nullptr; - unsigned expensiveArg = UINT_MAX; - unsigned expensiveArgCost = 0; - - // [We use a forward iterator pattern] - // - for (curInx = begTab; curInx <= endTab; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - // Skip any already processed args - // - if (!curArgTabEntry->processed) - { - GenTree* argx = curArgTabEntry->GetNode(); - - // We should have already handled these kinds of args - assert(argx->gtOper != GT_LCL_VAR); - assert(argx->gtOper != GT_LCL_FLD); - assert(argx->gtOper != GT_CNS_INT); - - // This arg should either have no persistent side effects or be the last one in our table - // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); - - if (argsRemaining == 1) - { - // This is the last arg to place - expensiveArg = curInx; - expensiveArgTabEntry = curArgTabEntry; - assert(begTab == endTab); - break; - } - else - { - if (!costsPrepared) - { - /* We call gtPrepareCost to measure the cost of evaluating this tree */ - compiler->gtPrepareCost(argx); - } - - if (argx->GetCostEx() > expensiveArgCost) - { - // Remember this arg as the most expensive one that we have yet seen - expensiveArgCost = argx->GetCostEx(); - expensiveArg = curInx; - expensiveArgTabEntry = curArgTabEntry; - } - } - } - } - - noway_assert(expensiveArg != UINT_MAX); - - // put the most expensive arg towards the beginning of the table - - expensiveArgTabEntry->processed = true; - - // place expensiveArgTabEntry at the begTab position by performing a swap - // - if (expensiveArg != begTab) - { - argTable[expensiveArg] = argTable[begTab]; - argTable[begTab] = expensiveArgTabEntry; - } - - begTab++; - argsRemaining--; - - costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop - } - - // The table should now be completely filled and thus begTab should now be adjacent to endTab - // and regArgsRemaining should be zero - assert(begTab == (endTab + 1)); - assert(argsRemaining == 0); - -#if !FEATURE_FIXED_OUT_ARGS - // Finally build the regArgList - // - callTree->AsCall()->regArgList = NULL; - callTree->AsCall()->regArgListCount = regCount; - - unsigned regInx = 0; - for (curInx = 0; curInx < argCount; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - if (curArgTabEntry->GetRegNum() != REG_STK) - { - // Encode the argument register in the register mask - // - callTree->AsCall()->regArgList[regInx] = curArgTabEntry->GetRegNum(); - regInx++; - } - } -#endif // !FEATURE_FIXED_OUT_ARGS - - argsSorted = true; -} - -#ifdef DEBUG -void fgArgInfo::Dump(Compiler* compiler) const -{ - for (unsigned curInx = 0; curInx < ArgCount(); curInx++) - { - fgArgTabEntry* curArgEntry = ArgTable()[curInx]; - curArgEntry->Dump(); - } -} -#endif - -//------------------------------------------------------------------------------ -// fgMakeTmpArgNode : This function creates a tmp var only if needed. -// We need this to be done in order to enforce ordering -// of the evaluation of arguments. -// -// Arguments: -// curArgTabEntry -// -// Return Value: -// the newly created temp var tree. 
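To summarize the order SortArgs above works toward, here is a standalone C++ toy model (the Arg struct and rank function are illustrative, not the real fgArgTabEntry or its in-place swaps): arguments with calls first, then temps, then the remaining trees by descending cost, then local variables, then constants.

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

struct Arg
{
    std::string name;
    bool hasCall;
    bool needTmp;
    bool isLocal;
    bool isConst;
    int  cost;
};

static int rank(const Arg& a)
{
    if (a.hasCall) return 0; // evaluated first
    if (a.needTmp) return 1;
    if (a.isConst) return 4; // evaluated last
    if (a.isLocal) return 3;
    return 2;                // remaining args, ordered by cost below
}

int main()
{
    std::vector<Arg> args = {
        {"c1",        false, false, false, true,  1},
        {"bigExpr",   false, false, false, false, 40},
        {"call1",     true,  false, false, false, 50},
        {"local",     false, false, true,  false, 2},
        {"tmp",       false, true,  false, false, 30},
        {"smallExpr", false, false, false, false, 10},
    };

    std::stable_sort(args.begin(), args.end(), [](const Arg& x, const Arg& y) {
        if (rank(x) != rank(y)) return rank(x) < rank(y);
        return x.cost > y.cost; // more expensive trees are evaluated earlier
    });

    assert(args.front().name == "call1");
    assert(args.back().name == "c1");
    assert(args[2].name == "bigExpr" && args[3].name == "smallExpr");
    return 0;
}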
- -GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) -{ - unsigned tmpVarNum = curArgTabEntry->tmpNum; - LclVarDsc* varDsc = &lvaTable[tmpVarNum]; - assert(varDsc->lvIsTemp); - var_types type = varDsc->TypeGet(); - - // Create a copy of the temp to go into the late argument list - GenTree* arg = gtNewLclvNode(tmpVarNum, type); - GenTree* addrNode = nullptr; - - if (varTypeIsStruct(type)) - { - -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) - - // Can this type be passed as a primitive type? - // If so, the following call will return the corresponding primitive type. - // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type. - - bool passedAsPrimitive = false; - if (curArgTabEntry->TryPassAsPrimitive()) - { - CORINFO_CLASS_HANDLE clsHnd = varDsc->GetStructHnd(); - var_types structBaseType = - getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd, curArgTabEntry->IsVararg()); - - if (structBaseType != TYP_UNKNOWN) - { - passedAsPrimitive = true; -#if defined(UNIX_AMD64_ABI) - // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry, - // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take - // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again. - // - if (genIsValidFloatReg(curArgTabEntry->GetRegNum())) - { - if (structBaseType == TYP_INT) - { - structBaseType = TYP_FLOAT; - } - else - { - assert(structBaseType == TYP_LONG); - structBaseType = TYP_DOUBLE; - } - } -#endif - type = structBaseType; - } - } - - // If it is passed in registers, don't get the address of the var. Make it a - // field instead. It will be loaded in registers with putarg_reg tree in lower. - if (passedAsPrimitive) - { - arg->ChangeOper(GT_LCL_FLD); - arg->gtType = type; - } - else - { - var_types addrType = TYP_BYREF; - arg = gtNewOperNode(GT_ADDR, addrType, arg); - addrNode = arg; - -#if FEATURE_MULTIREG_ARGS -#ifdef TARGET_ARM64 - assert(varTypeIsStruct(type)); - if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) - { - // We will create a GT_OBJ for the argument below. - // This will be passed by value in two registers. - assert(addrNode != nullptr); - - // Create an Obj of the temp to use it as a call argument. - arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); - } -#else - // Always create an Obj of the temp to use it as a call argument. - arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); -#endif // !TARGET_ARM64 -#endif // FEATURE_MULTIREG_ARGS - } - -#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) - - // other targets, we pass the struct by value - assert(varTypeIsStruct(type)); - - addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); - - // Get a new Obj node temp to use it as a call argument. - // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. - arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); - -#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) - - } // (varTypeIsStruct(type)) - - if (addrNode != nullptr) - { - assert(addrNode->gtOper == GT_ADDR); - - // This will prevent this LclVar from being optimized away - lvaSetVarAddrExposed(tmpVarNum); - - // the child of a GT_ADDR is required to have this flag set - addrNode->AsOp()->gtOp1->gtFlags |= GTF_DONT_CSE; - } - - return arg; -} - -//------------------------------------------------------------------------------ -// EvalArgsToTemps : Create temp assignments and populate the LateArgs list. 
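The "pass the struct as a primitive" retyping in fgMakeTmpArgNode above can be pictured with a plain C++ reinterpretation of a register-sized struct; the memcpy below stands in for the GT_LCL_FLD reload of the temp and shows that no bits are lost (TwoInts is an illustrative type for this sketch).

#include <cassert>
#include <cstdint>
#include <cstring>

struct TwoInts { int32_t a; int32_t b; };

int main()
{
    TwoInts s{0x11223344, 0x55667788};

    // What the GT_LCL_FLD retyping amounts to: read the same 8 bytes as an i8.
    int64_t asPrimitive;
    static_assert(sizeof(asPrimitive) == sizeof(s), "fits in one register");
    std::memcpy(&asPrimitive, &s, sizeof(s));

    // Round-trip to show the reinterpretation preserves the struct's contents.
    TwoInts back;
    std::memcpy(&back, &asPrimitive, sizeof(back));
    assert(back.a == s.a && back.b == s.b);
    return 0;
}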
- -void fgArgInfo::EvalArgsToTemps() -{ - assert(argsSorted); - - unsigned regArgInx = 0; - // Now go through the argument table and perform the necessary evaluation into temps - GenTreeCall::Use* tmpRegArgNext = nullptr; - for (unsigned curInx = 0; curInx < argCount; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - assert(curArgTabEntry->lateUse == nullptr); - - GenTree* argx = curArgTabEntry->GetNode(); - GenTree* setupArg = nullptr; - GenTree* defArg; - -#if !FEATURE_FIXED_OUT_ARGS - // Only ever set for FEATURE_FIXED_OUT_ARGS - assert(curArgTabEntry->needPlace == false); - - // On x86 and other archs that use push instructions to pass arguments: - // Only the register arguments need to be replaced with placeholder nodes. - // Stacked arguments are evaluated and pushed (or stored into the stack) in order. - // - if (curArgTabEntry->GetRegNum() == REG_STK) - continue; -#endif - - if (curArgTabEntry->needTmp) - { - if (curArgTabEntry->isTmp == true) - { - // Create a copy of the temp to go into the late argument list - defArg = compiler->fgMakeTmpArgNode(curArgTabEntry); - - // mark the original node as a late argument - argx->gtFlags |= GTF_LATE_ARG; - } - else - { - // Create a temp assignment for the argument - // Put the temp in the gtCallLateArgs list - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - if (compiler->verbose) - { - printf("Argument with 'side effect'...\n"); - compiler->gtDispTree(argx); - } -#endif - -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - noway_assert(argx->gtType != TYP_STRUCT); -#endif - - unsigned tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect")); - if (argx->gtOper == GT_MKREFANY) - { - // For GT_MKREFANY, typically the actual struct copying does - // not have any side-effects and can be delayed. So instead - // of using a temp for the whole struct, we can just use a temp - // for operand that that has a side-effect - GenTree* operand; - if ((argx->AsOp()->gtOp2->gtFlags & GTF_ALL_EFFECT) == 0) - { - operand = argx->AsOp()->gtOp1; - - // In the early argument evaluation, place an assignment to the temp - // from the source operand of the mkrefany - setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); - - // Replace the operand for the mkrefany with the new temp. - argx->AsOp()->gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); - } - else if ((argx->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT) == 0) - { - operand = argx->AsOp()->gtOp2; - - // In the early argument evaluation, place an assignment to the temp - // from the source operand of the mkrefany - setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); - - // Replace the operand for the mkrefany with the new temp. - argx->AsOp()->gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); - } - } - - if (setupArg != nullptr) - { - // Now keep the mkrefany for the late argument list - defArg = argx; - - // Clear the side-effect flags because now both op1 and op2 have no side-effects - defArg->gtFlags &= ~GTF_ALL_EFFECT; - } - else - { - setupArg = compiler->gtNewTempAssign(tmpVarNum, argx); - - LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum; - var_types lclVarType = genActualType(argx->gtType); - var_types scalarType = TYP_UNKNOWN; - - if (setupArg->OperIsCopyBlkOp()) - { - setupArg = compiler->fgMorphCopyBlock(setupArg); -#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) - if (lclVarType == TYP_STRUCT) - { - // This scalar LclVar widening step is only performed for ARM architectures. 
- // - CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); - unsigned structSize = varDsc->lvExactSize; - - scalarType = - compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->IsVararg()); - } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) - } - - // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => - // 8) - if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) - { - // Create a GT_LCL_FLD using the wider type to go to the late argument list - defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0); - } - else - { - // Create a copy of the temp to go to the late argument list - defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType); - } - - curArgTabEntry->isTmp = true; - curArgTabEntry->tmpNum = tmpVarNum; - -#ifdef TARGET_ARM - // Previously we might have thought the local was promoted, and thus the 'COPYBLK' - // might have left holes in the used registers (see - // fgAddSkippedRegsInPromotedStructArg). - // Too bad we're not that smart for these intermediate temps... - if (isValidIntArgReg(curArgTabEntry->GetRegNum()) && (curArgTabEntry->numRegs > 1)) - { - regNumber argReg = curArgTabEntry->GetRegNum(); - regMaskTP allUsedRegs = genRegMask(curArgTabEntry->GetRegNum()); - for (unsigned i = 1; i < curArgTabEntry->numRegs; i++) - { - argReg = genRegArgNext(argReg); - allUsedRegs |= genRegMask(argReg); - } - } -#endif // TARGET_ARM - } - - /* mark the assignment as a late argument */ - setupArg->gtFlags |= GTF_LATE_ARG; - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\n Evaluate to a temp:\n"); - compiler->gtDispTree(setupArg); - } -#endif - } - } - else // curArgTabEntry->needTmp == false - { - // On x86 - - // Only register args are replaced with placeholder nodes - // and the stack based arguments are evaluated and pushed in order. - // - // On Arm/x64 - When needTmp is false and needPlace is false, - // the non-register arguments are evaluated and stored in order. - // When needPlace is true we have a nested call that comes after - // this argument so we have to replace it in the gtCallArgs list - // (the initial argument evaluation list) with a placeholder. - // - if ((curArgTabEntry->GetRegNum() == REG_STK) && (curArgTabEntry->needPlace == false)) - { - continue; - } - - /* No temp needed - move the whole node to the gtCallLateArgs list */ - - /* The argument is deferred and put in the late argument list */ - - defArg = argx; - - // Create a placeholder node to put in its place in gtCallLateArgs. - - // For a struct type we also need to record the class handle of the arg. - CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; - -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - - // All structs are either passed (and retyped) as integral types, OR they - // are passed by reference. 
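Editor's note: EvalArgsToTemps, removed in this hunk, walks the sorted table and splits each argument into an early "setup" node (a temp assignment or placeholder) and a late node that the call ultimately consumes. The toy model below shows only that split; plain strings stand in for GenTree, the ToyArg/SplitIntoEarlyAndLate names are illustrative, and placeholders, MKREFANY and struct widening are left out.

    #include <string>
    #include <vector>

    struct ToyArg
    {
        std::string expr;    // textual form of the argument tree
        bool        needTmp; // must be evaluated before the call (side effects, ordering)
    };

    struct ToyLoweredArgs
    {
        std::vector<std::string> setup; // "early" list: temp assignments, in source order
        std::vector<std::string> late;  // "late" list: what the call actually consumes
    };

    ToyLoweredArgs SplitIntoEarlyAndLate(const std::vector<ToyArg>& args)
    {
        ToyLoweredArgs out;
        unsigned tmpNum = 0;
        for (const ToyArg& arg : args)
        {
            if (arg.needTmp)
            {
                // Evaluate now into a temp; pass the temp later.
                std::string tmp = "tmp" + std::to_string(tmpNum++);
                out.setup.push_back(tmp + " = " + arg.expr);
                out.late.push_back(tmp);
            }
            else
            {
                // Cheap/safe argument: defer the whole expression to the late list.
                out.late.push_back(arg.expr);
            }
        }
        return out;
    }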
- noway_assert(argx->gtType != TYP_STRUCT); - -#else // !defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI) - - if (defArg->TypeGet() == TYP_STRUCT) - { - clsHnd = compiler->gtGetStructHandleIfPresent(defArg); - noway_assert(clsHnd != NO_CLASS_HANDLE); - } - -#endif // !(defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) - - setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd); - - /* mark the placeholder node as a late argument */ - setupArg->gtFlags |= GTF_LATE_ARG; - -#ifdef DEBUG - if (compiler->verbose) - { - if (curArgTabEntry->GetRegNum() == REG_STK) - { - printf("Deferred stack argument :\n"); - } - else - { - printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->GetRegNum())); - } - - compiler->gtDispTree(argx); - printf("Replaced with placeholder node:\n"); - compiler->gtDispTree(setupArg); - } -#endif - } - - if (setupArg != nullptr) - { - noway_assert(curArgTabEntry->use->GetNode() == argx); - curArgTabEntry->use->SetNode(setupArg); - } - - /* deferred arg goes into the late argument list */ - - if (tmpRegArgNext == nullptr) - { - tmpRegArgNext = compiler->gtNewCallArgs(defArg); - callTree->AsCall()->gtCallLateArgs = tmpRegArgNext; - } - else - { - noway_assert(tmpRegArgNext->GetNode() != nullptr); - tmpRegArgNext->SetNext(compiler->gtNewCallArgs(defArg)); - - tmpRegArgNext = tmpRegArgNext->GetNext(); - } - - curArgTabEntry->lateUse = tmpRegArgNext; - curArgTabEntry->SetLateArgInx(regArgInx++); - } - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\nShuffled argument table: "); - for (unsigned curInx = 0; curInx < argCount; curInx++) - { - fgArgTabEntry* curArgTabEntry = argTable[curInx]; - - if (curArgTabEntry->GetRegNum() != REG_STK) - { - printf("%s ", getRegName(curArgTabEntry->GetRegNum())); - } - } - printf("\n"); - } -#endif -} - -// Return a conservative estimate of the stack size in bytes. -// It will be used only on the intercepted-for-host code path to copy the arguments. -int Compiler::fgEstimateCallStackSize(GenTreeCall* call) -{ - int numArgs = 0; - for (GenTreeCall::Use& use : call->Args()) - { - numArgs++; - } - - int numStkArgs; - if (numArgs > MAX_REG_ARG) - { - numStkArgs = numArgs - MAX_REG_ARG; - } - else - { - numStkArgs = 0; - } - - return numStkArgs * REGSIZE_BYTES; -} - -//------------------------------------------------------------------------------ -// fgMakeMultiUse : If the node is a local, clone it, otherwise insert a comma form temp -// -// Arguments: -// ppTree - a pointer to the child node we will be replacing with the comma expression that -// evaluates ppTree to a temp and returns the result -// -// Return Value: -// A fresh GT_LCL_VAR node referencing the temp which has not been used - -GenTree* Compiler::fgMakeMultiUse(GenTree** pOp) -{ - GenTree* tree = *pOp; - if (tree->IsLocal()) - { - return gtClone(tree); - } - else - { - return fgInsertCommaFormTemp(pOp); - } -} - -//------------------------------------------------------------------------------ -// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree, -// and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl) -// -// Arguments: -// ppTree - a pointer to the child node we will be replacing with the comma expression that -// evaluates ppTree to a temp and returns the result -// -// structType - value type handle if the temp created is of TYP_STRUCT. 
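Editor's note: the deleted fgEstimateCallStackSize reduces to "arguments beyond the integer register set take one pointer-sized slot each". Restated below with illustrative constants in place of the target-specific MAX_REG_ARG and REGSIZE_BYTES.

    // Illustrative constants, not the JIT's target-specific values.
    constexpr int kMaxRegArgsSketch  = 4;
    constexpr int kRegSizeBytesSketch = 8;

    int EstimateCallStackSize(int numArgs)
    {
        int numStkArgs = (numArgs > kMaxRegArgsSketch) ? (numArgs - kMaxRegArgsSketch) : 0;
        return numStkArgs * kRegSizeBytesSketch;
    }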
-// -// Return Value: -// A fresh GT_LCL_VAR node referencing the temp which has not been used -// - -GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/) -{ - GenTree* subTree = *ppTree; - - unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable")); - - if (varTypeIsStruct(subTree)) - { - assert(structType != nullptr); - lvaSetStruct(lclNum, structType, false); - } - - // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree. - // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for - // setting type of lcl vars created. - GenTree* asg = gtNewTempAssign(lclNum, subTree); - - GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, subTree->TypeGet(), lclNum); - - GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load); - - *ppTree = comma; - - return new (this, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, subTree->TypeGet(), lclNum); -} - -//------------------------------------------------------------------------ -// fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg -// -// Arguments: -// callNode - the call for which we are generating the fgArgInfo -// -// Return Value: -// None -// -// Notes: -// This method is idempotent in that it checks whether the fgArgInfo has already been -// constructed, and just returns. -// This method only computes the arg table and arg entries for the call (the fgArgInfo), -// and makes no modification of the args themselves. -// -// The IR for the call args can change for calls with non-standard arguments: some non-standard -// arguments add new call argument IR nodes. -// -void Compiler::fgInitArgInfo(GenTreeCall* call) -{ - GenTreeCall::Use* args; - GenTree* argx; - - unsigned argIndex = 0; - unsigned intArgRegNum = 0; - unsigned fltArgRegNum = 0; - DEBUG_ARG_SLOTS_ONLY(unsigned argSlots = 0;) - - bool callHasRetBuffArg = call->HasRetBufArg(); - bool callIsVararg = call->IsVarargs(); - -#ifdef TARGET_ARM - regMaskTP argSkippedRegMask = RBM_NONE; - regMaskTP fltArgSkippedRegMask = RBM_NONE; -#endif // TARGET_ARM - -#if defined(TARGET_X86) - unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated -#else - const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number -#endif - - if (call->fgArgInfo != nullptr) - { - // We've already initialized and set the fgArgInfo. - return; - } - JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); - - // At this point, we should never have gtCallLateArgs, as this needs to be done before those are determined. - assert(call->gtCallLateArgs == nullptr); - -#ifdef TARGET_UNIX - if (callIsVararg) - { - // Currently native varargs is not implemented on non windows targets. - // - // Note that some targets like Arm64 Unix should not need much work as - // the ABI is the same. While other targets may only need small changes - // such as amd64 Unix, which just expects RAX to pass numFPArguments. - NYI("Morphing Vararg call not yet implemented on non Windows targets."); - } -#endif // TARGET_UNIX - - // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed - // following the normal calling convention or in the normal argument registers. 
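Editor's note: fgInsertCommaFormTemp, deleted above, rewrites an expression use into COMMA(ASG(tmp, expr), tmp) and hands back one more fresh read of the temp. A sketch of that shape over a toy node type (not GenTree; the ToyNode name is hypothetical and ownership/typing are deliberately ignored):

    #include <string>

    struct ToyNode
    {
        std::string label;          // e.g. "ASG", "COMMA", "LCL_VAR tmp0", or a leaf expression
        ToyNode*    op1 = nullptr;
        ToyNode*    op2 = nullptr;
    };

    // Rewrites *use so the original expression is evaluated exactly once into a
    // fresh local and the use site reads that local:
    //     expr   ==>   COMMA(ASG(tmpN, expr), tmpN)
    ToyNode* InsertCommaFormTemp(ToyNode** use, unsigned tmpNum)
    {
        ToyNode*    expr    = *use;
        std::string tmpName = "LCL_VAR tmp" + std::to_string(tmpNum);

        ToyNode* asg   = new ToyNode{"ASG", new ToyNode{tmpName}, expr};
        ToyNode* comma = new ToyNode{"COMMA", asg, new ToyNode{tmpName}};
        *use = comma;

        return new ToyNode{tmpName}; // a second, unused read of the temp for the caller
    }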
We either mark existing - // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the - // non-standard arguments into the argument list, below. - class NonStandardArgs - { - struct NonStandardArg - { - regNumber reg; // The register to be assigned to this non-standard argument. - GenTree* node; // The tree node representing this non-standard argument. - // Note that this must be updated if the tree node changes due to morphing! - }; - - ArrayStack args; - - public: - NonStandardArgs(CompAllocator alloc) : args(alloc, 3) // We will have at most 3 non-standard arguments - { - } - - //----------------------------------------------------------------------------- - // Add: add a non-standard argument to the table of non-standard arguments - // - // Arguments: - // node - a GenTree node that has a non-standard argument. - // reg - the register to assign to this node. - // - // Return Value: - // None. - // - void Add(GenTree* node, regNumber reg) - { - NonStandardArg nsa = {reg, node}; - args.Push(nsa); - } - - //----------------------------------------------------------------------------- - // Find: Look for a GenTree* in the set of non-standard args. - // - // Arguments: - // node - a GenTree node to look for - // - // Return Value: - // The index of the non-standard argument (a non-negative, unique, stable number). - // If the node is not a non-standard argument, return -1. - // - int Find(GenTree* node) - { - for (int i = 0; i < args.Height(); i++) - { - if (node == args.Top(i).node) - { - return i; - } - } - return -1; - } - - //----------------------------------------------------------------------------- - // FindReg: Look for a GenTree node in the non-standard arguments set. If found, - // set the register to use for the node. - // - // Arguments: - // node - a GenTree node to look for - // pReg - an OUT argument. *pReg is set to the non-standard register to use if - // 'node' is found in the non-standard argument set. - // - // Return Value: - // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the - // register to use. - // 'false' otherwise (in this case, *pReg is unmodified). - // - bool FindReg(GenTree* node, regNumber* pReg) - { - for (int i = 0; i < args.Height(); i++) - { - NonStandardArg& nsa = args.TopRef(i); - if (node == nsa.node) - { - *pReg = nsa.reg; - return true; - } - } - return false; - } - - //----------------------------------------------------------------------------- - // Replace: Replace the non-standard argument node at a given index. This is done when - // the original node was replaced via morphing, but we need to continue to assign a - // particular non-standard arg to it. - // - // Arguments: - // index - the index of the non-standard arg. It must exist. - // node - the new GenTree node. - // - // Return Value: - // None. - // - void Replace(int index, GenTree* node) - { - args.TopRef(index).node = node; - } - - } nonStandardArgs(getAllocator(CMK_ArrayStack)); - - // Count of args. On first morph, this is counted before we've filled in the arg table. - // On remorph, we grab it from the arg table. - unsigned numArgs = 0; - - // First we need to count the args - if (call->gtCallThisArg != nullptr) - { - numArgs++; - } - for (GenTreeCall::Use& use : call->Args()) - { - numArgs++; - } - - // Insert or mark non-standard args. 
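Editor's note: the NonStandardArgs helper above is just a small (node, register) table. The same interface can be sketched over std::vector, with Node and Reg left as template placeholders rather than the JIT's GenTree*/regNumber; the class name is illustrative.

    #include <vector>

    template <typename Node, typename Reg>
    class NonStandardArgTable
    {
        struct Entry
        {
            Reg   reg;
            Node* node;
        };
        std::vector<Entry> entries;

    public:
        void Add(Node* node, Reg reg)
        {
            entries.push_back({reg, node});
        }

        // Returns the stable index of 'node', or -1 if it is not a non-standard arg.
        int Find(const Node* node) const
        {
            for (int i = 0; i < (int)entries.size(); i++)
            {
                if (entries[i].node == node)
                {
                    return i;
                }
            }
            return -1;
        }

        // If 'node' is present, writes its register to *reg and returns true.
        bool FindReg(const Node* node, Reg* reg) const
        {
            int i = Find(node);
            if (i < 0)
            {
                return false;
            }
            *reg = entries[(size_t)i].reg;
            return true;
        }

        // Re-point an existing entry at a replacement node (e.g. after morphing).
        void Replace(int index, Node* node)
        {
            entries[(size_t)index].node = node;
        }
    };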
These are either outside the normal calling convention, or - // arguments registers that don't follow the normal progression of argument registers in the calling - // convention (such as for the ARM64 fixed return buffer argument x8). - // - // *********** NOTE ************* - // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments - // in the implementation of fast tail call. - // *********** END NOTE ********* - CLANG_FORMAT_COMMENT_ANCHOR; - -#if defined(TARGET_X86) || defined(TARGET_ARM) - // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers has a custom calling convention. - // Set the argument registers correctly here. - if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME)) - { - GenTreeCall::Use* args = call->gtCallArgs; - GenTree* arg1 = args->GetNode(); - assert(arg1 != nullptr); - nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME); - } -#endif // defined(TARGET_X86) || defined(TARGET_ARM) -#if defined(TARGET_ARM) - // A non-standard calling convention using wrapper delegate invoke is used on ARM, only, for wrapper - // delegates. It is used for VSD delegate calls where the VSD custom calling convention ABI requires passing - // R4, a callee-saved register, with a special value. Since R4 is a callee-saved register, its value needs - // to be preserved. Thus, the VM uses a wrapper delegate IL stub, which preserves R4 and also sets up R4 - // correctly for the VSD call. The VM is simply reusing an existing mechanism (wrapper delegate IL stub) - // to achieve its goal for delegate VSD call. See COMDelegate::NeedsWrapperDelegate() in the VM for details. - else if (call->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV) - { - GenTree* arg = call->gtCallThisArg->GetNode(); - if (arg->OperIsLocal()) - { - arg = gtClone(arg, true); - } - else - { - GenTree* tmp = fgInsertCommaFormTemp(&arg); - call->gtCallThisArg->SetNode(arg); - call->gtFlags |= GTF_ASG; - arg = tmp; - } - noway_assert(arg != nullptr); - - GenTree* newArg = new (this, GT_ADDR) - GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfWrapperDelegateIndirectCell); - - // Append newArg as the last arg - GenTreeCall::Use** insertionPoint = &call->gtCallArgs; - for (; *insertionPoint != nullptr; insertionPoint = &((*insertionPoint)->NextRef())) - { - } - *insertionPoint = gtNewCallArgs(newArg); - - numArgs++; - nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg()); - } -#endif // defined(TARGET_ARM) -#if defined(TARGET_X86) - // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the - // hi part to be in EDX. This sets the argument registers up correctly. - else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || - call->IsHelperCall(this, CORINFO_HELP_LRSZ)) - { - GenTreeCall::Use* args = call->gtCallArgs; - GenTree* arg1 = args->GetNode(); - assert(arg1 != nullptr); - nonStandardArgs.Add(arg1, REG_LNGARG_LO); - - args = args->GetNext(); - GenTree* arg2 = args->GetNode(); - assert(arg2 != nullptr); - nonStandardArgs.Add(arg2, REG_LNGARG_HI); - } -#else // !TARGET_X86 - // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed. - // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling - // convention for x86/SSE. - - // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it. 
- // - // We don't use the fixed return buffer argument if we have the special unmanaged instance call convention. - // That convention doesn't use the fixed return buffer register. - // - CLANG_FORMAT_COMMENT_ANCHOR; - - if (call->HasFixedRetBufArg()) - { - args = call->gtCallArgs; - assert(args != nullptr); - - argx = call->gtCallArgs->GetNode(); - - // We don't increment numArgs here, since we already counted this argument above. - - nonStandardArgs.Add(argx, theFixedRetBuffReg()); - } - - // We are allowed to have a Fixed Return Buffer argument combined - // with any of the remaining non-standard arguments - // - CLANG_FORMAT_COMMENT_ANCHOR; - - if (call->IsVirtualStub()) - { - if (!call->IsTailCallViaJitHelper()) - { - GenTree* stubAddrArg = fgGetStubAddrArg(call); - // And push the stub address onto the list of arguments - call->gtCallArgs = gtPrependNewCallArg(stubAddrArg, call->gtCallArgs); - - numArgs++; - nonStandardArgs.Add(stubAddrArg, stubAddrArg->GetRegNum()); - } - else - { - // If it is a VSD call getting dispatched via tail call helper, - // fgMorphTailCallViaJitHelper() would materialize stub addr as an additional - // parameter added to the original arg list and hence no need to - // add as a non-standard arg. - } - } - else -#endif // !TARGET_X86 - if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr)) - { - assert(!call->IsUnmanaged()); - - GenTree* arg = call->gtCallCookie; - noway_assert(arg != nullptr); - call->gtCallCookie = nullptr; - -#if defined(TARGET_X86) - // x86 passes the cookie on the stack as the final argument to the call. - GenTreeCall::Use** insertionPoint = &call->gtCallArgs; - for (; *insertionPoint != nullptr; insertionPoint = &((*insertionPoint)->NextRef())) - { - } - *insertionPoint = gtNewCallArgs(arg); -#else // !defined(TARGET_X86) - // All other architectures pass the cookie in a register. - call->gtCallArgs = gtPrependNewCallArg(arg, call->gtCallArgs); -#endif // defined(TARGET_X86) - - nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM); - numArgs++; - - // put destination into R10/EAX - arg = gtClone(call->gtCallAddr, true); - call->gtCallArgs = gtPrependNewCallArg(arg, call->gtCallArgs); - numArgs++; - - nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM); - - // finally change this call to a helper call - call->gtCallType = CT_HELPER; - call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI); - } -#if defined(FEATURE_READYTORUN_COMPILER) && defined(TARGET_ARMARCH) - // For arm, we dispatch code same as VSD using virtualStubParamInfo->GetReg() - // for indirection cell address, which ZapIndirectHelperThunk expects. - if (call->IsR2RRelativeIndir()) - { - assert(call->gtEntryPoint.addr != nullptr); - - size_t addrValue = (size_t)call->gtEntryPoint.addr; - GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR); -#ifdef DEBUG - indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; -#endif - indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM); -#ifdef TARGET_ARM - // Issue #xxxx : Don't attempt to CSE this constant on ARM32 - // - // This constant has specific register requirements, and LSRA doesn't currently correctly - // handle them when the value is in a CSE'd local. - indirectCellAddress->SetDoNotCSE(); -#endif // TARGET_ARM - - // Push the stub address onto the list of arguments. 
- call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); - - numArgs++; - nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum()); - } - -#endif // FEATURE_READYTORUN_COMPILER && TARGET_ARMARCH - - // Allocate the fgArgInfo for the call node; - // - call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs); - - // Add the 'this' argument value, if present. - if (call->gtCallThisArg != nullptr) - { - argx = call->gtCallThisArg->GetNode(); - assert(argIndex == 0); - assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT); - assert(varTypeIsGC(argx) || (argx->gtType == TYP_I_IMPL)); - - const regNumber regNum = genMapIntRegArgNumToRegNum(intArgRegNum); - const unsigned numRegs = 1; - const unsigned byteSize = TARGET_POINTER_SIZE; - const unsigned byteAlignment = TARGET_POINTER_SIZE; - const bool isStruct = false; - const bool isFloatHfa = false; - - // This is a register argument - put it in the table. - call->fgArgInfo->AddRegArg(argIndex, argx, call->gtCallThisArg, regNum, numRegs, byteSize, byteAlignment, - isStruct, isFloatHfa, - callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(0) - UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); - - intArgRegNum++; -#ifdef WINDOWS_AMD64_ABI - // Whenever we pass an integer register argument - // we skip the corresponding floating point register argument - fltArgRegNum++; -#endif // WINDOWS_AMD64_ABI - argIndex++; - DEBUG_ARG_SLOTS_ONLY(argSlots++;) - } - -#ifdef TARGET_X86 - // Compute the maximum number of arguments that can be passed in registers. - // For X86 we handle the varargs and unmanaged calling conventions - - if (call->gtFlags & GTF_CALL_POP_ARGS) - { - noway_assert(intArgRegNum < MAX_REG_ARG); - // No more register arguments for varargs (CALL_POP_ARGS) - maxRegArgs = intArgRegNum; - - // Add in the ret buff arg - if (callHasRetBuffArg) - maxRegArgs++; - } - - if (call->IsUnmanaged()) - { - noway_assert(intArgRegNum == 0); - - if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL) - { - noway_assert(call->gtCallArgs->GetNode()->TypeGet() == TYP_I_IMPL || - call->gtCallArgs->GetNode()->TypeGet() == TYP_BYREF || - call->gtCallArgs->GetNode()->gtOper == - GT_NOP); // the arg was already morphed to a register (fgMorph called twice) - maxRegArgs = 1; - } - else - { - maxRegArgs = 0; - } -#ifdef UNIX_X86_ABI - // Add in the ret buff arg - if (callHasRetBuffArg) - maxRegArgs++; -#endif - } -#endif // TARGET_X86 - - /* Morph the user arguments */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#if defined(TARGET_ARM) - - // The ARM ABI has a concept of back-filling of floating-point argument registers, according - // to the "Procedure Call Standard for the ARM Architecture" document, especially - // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can - // appear in a lower-numbered register than floating point argument N. That is, argument - // register allocation is not strictly increasing. To support this, we need to keep track of unused - // floating-point argument registers that we can back-fill. We only support 4-byte float and - // 8-byte double types, and one to four element HFAs composed of these types. With this, we will - // only back-fill single registers, since there is no way with these types to create - // an alignment hole greater than one register. However, there can be up to 3 back-fill slots - // available (with 16 FP argument registers). 
Consider this code: - // - // struct HFA { float x, y, z; }; // a three element HFA - // void bar(float a1, // passed in f0 - // double a2, // passed in f2/f3; skip f1 for alignment - // HFA a3, // passed in f4/f5/f6 - // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot - // HFA a5, // passed in f10/f11/f12 - // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill - // // slots - // float a7, // passed in f1 (back-filled) - // float a8, // passed in f7 (back-filled) - // float a9, // passed in f13 (back-filled) - // float a10) // passed on the stack in [OutArg+0] - // - // Note that if we ever support FP types with larger alignment requirements, then there could - // be more than single register back-fills. - // - // Once we assign a floating-pointer register to the stack, they all must be on the stack. - // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling - // continues only so long as no VFP CPRC has been allocated to a slot on the stack." - // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack - // and prevent any additional floating-point arguments from going in registers. - - bool anyFloatStackArgs = false; - -#endif // TARGET_ARM - -#ifdef UNIX_AMD64_ABI - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; -#endif // UNIX_AMD64_ABI - -#if defined(DEBUG) - // Check that we have valid information about call's argument types. - // For example: - // load byte; call(int) -> CALL(PUTARG_TYPE byte(IND byte)); - // load int; call(byte) -> CALL(PUTARG_TYPE int (IND int)); - // etc. - if (call->callSig != nullptr) - { - CORINFO_SIG_INFO* sig = call->callSig; - const unsigned sigArgsCount = sig->numArgs; - - GenTreeCall::Use* nodeArgs = call->gtCallArgs; - // It could include many arguments not included in `sig->numArgs`, for example, `this`, runtime lookup, cookie - // etc. - unsigned nodeArgsCount = call->NumChildren(); - - if (call->gtCallThisArg != nullptr) - { - // Handle the most common argument not in the `sig->numArgs`. - // so the following check works on more methods. - nodeArgsCount--; - } - - assert(nodeArgsCount >= sigArgsCount); - if ((nodeArgsCount == sigArgsCount) && - ((Target::g_tgtArgOrder == Target::ARG_ORDER_R2L) || (nodeArgsCount == 1))) - { - CORINFO_ARG_LIST_HANDLE sigArg = sig->args; - for (unsigned i = 0; i < sig->numArgs; ++i) - { - CORINFO_CLASS_HANDLE argClass; - const CorInfoType corType = strip(info.compCompHnd->getArgType(sig, sigArg, &argClass)); - const var_types sigType = JITtype2varType(corType); - - assert(nodeArgs != nullptr); - const GenTree* nodeArg = nodeArgs->GetNode(); - assert(nodeArg != nullptr); - const var_types nodeType = nodeArg->TypeGet(); - - assert((nodeType == sigType) || varTypeIsStruct(sigType) || - genTypeSize(nodeType) == genTypeSize(sigType)); - - sigArg = info.compCompHnd->getArgNext(sigArg); - nodeArgs = nodeArgs->GetNext(); - } - assert(nodeArgs == nullptr); - } - } -#endif // DEBUG - - for (args = call->gtCallArgs; args != nullptr; args = args->GetNext(), argIndex++) - { - argx = args->GetNode()->gtSkipPutArgType(); - - // Change the node to TYP_I_IMPL so we don't report GC info - // NOTE: We deferred this from the importer because of the inliner. - - if (argx->IsLocalAddrExpr() != nullptr) - { - argx->gtType = TYP_I_IMPL; - } - - // We should never have any ArgPlaceHolder nodes at this point. 
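Editor's note: the ARM back-filling scheme described in the comment earlier in this hunk can be modelled as a tiny allocator over the 16 single-precision argument slots. The sketch below handles only 1-slot floats and 2-slot doubles (HFAs and the full AAPCS wording are omitted); it is an illustration, not the JIT's implementation.

    #include <cstdint>
    #include <optional>

    class VfpArgAllocator
    {
        static constexpr unsigned kNumSlots = 16; // s0..s15

        unsigned nextSlot          = 0;     // next never-used slot
        uint16_t backFillMask      = 0;     // alignment holes available for back-fill
        bool     anyFloatStackArgs = false; // once true, all later FP args go to the stack

    public:
        // Returns the first slot assigned to the value, or nullopt for "stack".
        std::optional<unsigned> Allocate(unsigned sizeInSlots)
        {
            if (anyFloatStackArgs)
            {
                return std::nullopt;
            }

            // A single float can back-fill the lowest available hole.
            if ((sizeInSlots == 1) && (backFillMask != 0))
            {
                unsigned slot = 0;
                while (((backFillMask >> slot) & 1u) == 0)
                {
                    slot++;
                }
                backFillMask = (uint16_t)(backFillMask & ~(1u << slot));
                return slot;
            }

            unsigned start = nextSlot;
            if ((sizeInSlots == 2) && (start % 2 != 0))
            {
                backFillMask = (uint16_t)(backFillMask | (1u << start)); // remember the hole
                start++;
            }

            if (start + sizeInSlots > kNumSlots)
            {
                anyFloatStackArgs = true; // every later FP argument also goes to the stack
                return std::nullopt;
            }

            nextSlot = start + sizeInSlots;
            return start;
        }
    };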
- assert(!argx->IsArgPlaceHolderNode()); - - // Setup any HFA information about 'argx' - bool isHfaArg = false; - var_types hfaType = TYP_UNDEF; - unsigned hfaSlots = 0; - - bool passUsingFloatRegs; -#if !defined(OSX_ARM64_ABI) - unsigned argAlignBytes = TARGET_POINTER_SIZE; -#endif - unsigned size = 0; - unsigned byteSize = 0; - bool isRegArg = false; - bool isNonStandard = false; - regNumber nonStdRegNum = REG_NA; - - if (GlobalJitOptions::compFeatureHfa) - { - hfaType = GetHfaType(argx); - isHfaArg = varTypeIsValidHfaType(hfaType); - -#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) - // Make sure for vararg methods isHfaArg is not true. - isHfaArg = callIsVararg ? false : isHfaArg; -#endif // defined(TARGET_WINDOWS) && defined(TARGET_ARM64) - - if (isHfaArg) - { - isHfaArg = true; - hfaSlots = GetHfaCount(argx); - - // If we have a HFA struct it's possible we transition from a method that originally - // only had integer types to now start having FP types. We have to communicate this - // through this flag since LSRA later on will use this flag to determine whether - // or not to track the FP register set. - // - compFloatingPointUsed = true; - } - } - - const bool isFloatHfa = (hfaType == TYP_FLOAT); - -#ifdef TARGET_ARM - passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)) && !opts.compUseSoftFP; - bool passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG); - - // We don't use the "size" return value from InferOpSizeAlign(). - codeGen->InferOpSizeAlign(argx, &argAlignBytes); - - argAlignBytes = roundUp(argAlignBytes, TARGET_POINTER_SIZE); - - if (argAlignBytes == 2 * TARGET_POINTER_SIZE) - { - if (passUsingFloatRegs) - { - if (fltArgRegNum % 2 == 1) - { - fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); - fltArgRegNum++; - } - } - else if (passUsingIntRegs) - { - if (intArgRegNum % 2 == 1) - { - argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); - intArgRegNum++; - } - } - -#if defined(DEBUG) - if (argSlots % 2 == 1) - { - argSlots++; - } -#endif - } - -#elif defined(TARGET_ARM64) - - assert(!callIsVararg || !isHfaArg); - passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)); - -#elif defined(TARGET_AMD64) - - passUsingFloatRegs = varTypeIsFloating(argx); - -#elif defined(TARGET_X86) - - passUsingFloatRegs = false; - -#else -#error Unsupported or unset target architecture -#endif // TARGET* - - bool isBackFilled = false; - unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use - var_types structBaseType = TYP_STRUCT; - unsigned structSize = 0; - bool passStructByRef = false; - - bool isStructArg; - GenTree* actualArg = argx->gtEffectiveVal(true /* Commas only */); - - // - // Figure out the size of the argument. This is either in number of registers, or number of - // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and - // the stack. 
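Editor's note: the HFA classification consumed by the GetHfaType/GetHfaCount calls above boils down to "all fields share one floating-point type and there are one to four of them". A rough standalone model follows; the FpKind/ClassifyHfa names are illustrative, and nested structs and SIMD element types are ignored.

    #include <vector>

    enum class FpKind { None, Float, Double };

    FpKind ClassifyHfa(const std::vector<FpKind>& fieldTypes)
    {
        if (fieldTypes.empty() || fieldTypes.size() > 4)
        {
            return FpKind::None;
        }
        FpKind first = fieldTypes[0];
        if (first == FpKind::None)
        {
            return FpKind::None;
        }
        for (FpKind f : fieldTypes)
        {
            if (f != first)
            {
                return FpKind::None; // mixed field types: not homogeneous
            }
        }
        return first; // the HFA element type; the element count is fieldTypes.size()
    }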
- // - isStructArg = varTypeIsStruct(argx); - CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE; - if (isStructArg) - { - objClass = gtGetStructHandle(argx); - if (argx->TypeGet() == TYP_STRUCT) - { - // For TYP_STRUCT arguments we must have an OBJ, LCL_VAR or MKREFANY - switch (actualArg->OperGet()) - { - case GT_OBJ: - structSize = actualArg->AsObj()->GetLayout()->GetSize(); - assert(structSize == info.compCompHnd->getClassSize(objClass)); - break; - case GT_LCL_VAR: - structSize = lvaGetDesc(actualArg->AsLclVarCommon())->lvExactSize; - break; - case GT_MKREFANY: - structSize = info.compCompHnd->getClassSize(objClass); - break; - default: - BADCODE("illegal argument tree in fgInitArgInfo"); - break; - } - } - else - { - structSize = genTypeSize(argx); - assert(structSize == info.compCompHnd->getClassSize(objClass)); - } - } -#if defined(TARGET_AMD64) -#ifdef UNIX_AMD64_ABI - if (!isStructArg) - { - size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' - byteSize = genTypeSize(argx); - } - else - { - size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - byteSize = structSize; - eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); - } -#else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' - if (!isStructArg) - { - byteSize = genTypeSize(argx); - } - -#endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) - if (isStructArg) - { - if (isHfaArg) - { - // HFA structs are passed by value in multiple registers. - // The "size" in registers may differ the size in pointer-sized units. - CORINFO_CLASS_HANDLE structHnd = gtGetStructHandle(argx); - size = GetHfaCount(structHnd); - byteSize = info.compCompHnd->getClassSize(structHnd); - } - else - { - // Structs are either passed in 1 or 2 (64-bit) slots. - // Structs that are the size of 2 pointers are passed by value in multiple registers, - // if sufficient registers are available. - // Structs that are larger than 2 pointers (except for HFAs) are passed by - // reference (to a copy) - size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - byteSize = structSize; - if (size > 2) - { - size = 1; - } - } - // Note that there are some additional rules for multireg structs. - // (i.e they cannot be split between registers and the stack) - } - else - { - size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot' - byteSize = genTypeSize(argx); - } -#elif defined(TARGET_ARM) || defined(TARGET_X86) - if (isStructArg) - { - size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - byteSize = structSize; - } - else - { - // The typical case. - // Long/double type argument(s) will be modified as needed in Lowering. - size = genTypeStSz(argx->gtType); - byteSize = genTypeSize(argx); - } -#else -#error Unsupported or unset target architecture -#endif // TARGET_XXX - - if (isStructArg) - { - assert(argx == args->GetNode()); - assert(structSize != 0); - - structPassingKind howToPassStruct; - structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, structSize); - passStructByRef = (howToPassStruct == SPK_ByReference); - if (howToPassStruct == SPK_ByReference) - { - byteSize = TARGET_POINTER_SIZE; - } - else - { - byteSize = structSize; - } - - if (howToPassStruct == SPK_PrimitiveType) - { -#ifdef TARGET_ARM - // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct, - // or for a struct of two floats. 
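Editor's note: the ARM64 sizing logic in the hunk above rounds a struct up to pointer-sized slots and falls back to pass-by-reference once more than two slots would be needed (HFAs excepted), which is why 'size' collapses back to 1 in that case. A compact restatement with an illustrative 8-byte pointer size; the names and struct are hypothetical.

    struct Arm64StructPassing
    {
        unsigned sizeInSlots; // registers/stack slots occupied by the value itself
        bool     byRef;       // true => a pointer to a copy is what actually gets passed
    };

    Arm64StructPassing ClassifyArm64Struct(unsigned structSize, bool isHfa, unsigned hfaCount)
    {
        constexpr unsigned kPointerSize = 8;

        if (isHfa)
        {
            return {hfaCount, false}; // HFAs go by value in FP registers when available
        }

        unsigned slots = (structSize + kPointerSize - 1) / kPointerSize;
        if (slots > 2)
        {
            return {1, true}; // too big: pass the address of a local copy
        }
        return {slots, false};
    }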
This causes the struct to be address-taken. - if (structBaseType == TYP_DOUBLE) - { - size = 2; - } - else -#endif // TARGET_ARM - { - size = 1; - } - } - else if (passStructByRef) - { - size = 1; - } - } - - const var_types argType = args->GetNode()->TypeGet(); - if (args->GetNode()->OperIs(GT_PUTARG_TYPE)) - { - byteSize = genTypeSize(argType); - } - - // The 'size' value has now must have been set. (the original value of zero is an invalid value) - assert(size != 0); - assert(byteSize != 0); - -#if defined(OSX_ARM64_ABI) - // Arm64 Apple has a special ABI for passing small size arguments on stack, - // bytes are aligned to 1-byte, shorts to 2-byte, int/float to 4-byte, etc. - // It means passing 8 1-byte arguments on stack can take as small as 8 bytes. - unsigned argAlignBytes = eeGetArgAlignment(argType, isFloatHfa); -#endif - - // - // Figure out if the argument will be passed in a register. - // - - if (isRegParamType(genActualType(argx->TypeGet())) -#ifdef UNIX_AMD64_ABI - && (!isStructArg || structDesc.passedInRegisters) -#elif defined(TARGET_X86) - || (isStructArg && isTrivialPointerSizedStruct(objClass)) -#endif - ) - { -#ifdef TARGET_ARM - if (passUsingFloatRegs) - { - // First, see if it can be back-filled - if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) - (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot? - (size == 1)) // The size to back-fill is one float register - { - // Back-fill the register. - isBackFilled = true; - regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); - fltArgSkippedRegMask &= - ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask - nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); - assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG); - } - - // Does the entire float, double, or HFA fit in the FP arg registers? - // Check if the last register needed is still in the argument register range. - isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG; - - if (!isRegArg) - { - anyFloatStackArgs = true; - } - } - else - { - isRegArg = intArgRegNum < MAX_REG_ARG; - } -#elif defined(TARGET_ARM64) - if (passUsingFloatRegs) - { - // Check if the last register needed is still in the fp argument register range. - isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; - - // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers? - if (isHfaArg && !isRegArg) - { - // recompute the 'size' so that it represent the number of stack slots rather than the number of - // registers - // - unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); - size = roundupSize / TARGET_POINTER_SIZE; - - // We also must update fltArgRegNum so that we no longer try to - // allocate any new floating point registers for args - // This prevents us from backfilling a subsequent arg into d7 - // - fltArgRegNum = MAX_FLOAT_REG_ARG; - } - } - else - { - // Check if the last register needed is still in the int argument register range. - isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; - - // Did we run out of registers when we had a 16-byte struct (size===2) ? 
- // (i.e we only have one register remaining but we needed two registers to pass this arg) - // This prevents us from backfilling a subsequent arg into x7 - // - if (!isRegArg && (size > 1)) - { -#if defined(TARGET_WINDOWS) - // Arm64 windows native varargs allows splitting a 16 byte struct between stack - // and the last general purpose register. - if (callIsVararg) - { - // Override the decision and force a split. - isRegArg = isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs; - } - else -#endif // defined(TARGET_WINDOWS) - { - // We also must update intArgRegNum so that we no longer try to - // allocate any new general purpose registers for args - // - intArgRegNum = maxRegArgs; - } - } - } -#else // not TARGET_ARM or TARGET_ARM64 - -#if defined(UNIX_AMD64_ABI) - - // Here a struct can be passed in register following the classifications of its members and size. - // Now make sure there are actually enough registers to do so. - if (isStructArg) - { - unsigned int structFloatRegs = 0; - unsigned int structIntRegs = 0; - for (unsigned int i = 0; i < structDesc.eightByteCount; i++) - { - if (structDesc.IsIntegralSlot(i)) - { - structIntRegs++; - } - else if (structDesc.IsSseSlot(i)) - { - structFloatRegs++; - } - } - - isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) && - ((intArgRegNum + structIntRegs) <= MAX_REG_ARG); - } - else - { - if (passUsingFloatRegs) - { - isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG; - } - else - { - isRegArg = intArgRegNum < MAX_REG_ARG; - } - } -#else // !defined(UNIX_AMD64_ABI) - isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; -#endif // !defined(UNIX_AMD64_ABI) -#endif // TARGET_ARM - } - else - { - isRegArg = false; - } - - // If there are nonstandard args (outside the calling convention) they were inserted above - // and noted them in a table so we can recognize them here and build their argInfo. - // - // They should not affect the placement of any other args or stack space required. - // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. - isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum); - if (isNonStandard) - { - isRegArg = (nonStdRegNum != REG_STK); - } - else if (call->IsTailCallViaJitHelper()) - { - // We have already (before calling fgMorphArgs()) appended the 4 special args - // required by the x86 tailcall helper. These args are required to go on the - // stack. Force them to the stack here. - assert(numArgs >= 4); - if (argIndex >= numArgs - 4) - { - isRegArg = false; - } - } - - // Now we know if the argument goes in registers or not and how big it is. - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef TARGET_ARM - // If we ever allocate a floating point argument to the stack, then all - // subsequent HFA/float/double arguments go on the stack. - if (!isRegArg && passUsingFloatRegs) - { - for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum) - { - fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); - } - } - - // If we think we're going to split a struct between integer registers and the stack, check to - // see if we've already assigned a floating-point arg to the stack. 
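Editor's note: the UNIX_AMD64_ABI block above counts how many INTEGER and SSE eightbytes the struct was classified into and keeps it in registers only if both register files can absorb the pieces. A sketch of that check with stand-in constants for the SysV register counts; the enum and function names are illustrative.

    #include <vector>

    enum class EightByteClass { Integer, Sse };

    bool FitsInRegisters(const std::vector<EightByteClass>& eightBytes,
                         unsigned intRegsUsed, unsigned fltRegsUsed)
    {
        constexpr unsigned kMaxIntRegArgs   = 6; // rdi, rsi, rdx, rcx, r8, r9
        constexpr unsigned kMaxFloatRegArgs = 8; // xmm0..xmm7

        unsigned structIntRegs   = 0;
        unsigned structFloatRegs = 0;
        for (EightByteClass c : eightBytes)
        {
            if (c == EightByteClass::Integer)
            {
                structIntRegs++;
            }
            else
            {
                structFloatRegs++;
            }
        }

        return (intRegsUsed + structIntRegs <= kMaxIntRegArgs) &&
               (fltRegsUsed + structFloatRegs <= kMaxFloatRegArgs);
    }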
- if (isRegArg && // We decided above to use a register for the argument - !passUsingFloatRegs && // We're using integer registers - (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack - anyFloatStackArgs) // We've already used the stack for a floating-point argument - { - isRegArg = false; // Change our mind; don't pass this struct partially in registers - - // Skip the rest of the integer argument registers - for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum) - { - argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); - } - } -#endif // TARGET_ARM - - // Now create the fgArgTabEntry. - fgArgTabEntry* newArgEntry; - if (isRegArg) - { - regNumber nextRegNum = REG_STK; - -#if defined(UNIX_AMD64_ABI) - regNumber nextOtherRegNum = REG_STK; - unsigned int structFloatRegs = 0; - unsigned int structIntRegs = 0; -#endif // defined(UNIX_AMD64_ABI) - - if (isNonStandard) - { - nextRegNum = nonStdRegNum; - } -#if defined(UNIX_AMD64_ABI) - else if (isStructArg && structDesc.passedInRegisters) - { - // It is a struct passed in registers. Assign the next available register. - assert((structDesc.eightByteCount <= 2) && "Too many eightbytes."); - regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum}; - for (unsigned int i = 0; i < structDesc.eightByteCount; i++) - { - if (structDesc.IsIntegralSlot(i)) - { - *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs); - ++structIntRegs; - } - else if (structDesc.IsSseSlot(i)) - { - *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs); - ++structFloatRegs; - } - } - } -#endif // defined(UNIX_AMD64_ABI) - else - { - // fill in or update the argInfo table - nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) - : genMapIntRegArgNumToRegNum(intArgRegNum); - } - -#ifdef TARGET_AMD64 -#ifndef UNIX_AMD64_ABI - assert(size == 1); -#endif -#endif - - // This is a register argument - put it in the table - newArgEntry = - call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) - UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) - UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) - UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); - newArgEntry->SetIsBackFilled(isBackFilled); - newArgEntry->isNonStandard = isNonStandard; - - // Set up the next intArgRegNum and fltArgRegNum values. 
- if (!isBackFilled) - { -#if defined(UNIX_AMD64_ABI) - if (isStructArg) - { - // For this case, we've already set the regNums in the argTabEntry - intArgRegNum += structIntRegs; - fltArgRegNum += structFloatRegs; - } - else -#endif // defined(UNIX_AMD64_ABI) - { - if (!isNonStandard) - { -#if FEATURE_ARG_SPLIT - // Check for a split (partially enregistered) struct - if (!passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG)) - { - // This indicates a partial enregistration of a struct type - assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || - (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); - unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum; - assert((unsigned char)numRegsPartial == numRegsPartial); - call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); - } -#endif // FEATURE_ARG_SPLIT - - if (passUsingFloatRegs) - { - fltArgRegNum += size; - -#ifdef WINDOWS_AMD64_ABI - // Whenever we pass an integer register argument - // we skip the corresponding floating point register argument - intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); -#endif // WINDOWS_AMD64_ABI - // No supported architecture supports partial structs using float registers. - assert(fltArgRegNum <= MAX_FLOAT_REG_ARG); - } - else - { - // Increment intArgRegNum by 'size' registers - intArgRegNum += size; - -#ifdef WINDOWS_AMD64_ABI - fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); -#endif // WINDOWS_AMD64_ABI - } - } - } - } - } - else // We have an argument that is not passed in a register - { - // This is a stack argument - put it in the table - newArgEntry = call->fgArgInfo->AddStkArg(argIndex, argx, args, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg); -#ifdef UNIX_AMD64_ABI - // TODO-Amd64-Unix-CQ: This is temporary (see also in fgMorphArgs). - if (structDesc.passedInRegisters) - { - newArgEntry->structDesc.CopyFrom(structDesc); - } -#endif - } - - if (GlobalJitOptions::compFeatureHfa) - { - if (isHfaArg) - { - newArgEntry->SetHfaType(hfaType, hfaSlots); - } - } - - newArgEntry->SetMultiRegNums(); - - noway_assert(newArgEntry != nullptr); - if (newArgEntry->isStruct) - { - newArgEntry->passedByRef = passStructByRef; - newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; - } - else - { - newArgEntry->argType = argx->TypeGet(); - } - - DEBUG_ARG_SLOTS_ONLY(argSlots += size;) - } // end foreach argument loop - -#ifdef DEBUG - if (verbose) - { - JITDUMP("ArgTable for %d.%s after fgInitArgInfo:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); - call->fgArgInfo->Dump(this); - JITDUMP("\n"); - } -#endif -} - -//------------------------------------------------------------------------ -// fgMorphArgs: Walk and transform (morph) the arguments of a call -// -// Arguments: -// callNode - the call for which we are doing the argument morphing -// -// Return Value: -// Like most morph methods, this method returns the morphed node, -// though in this case there are currently no scenarios where the -// node itself is re-created. -// -// Notes: -// This calls fgInitArgInfo to create the 'fgArgInfo' for the call. -// If it has already been created, that method will simply return. -// -// This method changes the state of the call node. It uses the existence -// of gtCallLateArgs (the late arguments list) to determine if it has -// already done the first round of morphing. -// -// The first time it is called (i.e. during global morphing), this method -// computes the "late arguments". 
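Editor's note: the WINDOWS_AMD64_ABI updates above reflect that Windows x64 shares four argument positions between the integer registers and XMM0..XMM3, so consuming one kind also burns the matching position of the other. A minimal model of that bookkeeping (the type and method names are illustrative):

    #include <algorithm>

    struct Win64ArgRegs
    {
        unsigned intArgRegNum = 0; // next of RCX, RDX, R8, R9
        unsigned fltArgRegNum = 0; // next of XMM0..XMM3

        // Whether the argument is integer or floating-point, both cursors advance
        // in lock step, clamped at the four shared positions.
        void Consume(unsigned size = 1)
        {
            intArgRegNum = std::min(intArgRegNum + size, 4u);
            fltArgRegNum = std::min(fltArgRegNum + size, 4u);
        }

        bool HasFreeRegister() const { return intArgRegNum < 4; }
    };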
This is when it determines which arguments -// need to be evaluated to temps prior to the main argument setup, and which -// can be directly evaluated into the argument location. It also creates a -// second argument list (gtCallLateArgs) that does the final placement of the -// arguments, e.g. into registers or onto the stack. -// -// The "non-late arguments", aka the gtCallArgs, are doing the in-order -// evaluation of the arguments that might have side-effects, such as embedded -// assignments, calls or possible throws. In these cases, it and earlier -// arguments must be evaluated to temps. -// -// On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS), -// if we have any nested calls, we need to defer the copying of the argument -// into the fixed argument area until after the call. If the argument did not -// otherwise need to be computed into a temp, it is moved to gtCallLateArgs and -// replaced in the "early" arg list (gtCallArgs) with a placeholder node. - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif -GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) -{ - GenTreeCall::Use* args; - GenTree* argx; - - unsigned flagsSummary = 0; - - unsigned argIndex = 0; - - DEBUG_ARG_SLOTS_ONLY(unsigned argSlots = 0;) - - bool reMorphing = call->AreArgsComplete(); - - // Set up the fgArgInfo. - fgInitArgInfo(call); - JITDUMP("%sMorphing args for %d.%s:\n", (reMorphing) ? "Re" : "", call->gtTreeID, GenTree::OpName(call->gtOper)); - - // If we are remorphing, process the late arguments (which were determined by a previous caller). - if (reMorphing) - { - for (GenTreeCall::Use& use : call->LateArgs()) - { - use.SetNode(fgMorphTree(use.GetNode())); - flagsSummary |= use.GetNode()->gtFlags; - } - - assert(call->fgArgInfo != nullptr); - } - call->fgArgInfo->RemorphReset(); - - // First we morph the argument subtrees ('this' pointer, arguments, etc.). - // During the first call to fgMorphArgs we also record the - // information about late arguments we have in 'fgArgInfo'. - // This information is used later to contruct the gtCallLateArgs */ - - // Process the 'this' argument value, if present. - if (call->gtCallThisArg != nullptr) - { - argx = call->gtCallThisArg->GetNode(); - fgArgTabEntry* thisArgEntry = call->fgArgInfo->GetArgEntry(0, reMorphing); - argx = fgMorphTree(argx); - call->gtCallThisArg->SetNode(argx); - // This is a register argument - possibly update it in the table. - call->fgArgInfo->UpdateRegArg(thisArgEntry, argx, reMorphing); - flagsSummary |= argx->gtFlags; - - if (!reMorphing && call->IsExpandedEarly() && call->IsVirtualVtable()) - { - if (!argx->OperIsLocal()) - { - thisArgEntry->needTmp = true; - call->fgArgInfo->SetNeedsTemps(); - } - } - assert(argIndex == 0); - argIndex++; - DEBUG_ARG_SLOTS_ONLY(argSlots++;) - } - - // Note that this name is a bit of a misnomer - it indicates that there are struct args - // that occupy more than a single slot that are passed by value (not necessarily in regs). - bool hasMultiregStructArgs = false; - for (args = call->gtCallArgs; args != nullptr; args = args->GetNext(), argIndex++) - { - GenTree** parentArgx = &args->NodeRef(); - fgArgTabEntry* argEntry = call->fgArgInfo->GetArgEntry(argIndex, reMorphing); - - // Morph the arg node, and update the parent and argEntry pointers. 
- argx = *parentArgx; - argx = fgMorphTree(argx); - *parentArgx = argx; - assert(argx == args->GetNode()); - - DEBUG_ARG_SLOTS_ONLY(unsigned size = argEntry->getSize();) - CORINFO_CLASS_HANDLE copyBlkClass = NO_CLASS_HANDLE; - -#if defined(DEBUG_ARG_SLOTS) - if (argEntry->GetByteAlignment() == 2 * TARGET_POINTER_SIZE) - { - if (argSlots % 2 == 1) - { - argSlots++; - } - } -#endif // DEBUG - if (argEntry->isNonStandard) - { - // We need to update the node field for this nonStandard arg here - // as it may have been changed by the call to fgMorphTree. - call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); - flagsSummary |= argx->gtFlags; - continue; - } - DEBUG_ARG_SLOTS_ASSERT(size != 0); - DEBUG_ARG_SLOTS_ONLY(argSlots += argEntry->getSlotCount();) - - if (argx->IsLocalAddrExpr() != nullptr) - { - argx->gtType = TYP_I_IMPL; - } - - // Get information about this argument. - var_types hfaType = argEntry->GetHfaType(); - bool isHfaArg = (hfaType != TYP_UNDEF); - bool passUsingFloatRegs = argEntry->isPassedInFloatRegisters(); - unsigned structSize = 0; - - // Struct arguments may be morphed into a node that is not a struct type. - // In such case the fgArgTabEntry keeps track of whether the original node (before morphing) - // was a struct and the struct classification. - bool isStructArg = argEntry->isStruct; - - GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/); - if (isStructArg && varTypeIsStruct(argObj) && !argObj->OperIs(GT_ASG, GT_MKREFANY, GT_FIELD_LIST, GT_ARGPLACE)) - { - CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(argObj); - unsigned originalSize; - if (argObj->TypeGet() == TYP_STRUCT) - { - if (argObj->OperIs(GT_OBJ)) - { - // Get the size off the OBJ node. - originalSize = argObj->AsObj()->GetLayout()->GetSize(); - assert(originalSize == info.compCompHnd->getClassSize(objClass)); - } - else - { - // We have a BADCODE assert for this in fgInitArgInfo. - assert(argObj->OperIs(GT_LCL_VAR)); - originalSize = lvaGetDesc(argObj->AsLclVarCommon())->lvExactSize; - } - } - else - { - originalSize = genTypeSize(argx); - assert(originalSize == info.compCompHnd->getClassSize(objClass)); - } - unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); - var_types structBaseType = argEntry->argType; - - // First, handle the case where the argument is passed by reference. - if (argEntry->passedByRef) - { - DEBUG_ARG_SLOTS_ASSERT(size == 1); - copyBlkClass = objClass; -#ifdef UNIX_AMD64_ABI - assert(!"Structs are not passed by reference on x64/ux"); -#endif // UNIX_AMD64_ABI - } - else // This is passed by value. - { - // Check to see if we can transform this into load of a primitive type. - // 'size' must be the number of pointer sized items - DEBUG_ARG_SLOTS_ASSERT(size == roundupSize / TARGET_POINTER_SIZE); - - structSize = originalSize; - unsigned passingSize = originalSize; - - // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size. - // When it can do this is platform-dependent: - // - In general, it can be done for power of 2 structs that fit in a single register. - // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field. - // - This is irrelevant for X86, since structs are always passed by value on the stack. 
- - GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj); - bool canTransform = false; - - if (structBaseType != TYP_STRUCT) - { - if (isPow2(passingSize)) - { - canTransform = (!argEntry->IsHfaArg() || (passingSize == genTypeSize(argEntry->GetHfaType()))); - } - -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) - // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can - // only transform in that case if the arg is a local. - // TODO-CQ: This transformation should be applicable in general, not just for the ARM64 - // or UNIX_AMD64_ABI cases where they will be passed in registers. - else - { - canTransform = (lclVar != nullptr); - passingSize = genTypeSize(structBaseType); - } -#endif // TARGET_ARM64 || UNIX_AMD64_ABI - } - - if (!canTransform) - { -#if defined(TARGET_AMD64) -#ifndef UNIX_AMD64_ABI - // On Windows structs are always copied and passed by reference (handled above) unless they are - // passed by value in a single register. - assert(size == 1); - copyBlkClass = objClass; -#else // UNIX_AMD64_ABI - // On Unix, structs are always passed by value. - // We only need a copy if we have one of the following: - // - The sizes don't match for a non-lclVar argument. - // - We have a known struct type (e.g. SIMD) that requires multiple registers. - // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not - // actually passed in registers. - if (argEntry->isPassedInRegisters()) - { - if (argObj->OperIs(GT_OBJ)) - { - if (passingSize != structSize) - { - copyBlkClass = objClass; - } - } - else if (lclVar == nullptr) - { - // This should only be the case of a value directly producing a known struct type. - assert(argObj->TypeGet() != TYP_STRUCT); - if (argEntry->numRegs > 1) - { - copyBlkClass = objClass; - } - } - } -#endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) - if ((passingSize != structSize) && (lclVar == nullptr)) - { - copyBlkClass = objClass; - } -#endif - -#ifdef TARGET_ARM - // TODO-1stClassStructs: Unify these conditions across targets. - if (((lclVar != nullptr) && - (lvaGetPromotionType(lclVar->AsLclVarCommon()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT)) || - ((argObj->OperIs(GT_OBJ)) && (passingSize != structSize))) - { - copyBlkClass = objClass; - } - - if (structSize < TARGET_POINTER_SIZE) - { - copyBlkClass = objClass; - } -#endif // TARGET_ARM - } - else - { - // We have a struct argument that fits into a register, and it is either a power of 2, - // or a local. - // Change our argument, as needed, into a value of the appropriate type. 
- CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARM - DEBUG_ARG_SLOTS_ASSERT((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2))); -#else - DEBUG_ARG_SLOTS_ASSERT((size == 1) || (varTypeIsSIMD(structBaseType) && - size == (genTypeSize(structBaseType) / REGSIZE_BYTES))); -#endif - - assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize)); - - if (argObj->OperIs(GT_OBJ)) - { - argObj->ChangeOper(GT_IND); - - // Now see if we can fold *(&X) into X - if (argObj->AsOp()->gtOp1->gtOper == GT_ADDR) - { - GenTree* temp = argObj->AsOp()->gtOp1->AsOp()->gtOp1; - - // Keep the DONT_CSE flag in sync - // (as the addr always marks it for its op1) - temp->gtFlags &= ~GTF_DONT_CSE; - temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE); - DEBUG_DESTROY_NODE(argObj->AsOp()->gtOp1); // GT_ADDR - DEBUG_DESTROY_NODE(argObj); // GT_IND - - argObj = temp; - *parentArgx = temp; - argx = temp; - } - } - if (argObj->gtOper == GT_LCL_VAR) - { - unsigned lclNum = argObj->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (varDsc->lvPromoted) - { - if (varDsc->lvFieldCnt == 1) - { - // get the first and only promoted field - LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; - if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize) - { - // we will use the first and only promoted field - argObj->AsLclVarCommon()->SetLclNum(varDsc->lvFieldLclStart); - - if (varTypeIsEnregisterable(fieldVarDsc->TypeGet()) && - (genTypeSize(fieldVarDsc->TypeGet()) == originalSize)) - { - // Just use the existing field's type - argObj->gtType = fieldVarDsc->TypeGet(); - } - else - { - // Can't use the existing field's type, so use GT_LCL_FLD to swizzle - // to a new type - argObj->ChangeOper(GT_LCL_FLD); - argObj->gtType = structBaseType; - } - assert(varTypeIsEnregisterable(argObj->TypeGet())); - assert(copyBlkClass == NO_CLASS_HANDLE); - } - else - { - // use GT_LCL_FLD to swizzle the single field struct to a new type - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); - argObj->ChangeOper(GT_LCL_FLD); - argObj->gtType = structBaseType; - } - } - else - { - // The struct fits into a single register, but it has been promoted into its - // constituent fields, and so we have to re-assemble it - copyBlkClass = objClass; - } - } - else if (genTypeSize(varDsc->TypeGet()) != genTypeSize(structBaseType)) - { - // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD - argObj->ChangeOper(GT_LCL_FLD); - argObj->gtType = structBaseType; - } - } - else - { - // Not a GT_LCL_VAR, so we can just change the type on the node - argObj->gtType = structBaseType; - } - assert(varTypeIsEnregisterable(argObj->TypeGet()) || - ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType))); - } - -#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) - // TODO-CQ-XARCH: there is no need for a temp copy if we improve our code generation in - // `genPutStructArgStk` for xarch like we did it for Arm/Arm64. - - // We still have a struct unless we converted the GT_OBJ into a GT_IND above... 
- if (isHfaArg && passUsingFloatRegs) - { - } - else if (structBaseType == TYP_STRUCT) - { - // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, - // we must copyblk to a temp before doing the obj to avoid - // the obj reading memory past the end of the valuetype - CLANG_FORMAT_COMMENT_ANCHOR; - - if (roundupSize > originalSize) - { - copyBlkClass = objClass; - - // There are a few special cases where we can omit using a CopyBlk - // where we normally would need to use one. - - if (argObj->OperIs(GT_OBJ) && - argObj->AsObj()->gtGetOp1()->IsLocalAddrExpr() != nullptr) // Is the source a LclVar? - { - copyBlkClass = NO_CLASS_HANDLE; - } - } - } - -#endif // !UNIX_AMD64_ABI - } - } - - if (argEntry->isPassedInRegisters()) - { - call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); - } - else - { - call->fgArgInfo->UpdateStkArg(argEntry, argx, reMorphing); - } - - if (copyBlkClass != NO_CLASS_HANDLE) - { - fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass); - } - - if (argx->gtOper == GT_MKREFANY) - { - // 'Lower' the MKREFANY tree and insert it. - noway_assert(!reMorphing); - -#ifdef TARGET_X86 - // Build the mkrefany as a GT_FIELD_LIST - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - fieldList->AddField(this, argx->AsOp()->gtGetOp1(), OFFSETOF__CORINFO_TypedReference__dataPtr, TYP_BYREF); - fieldList->AddField(this, argx->AsOp()->gtGetOp2(), OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL); - fgArgTabEntry* fp = gtArgEntryByNode(call, argx); - args->SetNode(fieldList); - assert(fp->GetNode() == fieldList); -#else // !TARGET_X86 - - // Get a new temp - // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany - unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument")); - lvaSetStruct(tmp, impGetRefAnyClass(), false); - - // Build the mkrefany as a comma node: - // (tmp.ptr=argx),(tmp.type=handle) - GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__dataPtr); - GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__type); - destPtrSlot->SetFieldSeq(GetFieldSeqStore()->CreateSingleton(GetRefanyDataField())); - destPtrSlot->gtFlags |= GTF_VAR_DEF; - destTypeSlot->SetFieldSeq(GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField())); - destTypeSlot->gtFlags |= GTF_VAR_DEF; - - GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->AsOp()->gtOp1); - GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->AsOp()->gtOp2); - GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot); - - // Change the expression to "(tmp=val)" - args->SetNode(asg); - - // EvalArgsToTemps will cause tmp to actually get loaded as the argument - call->fgArgInfo->EvalToTmp(argEntry, tmp, asg); - lvaSetVarAddrExposed(tmp); -#endif // !TARGET_X86 - } - -#if FEATURE_MULTIREG_ARGS - if (isStructArg) - { - if (((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) || - (isHfaArg && argx->TypeGet() == TYP_STRUCT)) - { - hasMultiregStructArgs = true; - } - } -#ifdef TARGET_ARM - else if ((argEntry->argType == TYP_LONG) || (argEntry->argType == TYP_DOUBLE)) - { - assert((argEntry->numRegs == 2) || (argEntry->numSlots == 2)); - } -#endif - else - { - // We must have exactly one register or slot. 
- assert(((argEntry->numRegs == 1) && (argEntry->GetStackSlotsNumber() == 0)) || - ((argEntry->numRegs == 0) && (argEntry->GetStackSlotsNumber() == 1))); - } -#endif - -#if defined(TARGET_X86) - if (isStructArg) - { - GenTree* lclNode = argx->OperIs(GT_LCL_VAR) ? argx : fgIsIndirOfAddrOfLocal(argx); - if ((lclNode != nullptr) && - (lvaGetPromotionType(lclNode->AsLclVarCommon()->GetLclNum()) == Compiler::PROMOTION_TYPE_INDEPENDENT)) - { - // Make a GT_FIELD_LIST of the field lclVars. - GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon(); - LclVarDsc* varDsc = lvaGetDesc(lcl); - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - - fgArgTabEntry* fp = gtArgEntryByNode(call, argx); - args->SetNode(fieldList); - assert(fp->GetNode() == fieldList); - - for (unsigned fieldLclNum = varDsc->lvFieldLclStart; - fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum) - { - LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); - GenTree* fieldLcl; - - if (fieldLclNum == varDsc->lvFieldLclStart) - { - lcl->SetLclNum(fieldLclNum); - lcl->ChangeOper(GT_LCL_VAR); - lcl->gtType = fieldVarDsc->TypeGet(); - fieldLcl = lcl; - } - else - { - fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); - } - - fieldList->AddField(this, fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); - } - } - } -#endif // TARGET_X86 - - flagsSummary |= args->GetNode()->gtFlags; - - } // end foreach argument loop - - if (!reMorphing) - { - call->fgArgInfo->ArgsComplete(); - } - - /* Process the function address, if indirect call */ - - if (call->gtCallType == CT_INDIRECT) - { - call->gtCallAddr = fgMorphTree(call->gtCallAddr); - // Const CSE may create an assignment node here - flagsSummary |= call->gtCallAddr->gtFlags; - } - -#if FEATURE_FIXED_OUT_ARGS - - // Record the outgoing argument size. If the call is a fast tail - // call, it will setup its arguments in incoming arg area instead - // of the out-going arg area, so we don't need to track the - // outgoing arg size. - if (!call->IsFastTailCall()) - { - -#if defined(UNIX_AMD64_ABI) - // This is currently required for the UNIX ABI to work correctly. 
- opts.compNeedToAlignFrame = true; -#endif // UNIX_AMD64_ABI - - const unsigned outgoingArgSpaceSize = GetOutgoingArgByteSize(call->fgArgInfo->GetNextSlotByteOffset()); - -#if defined(DEBUG_ARG_SLOTS) - unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum(); - assert(outgoingArgSpaceSize == preallocatedArgCount * REGSIZE_BYTES); -#endif - call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL)); - -#ifdef DEBUG - if (verbose) - { - const fgArgInfo* argInfo = call->fgArgInfo; -#if defined(DEBUG_ARG_SLOTS) - printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, nextSlotByteOffset=%d, " - "outgoingArgSpaceSize=%d\n", - argSlots, preallocatedArgCount, argInfo->GetNextSlotNum(), argInfo->GetNextSlotByteOffset(), - outgoingArgSpaceSize); -#else - printf("nextSlotByteOffset=%d, outgoingArgSpaceSize=%d\n", argInfo->GetNextSlotByteOffset(), - outgoingArgSpaceSize); -#endif - } -#endif - } -#endif // FEATURE_FIXED_OUT_ARGS - - // Clear the ASG and EXCEPT (if possible) flags on the call node - call->gtFlags &= ~GTF_ASG; - if (!call->OperMayThrow(this)) - { - call->gtFlags &= ~GTF_EXCEPT; - } - - // Union in the side effect flags from the call's operands - call->gtFlags |= flagsSummary & GTF_ALL_EFFECT; - - // If we are remorphing or don't have any register arguments or other arguments that need - // temps, then we don't need to call SortArgs() and EvalArgsToTemps(). - // - if (!reMorphing && (call->fgArgInfo->HasRegArgs() || call->fgArgInfo->NeedsTemps())) - { - // Do the 'defer or eval to temp' analysis. - - call->fgArgInfo->SortArgs(); - - call->fgArgInfo->EvalArgsToTemps(); - } - - if (hasMultiregStructArgs) - { - fgMorphMultiregStructArgs(call); - } - -#ifdef DEBUG - if (verbose) - { - JITDUMP("ArgTable for %d.%s after fgMorphArgs:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); - call->fgArgInfo->Dump(this); - JITDUMP("\n"); - } -#endif - return call; -} -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - -//----------------------------------------------------------------------------- -// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and -// call fgMorphMultiregStructArg on each of them. -// -// Arguments: -// call : a GenTreeCall node that has one or more TYP_STRUCT arguments\. -// -// Notes: -// We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types. -// It will ensure that the struct arguments are in the correct form. -// If this method fails to find any TYP_STRUCT arguments it will assert. -// -void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) -{ - bool foundStructArg = false; - unsigned flagsSummary = 0; - -#ifdef TARGET_X86 - assert(!"Logic error: no MultiregStructArgs for X86"); -#endif -#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI"); -#endif - - for (GenTreeCall::Use& use : call->Args()) - { - // For late arguments the arg tree that is overridden is in the gtCallLateArgs list. - // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.) - // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping - // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself, - // otherwise points to the list in the late args list. 
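// Editorial note (not part of the patch): a simplified sketch of the late-arg
// mapping described in the comment above, assuming an argument entry either
// owns its node directly or refers to a node stored in a separate late list.
// The types ArgEntry, Node and the function findLateUseIndex are illustrative
// assumptions, not JIT types.
#include <cassert>
#include <cstddef>
#include <vector>

struct Node
{
    int id;
};

struct ArgEntry
{
    Node* node;   // the tree that is actually passed to the callee
    bool  isLate; // true when 'node' lives in the late-args list
};

// Return the position of the entry's node in the late-args list, or -1 when
// the argument is not a late argument and therefore owns its node directly.
int findLateUseIndex(const ArgEntry& entry, const std::vector<Node*>& lateArgs)
{
    if (!entry.isLate)
    {
        return -1;
    }
    for (std::size_t i = 0; i < lateArgs.size(); i++)
    {
        if (lateArgs[i] == entry.node)
        {
            return static_cast<int>(i); // this is the use that gets updated later
        }
    }
    assert(!"late argument not found in the late-args list");
    return -1;
}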
- bool isLateArg = (use.GetNode()->gtFlags & GTF_LATE_ARG) != 0; - fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, use.GetNode()); - assert(fgEntryPtr != nullptr); - GenTree* argx = fgEntryPtr->GetNode(); - GenTreeCall::Use* lateUse = nullptr; - GenTree* lateNode = nullptr; - - if (isLateArg) - { - for (GenTreeCall::Use& lateArgUse : call->LateArgs()) - { - GenTree* argNode = lateArgUse.GetNode(); - if (argx == argNode) - { - lateUse = &lateArgUse; - lateNode = argNode; - break; - } - } - assert((lateUse != nullptr) && (lateNode != nullptr)); - } - - if (!fgEntryPtr->isStruct) - { - continue; - } - - unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->GetStackSlotsNumber()); - if ((size > 1) || (fgEntryPtr->IsHfaArg() && argx->TypeGet() == TYP_STRUCT)) - { - foundStructArg = true; - if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) - { - if (fgEntryPtr->IsHfaRegArg()) - { - var_types hfaType = fgEntryPtr->GetHfaType(); - unsigned structSize; - if (argx->OperIs(GT_OBJ)) - { - structSize = argx->AsObj()->GetLayout()->GetSize(); - } - else if (varTypeIsSIMD(argx)) - { - structSize = genTypeSize(argx); - } - else - { - assert(argx->OperIs(GT_LCL_VAR)); - structSize = lvaGetDesc(argx->AsLclVar()->GetLclNum())->lvExactSize; - } - assert(structSize > 0); - if (structSize == genTypeSize(hfaType)) - { - if (argx->OperIs(GT_OBJ)) - { - argx->SetOper(GT_IND); - } - - argx->gtType = hfaType; - } - } - - GenTree* newArgx = fgMorphMultiregStructArg(argx, fgEntryPtr); - - // Did we replace 'argx' with a new tree? - if (newArgx != argx) - { - // link the new arg node into either the late arg list or the gtCallArgs list - if (isLateArg) - { - lateUse->SetNode(newArgx); - } - else - { - use.SetNode(newArgx); - } - - assert(fgEntryPtr->GetNode() == newArgx); - } - } - } - } - - // We should only call this method when we actually have one or more multireg struct args - assert(foundStructArg); - - // Update the flags - call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); -} - -//----------------------------------------------------------------------------- -// fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list, -// morph the argument as needed to be passed correctly. -// -// Arguments: -// arg - A GenTree node containing a TYP_STRUCT arg -// fgEntryPtr - the fgArgTabEntry information for the current 'arg' -// -// Notes: -// The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT. -// If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the -// stack are marked as doNotEnregister, and then we return. -// -// If it is passed by register, we mutate the argument into the GT_FIELD_LIST form -// which is only used for struct arguments. -// -// If arg is a LclVar we check if it is struct promoted and has the right number of fields -// and if they are at the appropriate offsets we will use the struct promted fields -// in the GT_FIELD_LIST nodes that we create. -// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements -// we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct -// this also forces the struct to be stack allocated into the local frame. -// For the GT_OBJ case will clone the address expression and generate two (or more) -// indirections. -// Currently the implementation handles ARM64/ARM and will NYI for other architectures. 
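// Editorial note (not part of the patch): a standalone illustration of the
// decomposition described in the header comment above -- passing a 16-byte
// struct as two register-sized pieces read at offsets 0 and 8. The names
// TwoRegs and splitIntoTwoRegs are assumptions made for this sketch; the JIT
// expresses the same idea with FIELD_LIST/IND (or LCL_FLD) nodes instead.
#include <cstdint>
#include <cstring>

struct TwoRegs
{
    std::uint64_t lo; // bytes [0, 8)
    std::uint64_t hi; // bytes [8, 16)
};

// Read a 16-byte object as two 8-byte loads, one load per target register.
TwoRegs splitIntoTwoRegs(const void* base)
{
    TwoRegs regs;
    std::memcpy(&regs.lo, static_cast<const char*>(base) + 0, sizeof regs.lo);
    std::memcpy(&regs.hi, static_cast<const char*>(base) + 8, sizeof regs.hi);
    return regs;
}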
-// -GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) -{ - assert(varTypeIsStruct(arg->TypeGet())); - -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) - NYI("fgMorphMultiregStructArg requires implementation for this target"); -#endif - -#ifdef TARGET_ARM - if ((fgEntryPtr->IsSplit() && fgEntryPtr->GetStackSlotsNumber() + fgEntryPtr->numRegs > 4) || - (!fgEntryPtr->IsSplit() && fgEntryPtr->GetRegNum() == REG_STK)) -#else - if (fgEntryPtr->GetRegNum() == REG_STK) -#endif - { - GenTreeLclVarCommon* lcl = nullptr; - GenTree* actualArg = arg->gtEffectiveVal(); - - if (actualArg->OperGet() == GT_OBJ) - { - if (actualArg->gtGetOp1()->OperIs(GT_ADDR) && actualArg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR)) - { - lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVarCommon(); - } - } - else if (actualArg->OperGet() == GT_LCL_VAR) - { - lcl = actualArg->AsLclVarCommon(); - } - if (lcl != nullptr) - { - if (lvaGetPromotionType(lcl->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT) - { - arg = fgMorphLclArgToFieldlist(lcl); - } - else if (arg->TypeGet() == TYP_STRUCT) - { - // If this is a non-register struct, it must be referenced from memory. - if (!actualArg->OperIs(GT_OBJ)) - { - // Create an Obj of the temp to use it as a call argument. - arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); - arg = gtNewObjNode(lvaGetStruct(lcl->GetLclNum()), arg); - } - // Its fields will need to be accessed by address. - lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUG_ARG(DNER_IsStructArg)); - } - } - - return arg; - } - -#if FEATURE_MULTIREG_ARGS - // Examine 'arg' and setup argValue objClass and structSize - // - const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); - GenTree* argValue = arg; // normally argValue will be arg, but see right below - unsigned structSize = 0; - - if (arg->TypeGet() != TYP_STRUCT) - { - structSize = genTypeSize(arg->TypeGet()); - assert(structSize == info.compCompHnd->getClassSize(objClass)); - } - else if (arg->OperGet() == GT_OBJ) - { - GenTreeObj* argObj = arg->AsObj(); - const ClassLayout* objLayout = argObj->GetLayout(); - structSize = objLayout->GetSize(); - assert(structSize == info.compCompHnd->getClassSize(objClass)); - - // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR. - GenTree* op1 = argObj->gtOp1; - if (op1->OperGet() == GT_ADDR) - { - GenTree* underlyingTree = op1->AsOp()->gtOp1; - - // Only update to the same type. 
- if (underlyingTree->OperIs(GT_LCL_VAR)) - { - const GenTreeLclVar* lclVar = underlyingTree->AsLclVar(); - const LclVarDsc* varDsc = lvaGetDesc(lclVar); - if (ClassLayout::AreCompatible(varDsc->GetLayout(), objLayout)) - { - argValue = underlyingTree; - } - } - } - } - else if (arg->OperGet() == GT_LCL_VAR) - { - GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); - unsigned varNum = varNode->GetLclNum(); - assert(varNum < lvaCount); - LclVarDsc* varDsc = &lvaTable[varNum]; - - structSize = varDsc->lvExactSize; - assert(structSize == info.compCompHnd->getClassSize(objClass)); - } - else - { - structSize = info.compCompHnd->getClassSize(objClass); - } - - var_types hfaType = TYP_UNDEF; - var_types elemType = TYP_UNDEF; - unsigned elemCount = 0; - unsigned elemSize = 0; - var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0 - - hfaType = fgEntryPtr->GetHfaType(); - if (varTypeIsValidHfaType(hfaType) -#if !defined(HOST_UNIX) && defined(TARGET_ARM64) - && !fgEntryPtr->IsVararg() -#endif // !defined(HOST_UNIX) && defined(TARGET_ARM64) - ) - { - elemType = hfaType; - elemSize = genTypeSize(elemType); - elemCount = structSize / elemSize; - assert(elemSize * elemCount == structSize); - for (unsigned inx = 0; inx < elemCount; inx++) - { - type[inx] = elemType; - } - } - else - { - assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); - BYTE gcPtrs[MAX_ARG_REG_COUNT]; - elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; - info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); - - for (unsigned inx = 0; inx < elemCount; inx++) - { -#ifdef UNIX_AMD64_ABI - if (gcPtrs[inx] == TYPE_GC_NONE) - { - type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], - fgEntryPtr->structDesc.eightByteSizes[inx]); - } - else -#endif // UNIX_AMD64_ABI - { - type[inx] = getJitGCType(gcPtrs[inx]); - } - } - -#ifndef UNIX_AMD64_ABI - if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) - { - elemSize = TARGET_POINTER_SIZE; - // We can safely widen this to aligned bytes since we are loading from - // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and - // lives in the stack frame or will be a promoted field. - // - structSize = elemCount * TARGET_POINTER_SIZE; - } - else // we must have a GT_OBJ - { - assert(argValue->OperGet() == GT_OBJ); - - // We need to load the struct from an arbitrary address - // and we can't read past the end of the structSize - // We adjust the last load type here - // - unsigned remainingBytes = structSize % TARGET_POINTER_SIZE; - unsigned lastElem = elemCount - 1; - if (remainingBytes != 0) - { - switch (remainingBytes) - { - case 1: - type[lastElem] = TYP_BYTE; - break; - case 2: - type[lastElem] = TYP_SHORT; - break; -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) - case 4: - type[lastElem] = TYP_INT; - break; -#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) - default: - noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); - break; - } - } - } -#endif // !UNIX_AMD64_ABI - } - - // We should still have a TYP_STRUCT - assert(varTypeIsStruct(argValue->TypeGet())); - - GenTreeFieldList* newArg = nullptr; - - // Are we passing a struct LclVar? 
- // - if (argValue->OperGet() == GT_LCL_VAR) - { - GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); - unsigned varNum = varNode->GetLclNum(); - assert(varNum < lvaCount); - LclVarDsc* varDsc = &lvaTable[varNum]; - - // At this point any TYP_STRUCT LclVar must be an aligned struct - // or an HFA struct, both which are passed by value. - // - assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa()); - - varDsc->lvIsMultiRegArg = true; - -#ifdef DEBUG - if (verbose) - { - JITDUMP("Multireg struct argument V%02u : ", varNum); - fgEntryPtr->Dump(); - } -#endif // DEBUG - -#ifndef UNIX_AMD64_ABI - // This local variable must match the layout of the 'objClass' type exactly - if (varDsc->lvIsHfa() -#if !defined(HOST_UNIX) && defined(TARGET_ARM64) - && !fgEntryPtr->IsVararg() -#endif // !defined(HOST_UNIX) && defined(TARGET_ARM64) - ) - { - // We have a HFA struct. - noway_assert(elemType == varDsc->GetHfaType()); - noway_assert(elemSize == genTypeSize(elemType)); - noway_assert(elemCount == (varDsc->lvExactSize / elemSize)); - noway_assert(elemSize * elemCount == varDsc->lvExactSize); - - for (unsigned inx = 0; (inx < elemCount); inx++) - { - noway_assert(type[inx] == elemType); - } - } - else - { -#if defined(TARGET_ARM64) - // We must have a 16-byte struct (non-HFA) - noway_assert(elemCount == 2); -#elif defined(TARGET_ARM) - noway_assert(elemCount <= 4); -#endif - - for (unsigned inx = 0; inx < elemCount; inx++) - { - var_types currentGcLayoutType = varDsc->GetLayout()->GetGCPtrType(inx); - - // We setup the type[inx] value above using the GC info from 'objClass' - // This GT_LCL_VAR must have the same GC layout info - // - if (varTypeIsGC(currentGcLayoutType)) - { - noway_assert(type[inx] == currentGcLayoutType); - } - else - { - // We may have use a small type when we setup the type[inx] values above - // We can safely widen this to TYP_I_IMPL - type[inx] = TYP_I_IMPL; - } - } - } -#endif // !UNIX_AMD64_ABI - -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) - // Is this LclVar a promoted struct with exactly 2 fields? - // TODO-ARM64-CQ: Support struct promoted HFA types here - if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && (!varDsc->lvIsHfa() -#if !defined(HOST_UNIX) && defined(TARGET_ARM64) - && !fgEntryPtr->IsVararg() -#endif // !defined(HOST_UNIX) && defined(TARGET_ARM64) - )) - { - // See if we have two promoted fields that start at offset 0 and 8? - unsigned loVarNum = lvaGetFieldLocal(varDsc, 0); - unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE); - - // Did we find the promoted fields at the necessary offsets? - if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM)) - { - LclVarDsc* loVarDsc = &lvaTable[loVarNum]; - LclVarDsc* hiVarDsc = &lvaTable[hiVarNum]; - - var_types loType = loVarDsc->lvType; - var_types hiType = hiVarDsc->lvType; - - if ((varTypeIsFloating(loType) != genIsValidFloatReg(fgEntryPtr->GetRegNum(0))) || - (varTypeIsFloating(hiType) != genIsValidFloatReg(fgEntryPtr->GetRegNum(1)))) - { - // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer - // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered) - // - JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", - varNum); - // - // we call lvaSetVarDoNotEnregister and do the proper transformation below. 
- // - } - else - { - // We can use the struct promoted field as the two arguments - - // Create a new tree for 'arg' - // replace the existing LDOBJ(ADDR(LCLVAR)) - // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr)) - // - - newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); - newArg->AddField(this, gtNewLclvNode(loVarNum, loType), 0, loType); - newArg->AddField(this, gtNewLclvNode(hiVarNum, hiType), TARGET_POINTER_SIZE, hiType); - } - } - } - else - { - // - // We will create a list of GT_LCL_FLDs nodes to pass this struct - // - lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); - } -#elif defined(TARGET_ARM) - // Is this LclVar a promoted struct with exactly same size? - if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa()) - { - // See if we have promoted fields? - unsigned varNums[4]; - bool hasBadVarNum = false; - for (unsigned inx = 0; inx < elemCount; inx++) - { - varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx); - if (varNums[inx] == BAD_VAR_NUM) - { - hasBadVarNum = true; - break; - } - } - - // Did we find the promoted fields at the necessary offsets? - if (!hasBadVarNum) - { - LclVarDsc* varDscs[4]; - var_types varType[4]; - bool varIsFloat = false; - - for (unsigned inx = 0; inx < elemCount; inx++) - { - varDscs[inx] = &lvaTable[varNums[inx]]; - varType[inx] = varDscs[inx]->lvType; - if (varTypeIsFloating(varType[inx])) - { - // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the - // integer - // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered) - // - JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", - varNum); - // - // we call lvaSetVarDoNotEnregister and do the proper transformation below. - // - varIsFloat = true; - break; - } - } - - if (!varIsFloat) - { - newArg = fgMorphLclArgToFieldlist(varNode); - } - } - } - else - { - // - // We will create a list of GT_LCL_FLDs nodes to pass this struct - // - lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); - } -#endif // TARGET_ARM - } - - // If we didn't set newarg to a new List Node tree - // - if (newArg == nullptr) - { - if (fgEntryPtr->GetRegNum() == REG_STK) - { - // We leave this stack passed argument alone - return arg; - } - - // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted ) - // A GT_LCL_FLD could also contain a 16-byte struct or HFA struct inside it? 
- // - if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) - { - GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); - unsigned varNum = varNode->GetLclNum(); - assert(varNum < lvaCount); - LclVarDsc* varDsc = &lvaTable[varNum]; - - unsigned baseOffset = varNode->GetLclOffs(); - unsigned lastOffset = baseOffset + structSize; - - // The allocated size of our LocalVar must be at least as big as lastOffset - assert(varDsc->lvSize() >= lastOffset); - - if (varDsc->HasGCPtr()) - { - // alignment of the baseOffset is required - noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0); -#ifndef UNIX_AMD64_ABI - noway_assert(elemSize == TARGET_POINTER_SIZE); -#endif - unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE; - ClassLayout* layout = varDsc->GetLayout(); - for (unsigned inx = 0; (inx < elemCount); inx++) - { - // The GC information must match what we setup using 'objClass' - if (layout->IsGCPtr(baseIndex + inx) || varTypeGCtype(type[inx])) - { - noway_assert(type[inx] == layout->GetGCPtrType(baseIndex + inx)); - } - } - } - else // this varDsc contains no GC pointers - { - for (unsigned inx = 0; inx < elemCount; inx++) - { - // The GC information must match what we setup using 'objClass' - noway_assert(!varTypeIsGC(type[inx])); - } - } - - // - // We create a list of GT_LCL_FLDs nodes to pass this struct - // - lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); - - // Create a new tree for 'arg' - // replace the existing LDOBJ(ADDR(LCLVAR)) - // with a FIELD_LIST(LCLFLD-LO, LCLFLD-HI) - // - unsigned offset = baseOffset; - newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); - for (unsigned inx = 0; inx < elemCount; inx++) - { - GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); - newArg->AddField(this, nextLclFld, offset, type[inx]); - offset += genTypeSize(type[inx]); - } - } - // Are we passing a GT_OBJ struct? - // - else if (argValue->OperGet() == GT_OBJ) - { - GenTreeObj* argObj = argValue->AsObj(); - GenTree* baseAddr = argObj->gtOp1; - var_types addrType = baseAddr->TypeGet(); - - if (baseAddr->OperGet() == GT_ADDR) - { - GenTree* addrTaken = baseAddr->AsOp()->gtOp1; - if (addrTaken->IsLocal()) - { - GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon(); - unsigned varNum = varNode->GetLclNum(); - // We access non-struct type (for example, long) as a struct type. - // Make sure lclVar lives on stack to make sure its fields are accessible by address. - lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField)); - } - } - - // Create a new tree for 'arg' - // replace the existing LDOBJ(EXPR) - // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...) - // - - newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); - unsigned offset = 0; - for (unsigned inx = 0; inx < elemCount; inx++) - { - GenTree* curAddr = baseAddr; - if (offset != 0) - { - GenTree* baseAddrDup = gtCloneExpr(baseAddr); - noway_assert(baseAddrDup != nullptr); - curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); - } - else - { - curAddr = baseAddr; - } - GenTree* curItem = gtNewIndir(type[inx], curAddr); - - // For safety all GT_IND should have at least GT_GLOB_REF set. 
- curItem->gtFlags |= GTF_GLOB_REF; - - newArg->AddField(this, curItem, offset, type[inx]); - offset += genTypeSize(type[inx]); - } - } - } - -#ifdef DEBUG - // If we reach here we should have set newArg to something - if (newArg == nullptr) - { - gtDispTree(argValue); - assert(!"Missing case in fgMorphMultiregStructArg"); - } -#endif - - noway_assert(newArg != nullptr); - -#ifdef DEBUG - if (verbose) - { - printf("fgMorphMultiregStructArg created tree:\n"); - gtDispTree(newArg); - } -#endif - - arg = newArg; // consider calling fgMorphTree(newArg); - -#endif // FEATURE_MULTIREG_ARGS - - return arg; -} - -//------------------------------------------------------------------------ -// fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields -// -// Arguments: -// lcl - The GT_LCL_VAR node we will transform -// -// Return value: -// The new GT_FIELD_LIST that we have created. -// -GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) -{ - LclVarDsc* varDsc = lvaGetDesc(lcl); - assert(varDsc->lvPromoted); - unsigned fieldCount = varDsc->lvFieldCnt; - unsigned fieldLclNum = varDsc->lvFieldLclStart; - - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - for (unsigned i = 0; i < fieldCount; i++) - { - LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); - GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); - fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); - fieldLclNum++; - } - return fieldList; -} - -//------------------------------------------------------------------------ -// fgMakeOutgoingStructArgCopy: make a copy of a struct variable if necessary, -// to pass to a callee. -// -// Arguments: -// call - call being processed -// args - args for the call -/// argIndex - arg being processed -// copyBlkClass - class handle for the struct -// -// Return value: -// tree that computes address of the outgoing arg -// -void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, - GenTreeCall::Use* args, - unsigned argIndex, - CORINFO_CLASS_HANDLE copyBlkClass) -{ - GenTree* argx = args->GetNode(); - noway_assert(argx->gtOper != GT_MKREFANY); - fgArgTabEntry* argEntry = Compiler::gtArgEntryByNode(call, argx); - - // If we're optimizing, see if we can avoid making a copy. - // - // We don't need a copy if this is the last use of an implicit by-ref local. - // - if (opts.OptimizationEnabled()) - { - GenTreeLclVar* const lcl = argx->IsImplicitByrefParameterValue(this); - - if (lcl != nullptr) - { - const unsigned varNum = lcl->GetLclNum(); - LclVarDsc* const varDsc = lvaGetDesc(varNum); - const unsigned short totalAppearances = varDsc->lvRefCnt(RCS_EARLY); - - // We don't have liveness so we rely on other indications of last use. - // - // We handle these cases: - // - // * (must not copy) If the call is a tail call, the use is a last use. - // We must skip the copy if we have a fast tail call. - // - // * (may not copy) if the call is noreturn, the use is a last use. - // We also check for just one reference here as we are not doing - // alias analysis of the call's parameters, or checking if the call - // site is not within some try region. - // - // * (may not copy) if there is exactly one use of the local in the method, - // and the call is not in loop, this is a last use. 
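// Editorial note (not part of the patch): the three "last use" heuristics
// listed in the comment above, restated as a standalone predicate. The
// parameter names are assumptions for this sketch; the real code derives them
// from the call node, the early ref count, and fgMightHaveLoop().
bool isLikelyLastUse(bool isTailCall, bool isNoReturnCall, unsigned totalAppearances, bool mightHaveLoop)
{
    const bool tailCallLastUse = isTailCall;
    const bool singleUseNoLoop = (totalAppearances == 1) && !mightHaveLoop;
    const bool noReturnLastUse = (totalAppearances == 1) && isNoReturnCall;
    return tailCallLastUse || singleUseNoLoop || noReturnLastUse;
}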
- // - const bool isTailCallLastUse = call->IsTailCall(); - const bool isCallLastUse = (totalAppearances == 1) && !fgMightHaveLoop(); - const bool isNoReturnLastUse = (totalAppearances == 1) && call->IsNoReturn(); - if (isTailCallLastUse || isCallLastUse || isNoReturnLastUse) - { - varDsc->setLvRefCnt(0, RCS_EARLY); - args->SetNode(lcl); - assert(argEntry->GetNode() == lcl); - - JITDUMP("did not need to make outgoing copy for last use of implicit byref V%2d\n", varNum); - return; - } - } - } - - JITDUMP("making an outgoing copy for struct arg\n"); - - if (fgOutgoingArgTemps == nullptr) - { - fgOutgoingArgTemps = hashBv::Create(this); - } - - unsigned tmp = 0; - bool found = false; - - // Attempt to find a local we have already used for an outgoing struct and reuse it. - // We do not reuse within a statement. - if (!opts.MinOpts()) - { - indexType lclNum; - FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps) - { - LclVarDsc* varDsc = &lvaTable[lclNum]; - if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) && - !fgCurrentlyInUseArgTemps->testBit(lclNum)) - { - tmp = (unsigned)lclNum; - found = true; - JITDUMP("reusing outgoing struct arg"); - break; - } - } - NEXT_HBV_BIT_SET; - } - - // Create the CopyBlk tree and insert it. - if (!found) - { - // Get a new temp - // Here We don't need unsafe value cls check, since the addr of this temp is used only in copyblk. - tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument")); - lvaSetStruct(tmp, copyBlkClass, false); - if (call->IsVarargs()) - { - lvaSetStructUsedAsVarArg(tmp); - } - - fgOutgoingArgTemps->setBit(tmp); - } - - fgCurrentlyInUseArgTemps->setBit(tmp); - - // TYP_SIMD structs should not be enregistered, since ABI requires it to be - // allocated on stack and address of it needs to be passed. - if (lclVarIsSIMDType(tmp)) - { - lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct)); - } - - // Create a reference to the temp - GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType); - dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction. - - if (argx->gtOper == GT_OBJ) - { - argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT); - argx->SetIndirExceptionFlags(this); - } - else - { - argx->gtFlags |= GTF_DONT_CSE; - } - - // Copy the valuetype to the temp - GenTree* copyBlk = gtNewBlkOpNode(dest, argx, false /* not volatile */, true /* copyBlock */); - copyBlk = fgMorphCopyBlock(copyBlk); - -#if FEATURE_FIXED_OUT_ARGS - - // Do the copy early, and evalute the temp later (see EvalArgsToTemps) - // When on Unix create LCL_FLD for structs passed in more than one registers. See fgMakeTmpArgNode - GenTree* arg = copyBlk; - -#else // FEATURE_FIXED_OUT_ARGS - - // Structs are always on the stack, and thus never need temps - // so we have to put the copy and temp all into one expression. - argEntry->tmpNum = tmp; - GenTree* arg = fgMakeTmpArgNode(argEntry); - - // Change the expression to "(tmp=val),tmp" - arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); - -#endif // FEATURE_FIXED_OUT_ARGS - - args->SetNode(arg); - call->fgArgInfo->EvalToTmp(argEntry, tmp, arg); - - return; -} - -#ifdef TARGET_ARM -// See declaration for specification comment. 
-void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc, - unsigned firstArgRegNum, - regMaskTP* pArgSkippedRegMask) -{ - assert(varDsc->lvPromoted); - // There's no way to do these calculations without breaking abstraction and assuming that - // integer register arguments are consecutive ints. They are on ARM. - - // To start, figure out what register contains the last byte of the first argument. - LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; - unsigned lastFldRegOfLastByte = - (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; - ; - - // Now we're keeping track of the register that the last field ended in; see what registers - // subsequent fields start in, and whether any are skipped. - // (We assume here the invariant that the fields are sorted in offset order.) - for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++) - { - unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset; - LclVarDsc* fldVarDsc = &lvaTable[fldVarNum]; - unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE; - assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields. - // This loop should enumerate the offsets of any registers skipped. - // Find what reg contains the last byte: - // And start at the first register after that. If that isn't the first reg of the current - for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset; - skippedRegOffsets++) - { - // If the register number would not be an arg reg, we're done. - if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG) - return; - *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets)); - } - lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; - } -} - -#endif // TARGET_ARM - -/***************************************************************************** - * - * A little helper used to rearrange nested commutative operations. The - * effect is that nested associative, commutative operations are transformed - * into a 'left-deep' tree, i.e. into something like this: - * - * (((a op b) op c) op d) op... 
- */ - -#if REARRANGE_ADDS - -void Compiler::fgMoveOpsLeft(GenTree* tree) -{ - GenTree* op1; - GenTree* op2; - genTreeOps oper; - - do - { - op1 = tree->AsOp()->gtOp1; - op2 = tree->AsOp()->gtOp2; - oper = tree->OperGet(); - - noway_assert(GenTree::OperIsCommutative(oper)); - noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL); - noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder); - noway_assert(oper == op2->gtOper); - - // Commutativity doesn't hold if overflow checks are needed - - if (tree->gtOverflowEx() || op2->gtOverflowEx()) - { - return; - } - - if (gtIsActiveCSE_Candidate(op2)) - { - // If we have marked op2 as a CSE candidate, - // we can't perform a commutative reordering - // because any value numbers that we computed for op2 - // will be incorrect after performing a commutative reordering - // - return; - } - - if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT)) - { - return; - } - - // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators - if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)) - { - return; - } - - if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN) - { - // We could deal with this, but we were always broken and just hit the assert - // below regarding flags, which means it's not frequent, so will just bail out. - // See #195514 - return; - } - - noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx()); - - GenTree* ad1 = op2->AsOp()->gtOp1; - GenTree* ad2 = op2->AsOp()->gtOp2; - - // Compiler::optOptimizeBools() can create GT_OR of two GC pointers yeilding a GT_INT - // We can not reorder such GT_OR trees - // - if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet())) - { - break; - } - - // Don't split up a byref calculation and create a new byref. E.g., - // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int). - // Doing this transformation could create a situation where the first - // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that - // no longer points within the ref object. If a GC happens, the byref won't - // get updated. This can happen, for instance, if one of the int components - // is negative. It also requires the address generation be in a fully-interruptible - // code region. - // - if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL) - { - assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD)); - break; - } - - /* Change "(x op (y op z))" to "(x op y) op z" */ - /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */ - - GenTree* new_op1 = op2; - - new_op1->AsOp()->gtOp1 = op1; - new_op1->AsOp()->gtOp2 = ad1; - - /* Change the flags. */ - - // Make sure we arent throwing away any flags - noway_assert((new_op1->gtFlags & - ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag. - GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated - GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0); - - new_op1->gtFlags = - (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag. - (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT); - - /* Retype new_op1 if it has not/become a GC ptr. 
*/ - - if (varTypeIsGC(op1->TypeGet())) - { - noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && - oper == GT_ADD) || // byref(ref + (int+int)) - (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && - oper == GT_OR)); // int(gcref | int(gcref|intval)) - - new_op1->gtType = tree->gtType; - } - else if (varTypeIsGC(ad2->TypeGet())) - { - // Neither ad1 nor op1 are GC. So new_op1 isnt either - noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); - new_op1->gtType = TYP_I_IMPL; - } - - // If new_op1 is a new expression. Assign it a new unique value number. - // vnStore is null before the ValueNumber phase has run - if (vnStore != nullptr) - { - // We can only keep the old value number on new_op1 if both op1 and ad2 - // have the same non-NoVN value numbers. Since op is commutative, comparing - // only ad2 and op1 is enough. - if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || - (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || - (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal())) - { - new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet())); - } - } - - tree->AsOp()->gtOp1 = new_op1; - tree->AsOp()->gtOp2 = ad2; - - /* If 'new_op1' is now the same nested op, process it recursively */ - - if ((ad1->gtOper == oper) && !ad1->gtOverflowEx()) - { - fgMoveOpsLeft(new_op1); - } - - /* If 'ad2' is now the same nested op, process it - * Instead of recursion, we set up op1 and op2 for the next loop. - */ - - op1 = new_op1; - op2 = ad2; - } while ((op2->gtOper == oper) && !op2->gtOverflowEx()); - - return; -} - -#endif - -/*****************************************************************************/ - -void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay) -{ - if (tree->OperIsBoundsCheck()) - { - GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk(); - BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay); - if (failBlock != nullptr) - { - boundsChk->gtIndRngFailBB = failBlock; - } - } - else if (tree->OperIs(GT_INDEX_ADDR)) - { - GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr(); - BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); - if (failBlock != nullptr) - { - indexAddr->gtIndRngFailBB = failBlock; - } - } - else - { - noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX)); - fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); - } -} - -BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay) -{ - if (opts.MinOpts()) - { - delay = false; - } - - if (!opts.compDbgCode) - { - if (!delay && !compIsForInlining()) - { - // Create/find the appropriate "range-fail" label - return fgRngChkTarget(compCurBB, kind); - } - } - - return nullptr; -} - -/***************************************************************************** - * - * Expand a GT_INDEX node and fully morph the child operands - * - * The orginal GT_INDEX node is bashed into the GT_IND node that accesses - * the array element. We expand the GT_INDEX node into a larger tree that - * evaluates the array base and index. The simplest expansion is a GT_COMMA - * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag. - * For complex array or index expressions one or more GT_COMMA assignments - * are inserted so that we only evaluate the array or index expressions once. - * - * The fully expanded tree is then morphed. This causes gtFoldExpr to - * perform local constant prop and reorder the constants in the tree and - * fold them. 
- * - * We then parse the resulting array element expression in order to locate - * and label the constants and variables that occur in the tree. - */ - -const int MAX_ARR_COMPLEXITY = 4; -const int MAX_INDEX_COMPLEXITY = 4; - -GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) -{ - noway_assert(tree->gtOper == GT_INDEX); - GenTreeIndex* asIndex = tree->AsIndex(); - var_types elemTyp = asIndex->TypeGet(); - unsigned elemSize = asIndex->gtIndElemSize; - CORINFO_CLASS_HANDLE elemStructType = asIndex->gtStructElemClass; - - noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr); - - // Fold "cns_str"[cns_index] to ushort constant - if (opts.OptimizationEnabled() && asIndex->Arr()->OperIs(GT_CNS_STR) && asIndex->Index()->IsIntCnsFitsInI32()) - { - const int cnsIndex = static_cast(asIndex->Index()->AsIntConCommon()->IconValue()); - if (cnsIndex >= 0) - { - int length; - const char16_t* str = info.compCompHnd->getStringLiteral(asIndex->Arr()->AsStrCon()->gtScpHnd, - asIndex->Arr()->AsStrCon()->gtSconCPX, &length); - if ((cnsIndex < length) && (str != nullptr)) - { - GenTree* cnsCharNode = gtNewIconNode(str[cnsIndex], elemTyp); - INDEBUG(cnsCharNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return cnsCharNode; - } - } - } - -#ifdef FEATURE_SIMD - if (featureSIMD && varTypeIsStruct(elemTyp) && structSizeMightRepresentSIMDType(elemSize)) - { - // If this is a SIMD type, this is the point at which we lose the type information, - // so we need to set the correct type on the GT_IND. - // (We don't care about the base type here, so we only check, but don't retain, the return value). - unsigned simdElemSize = 0; - if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN) - { - assert(simdElemSize == elemSize); - elemTyp = getSIMDTypeForSize(elemSize); - // This is the new type of the node. - tree->gtType = elemTyp; - // Now set elemStructType to null so that we don't confuse value numbering. - elemStructType = nullptr; - } - } -#endif // FEATURE_SIMD - - // Set up the array length's offset into lenOffs - // And the first element's offset into elemOffs - ssize_t lenOffs; - ssize_t elemOffs; - if (tree->gtFlags & GTF_INX_STRING_LAYOUT) - { - lenOffs = OFFSETOF__CORINFO_String__stringLen; - elemOffs = OFFSETOF__CORINFO_String__chars; - tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE - } - else - { - // We have a standard array - lenOffs = OFFSETOF__CORINFO_Array__length; - elemOffs = OFFSETOF__CORINFO_Array__data; - } - - // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts - // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down. - // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion - // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in - // minopts). - // - // When we *are* optimizing, we fully expand GT_INDEX to: - // 1. Evaluate the array address expression and store the result in a temp if the expression is complex or - // side-effecting. - // 2. Evaluate the array index expression and store the result in a temp if the expression is complex or - // side-effecting. - // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array)) - // 4. Compute the address of the element that will be accessed: - // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize)) - // 5. 
Dereference the address with a GT_IND. - // - // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows - // for more straightforward bounds-check removal, CSE, etc. - if (opts.MinOpts()) - { - GenTree* const array = fgMorphTree(asIndex->Arr()); - GenTree* const index = fgMorphTree(asIndex->Index()); - - GenTreeIndexAddr* const indexAddr = - new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize, - static_cast(lenOffs), static_cast(elemOffs)); - indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT; - - // Mark the indirection node as needing a range check if necessary. - // Note this will always be true unless JitSkipArrayBoundCheck() is used - if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0) - { - fgSetRngChkTarget(indexAddr); - } - - // Change `tree` into an indirection and return. - tree->ChangeOper(GT_IND); - GenTreeIndir* const indir = tree->AsIndir(); - indir->Addr() = indexAddr; - bool canCSE = indir->CanCSE(); - indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT); - if (!canCSE) - { - indir->SetDoNotCSE(); - } - -#ifdef DEBUG - indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - - return indir; - } - - GenTree* arrRef = asIndex->Arr(); - GenTree* index = asIndex->Index(); - - bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled - bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0); - - GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression - GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression - GenTree* bndsChk = nullptr; - - // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address. - if (chkd) - { - GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression - GenTree* index2 = nullptr; - - // If the arrRef or index expressions involves an assignment, a call or reads from global memory, - // then we *must* allocate a temporary in which to "localize" those values, to ensure that the - // same values are used in the bounds check and the actual dereference. - // Also we allocate the temporary when the expresion is sufficiently complex/expensive. - // - // Note that if the expression is a GT_FIELD, it has not yet been morphed so its true complexity is - // not exposed. Without that condition there are cases of local struct fields that were previously, - // needlessly, marked as GTF_GLOB_REF, and when that was fixed, there were some regressions that - // were mostly ameliorated by adding this condition. - // - // Likewise, allocate a temporary if the expression is a GT_LCL_FLD node. These used to be created - // after fgMorphArrayIndex from GT_FIELD trees so this preserves the existing behavior. This is - // perhaps a decision that should be left to CSE but FX diffs show that it is slightly better to - // do this here. 
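// Editorial note (not part of the patch): a standalone sketch of why the index
// is spilled to a temp in the comment above -- if the expression has side
// effects, evaluating it once for the bounds check and again for the load
// would use two different values. The names boundsCheckedLoad and nextIndex
// are assumptions for this illustration, not JIT identifiers.
#include <cstddef>
#include <stdexcept>

int boundsCheckedLoad(const int* data, std::size_t length, std::size_t (*nextIndex)())
{
    // Evaluate the (possibly side-effecting) index expression exactly once.
    const std::size_t index = nextIndex();

    if (index >= length) // the bounds check uses the cached value...
    {
        throw std::out_of_range("index");
    }
    return data[index];  // ...and the dereference uses the same cached value.
}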
- - if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || - gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || arrRef->OperIs(GT_FIELD, GT_LCL_FLD)) - { - unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr")); - arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef); - arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); - arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); - } - else - { - arrRef2 = gtCloneExpr(arrRef); - noway_assert(arrRef2 != nullptr); - } - - if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) || - index->OperIs(GT_FIELD, GT_LCL_FLD)) - { - unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("index expr")); - indexDefn = gtNewTempAssign(indexTmpNum, index); - index = gtNewLclvNode(indexTmpNum, index->TypeGet()); - index2 = gtNewLclvNode(indexTmpNum, index->TypeGet()); - } - else - { - index2 = gtCloneExpr(index); - noway_assert(index2 != nullptr); - } - - // Next introduce a GT_ARR_BOUNDS_CHECK node - var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. - -#ifdef TARGET_64BIT - // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case - // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case, - // the comparison will have to be widen to 64 bits. - if (index->TypeGet() == TYP_I_IMPL) - { - bndsChkType = TYP_I_IMPL; - } -#endif // TARGET_64BIT - - GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); - - if (bndsChkType != TYP_INT) - { - arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType); - } - - GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) - GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL); - - bndsChk = arrBndsChk; - - // Now we'll switch to using the second copies for arrRef and index - // to compute the address expression - - arrRef = arrRef2; - index = index2; - } - - // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))" - - GenTree* addr; - -#ifdef TARGET_64BIT - // Widen 'index' on 64-bit targets - if (index->TypeGet() != TYP_I_IMPL) - { - if (index->OperGet() == GT_CNS_INT) - { - index->gtType = TYP_I_IMPL; - } - else - { - index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL); - } - } -#endif // TARGET_64BIT - - /* Scale the index value if necessary */ - if (elemSize > 1) - { - GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL); - - // Fix 392756 WP7 Crossgen - // - // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node - // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar. - // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE. - // - size->gtFlags |= GTF_DONT_CSE; - - /* Multiply by the array element size */ - addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size); - } - else - { - addr = index; - } - - // Be careful to only create the byref pointer when the full index expression is added to the array reference. - // We don't want to create a partial byref address expression that doesn't include the full index offset: - // a byref must point within the containing object. It is dangerous (especially when optimizations come into - // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that - // the partial byref will not point within the object, and thus not get updated correctly during a GC. 
- // This is mostly a risk in fully-interruptible code regions. - // - // NOTE: the tree form created here is pattern matched by optExtractArrIndex(), so changes here must - // be reflected there. - - /* Add the first element's offset */ - - GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL); - - addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns); - - /* Add the object ref to the element's offset */ - - addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr); - - assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) || - (GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL)); - - // Change the orginal GT_INDEX node into a GT_IND node - tree->SetOper(GT_IND); - - // If the index node is a floating-point type, notify the compiler - // we'll potentially use floating point registers at the time of codegen. - if (varTypeUsesFloatReg(tree->gtType)) - { - this->compFloatingPointUsed = true; - } - - // We've now consumed the GTF_INX_RNGCHK, and the node - // is no longer a GT_INDEX node. - tree->gtFlags &= ~GTF_INX_RNGCHK; - - tree->AsOp()->gtOp1 = addr; - - // This is an array index expression. - tree->gtFlags |= GTF_IND_ARR_INDEX; - - // If there's a bounds check, the indir won't fault. - if (bndsChk) - { - tree->gtFlags |= GTF_IND_NONFAULTING; - } - else - { - tree->gtFlags |= GTF_EXCEPT; - } - - if (nCSE) - { - tree->gtFlags |= GTF_DONT_CSE; - } - - // Store information about it. - GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType)); - - // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it. - - GenTree* indTree = tree; - - // Did we create a bndsChk tree? - if (bndsChk) - { - // Use a GT_COMMA node to prepend the array bound check - // - tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree); - - /* Mark the indirection node as needing a range check */ - fgSetRngChkTarget(bndsChk); - } - - if (indexDefn != nullptr) - { - // Use a GT_COMMA node to prepend the index assignment - // - tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree); - } - if (arrRefDefn != nullptr) - { - // Use a GT_COMMA node to prepend the arRef assignment - // - tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree); - } - - // Currently we morph the tree to perform some folding operations prior - // to attaching fieldSeq info and labeling constant array index contributions - // - fgMorphTree(tree); - - // Ideally we just want to proceed to attaching fieldSeq info and labeling the - // constant array index contributions, but the morphing operation may have changed - // the 'tree' into something that now unconditionally throws an exception. - // - // In such case the gtEffectiveVal could be a new tree or it's gtOper could be modified - // or it could be left unchanged. If it is unchanged then we should not return, - // instead we should proceed to attaching fieldSeq info, etc... - // - GenTree* arrElem = tree->gtEffectiveVal(); - - if (fgIsCommaThrow(tree)) - { - if ((arrElem != indTree) || // A new tree node may have been created - (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT - { - return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc.. 
- } - } - - assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)); - - addr = arrElem->AsOp()->gtOp1; - - assert(addr->TypeGet() == TYP_BYREF); - - GenTree* cnsOff = nullptr; - if (addr->OperGet() == GT_ADD) - { - assert(addr->TypeGet() == TYP_BYREF); - assert(addr->AsOp()->gtOp1->TypeGet() == TYP_REF); - - addr = addr->AsOp()->gtOp2; - - // Look for the constant [#FirstElem] node here, or as the RHS of an ADD. - - if (addr->gtOper == GT_CNS_INT) - { - cnsOff = addr; - addr = nullptr; - } - else - { - if ((addr->OperGet() == GT_ADD) && (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT)) - { - cnsOff = addr->AsOp()->gtOp2; - addr = addr->AsOp()->gtOp1; - } - - // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX - addr->LabelIndex(this); - } - } - else if (addr->OperGet() == GT_CNS_INT) - { - cnsOff = addr; - } - - FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField); - - if ((cnsOff != nullptr) && (cnsOff->AsIntCon()->gtIconVal == elemOffs)) - { - // Assign it the [#FirstElem] field sequence - // - cnsOff->AsIntCon()->gtFieldSeq = firstElemFseq; - } - else // We have folded the first element's offset with the index expression - { - // Build the [#ConstantIndex, #FirstElem] field sequence - // - FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); - FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq); - - if (cnsOff == nullptr) // It must have folded into a zero offset - { - // Record in the general zero-offset map. - fgAddFieldSeqForZeroOffset(addr, fieldSeq); - } - else - { - cnsOff->AsIntCon()->gtFieldSeq = fieldSeq; - } - } - - return tree; -} - -#ifdef TARGET_X86 -/***************************************************************************** - * - * Wrap fixed stack arguments for varargs functions to go through varargs - * cookie to access them, except for the cookie itself. - * - * Non-x86 platforms are allowed to access all arguments directly - * so we don't need this code. - * - */ -GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs) -{ - /* For the fixed stack arguments of a varargs function, we need to go - through the varargs cookies to access them, except for the - cookie itself */ - - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg) - { - // Create a node representing the local pointing to the base of the args - GenTree* ptrArg = - gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL), - gtNewIconNode(varDsc->GetStackOffset() - - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES - lclOffs)); - - // Access the argument through the local - GenTree* tree; - if (varTypeIsStruct(varType)) - { - CORINFO_CLASS_HANDLE typeHnd = varDsc->GetStructHnd(); - assert(typeHnd != nullptr); - tree = gtNewObjNode(typeHnd, ptrArg); - } - else - { - tree = gtNewOperNode(GT_IND, varType, ptrArg); - } - tree->gtFlags |= GTF_IND_TGTANYWHERE; - - if (varDsc->lvAddrExposed) - { - tree->gtFlags |= GTF_GLOB_REF; - } - - return fgMorphTree(tree); - } - - return NULL; -} -#endif - -/***************************************************************************** - * - * Transform the given GT_LCL_VAR tree for code generation. 
- */ - -GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph) -{ - assert(tree->gtOper == GT_LCL_VAR); - - unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); - var_types varType = lvaGetRealType(lclNum); - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (varDsc->lvAddrExposed) - { - tree->gtFlags |= GTF_GLOB_REF; - } - -#ifdef TARGET_X86 - if (info.compIsVarArgs) - { - GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0); - if (newTree != nullptr) - { - if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) - { - newTree->SetOper(GT_IND); - } - return newTree; - } - } -#endif // TARGET_X86 - - /* If not during the global morphing phase bail */ - - if (!fgGlobalMorph && !forceRemorph) - { - return tree; - } - - bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0; - - noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr - - if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad()) - { -#if LOCAL_ASSERTION_PROP - /* Assertion prop can tell us to omit adding a cast here */ - if (optLocalAssertionProp && optAssertionIsSubrange(tree, TYP_INT, varType, apFull) != NO_ASSERTION_INDEX) - { - return tree; - } -#endif - /* Small-typed arguments and aliased locals are normalized on load. - Other small-typed locals are normalized on store. - Also, under the debugger as the debugger could write to the variable. - If this is one of the former, insert a narrowing cast on the load. - ie. Convert: var-short --> cast-short(var-int) */ - - tree->gtType = TYP_INT; - fgMorphTreeDone(tree); - tree = gtNewCastNode(TYP_INT, tree, false, varType); - fgMorphTreeDone(tree); - return tree; - } - - return tree; -} - -/***************************************************************************** - Grab a temp for big offset morphing. - This method will grab a new temp if no temp of this "type" has been created. - Or it will return the same cached one if it has been created. -*/ -unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type) -{ - unsigned lclNum = fgBigOffsetMorphingTemps[type]; - - if (lclNum == BAD_VAR_NUM) - { - // We haven't created a temp for this kind of type. Create one now. - lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing")); - fgBigOffsetMorphingTemps[type] = lclNum; - } - else - { - // We better get the right type. - noway_assert(lvaTable[lclNum].TypeGet() == type); - } - - noway_assert(lclNum != BAD_VAR_NUM); - return lclNum; -} - -/***************************************************************************** - * - * Transform the given GT_FIELD tree for code generation. - */ - -GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) -{ - assert(tree->gtOper == GT_FIELD); - - CORINFO_FIELD_HANDLE symHnd = tree->AsField()->gtFldHnd; - unsigned fldOffset = tree->AsField()->gtFldOffset; - GenTree* objRef = tree->AsField()->gtFldObj; - bool fieldMayOverlap = false; - bool objIsLocal = false; - - if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR)) - { - // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter. - // If it is, fgMorphImplicitByRefArgs may change it do a different opcode, which the - // simd field rewrites are sensitive to. - fgMorphImplicitByRefArgs(objRef); - } - - noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) || - ((tree->gtFlags & GTF_GLOB_REF) != 0)); - - if (tree->AsField()->gtFldMayOverlap) - { - fieldMayOverlap = true; - // Reset the flag because we may reuse the node. 
- tree->AsField()->gtFldMayOverlap = false; - } - -#ifdef FEATURE_SIMD - // if this field belongs to simd struct, translate it to simd intrinsic. - if (mac == nullptr) - { - GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree); - if (newTree != tree) - { - newTree = fgMorphSmpOp(newTree); - return newTree; - } - } - else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1())) - { - GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr(); - if (lcl != nullptr) - { - lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUGARG(DNER_LocalField)); - } - } -#endif - - /* Is this an instance data member? */ - - if (objRef) - { - GenTree* addr; - objIsLocal = objRef->IsLocal(); - - if (tree->gtFlags & GTF_IND_TLS_REF) - { - NO_WAY("instance field can not be a TLS ref."); - } - - /* We'll create the expression "*(objRef + mem_offs)" */ - - noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL); - - /* - Now we have a tree like this: - - +--------------------+ - | GT_FIELD | tree - +----------+---------+ - | - +--------------+-------------+ - | tree->AsField()->gtFldObj | - +--------------+-------------+ - - - We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): - - +--------------------+ - | GT_IND/GT_OBJ | tree - +---------+----------+ - | - | - +---------+----------+ - | GT_ADD | addr - +---------+----------+ - | - / \ - / \ - / \ - +-------------------+ +----------------------+ - | objRef | | fldOffset | - | | | (when fldOffset !=0) | - +-------------------+ +----------------------+ - - - or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): - - - +--------------------+ - | GT_IND/GT_OBJ | tree - +----------+---------+ - | - +----------+---------+ - | GT_COMMA | comma2 - +----------+---------+ - | - / \ - / \ - / \ - / \ - +---------+----------+ +---------+----------+ - comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr - +---------+----------+ +---------+----------+ - | | - / \ / \ - / \ / \ - / \ / \ - +-----+-----+ +-----+-----+ +---------+ +-----------+ - asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset | - +-----+-----+ +-----+-----+ +---------+ +-----------+ - | | - / \ | - / \ | - / \ | - +-----+-----+ +-----+-----+ +-----------+ - | tmpLcl | | objRef | | tmpLcl | - +-----------+ +-----------+ +-----------+ - - - */ - - var_types objRefType = objRef->TypeGet(); - - GenTree* comma = nullptr; - - // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field, - // and thus is equivalent to a MACK_Ind with zero offset. - MorphAddrContext defMAC(MACK_Ind); - if (mac == nullptr) - { - mac = &defMAC; - } - - // This flag is set to enable the "conservative" style of explicit null-check insertion. - // This means that we insert an explicit null check whenever we create byref by adding a - // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately - // dereferenced). The alternative is "aggressive", which would not insert such checks (for - // small offsets); in this plan, we would transfer some null-checking responsibility to - // callee's of methods taking byref parameters. They would have to add explicit null checks - // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in - // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too - // large). 
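// A rough sketch of the decision the code further below implements, assuming
// simplified boolean inputs (an illustration of the policy, not the JIT's actual
// helpers): an explicit null check is forced when the accumulated field offset
// is not a small compile-time constant, and, under the conservative scheme,
// whenever a non-zero (or load-time-variable) offset is folded into a byref that
// is not immediately dereferenced.
bool needsExplicitNullCheck(bool     addrContext,           // byref is built but not dereferenced here
                            bool     allConstantOffsets,    // offsets known at compile time
                            unsigned totalOffset,           // accumulated offset including this field
                            bool     offsetMayChangeAtLoad, // e.g. R2R field offsets
                            bool     offsetIsBig)           // beyond the guard-page-protected range
{
    if (!allConstantOffsets || offsetIsBig)
    {
        return true;
    }
    return addrContext && ((totalOffset > 0) || offsetMayChangeAtLoad);
}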
To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null - // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs. - // This is left here to point out how to implement it. - CLANG_FORMAT_COMMENT_ANCHOR; - -#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1 - - bool addExplicitNullCheck = false; - - // Implicit byref locals and string literals are never null. - if (fgAddrCouldBeNull(objRef)) - { - // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression - // whose address is being taken is either a local or static variable, whose address is necessarily - // non-null, or else it is a field dereference, which will do its own bounds checking if necessary. - if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind)) - { - if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)) - { - addExplicitNullCheck = true; - } - else - { - // In R2R mode the field offset for some fields may change when the code - // is loaded. So we can't rely on a zero offset here to suppress the null check. - // - // See GitHub issue #16454. - bool fieldHasChangeableOffset = false; - -#ifdef FEATURE_READYTORUN_COMPILER - fieldHasChangeableOffset = (tree->AsField()->gtFieldLookup.addr != nullptr); -#endif - -#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION - addExplicitNullCheck = (mac->m_kind == MACK_Addr) && - ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset); -#else - addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr && - ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset)); -#endif - } - } - } - - if (addExplicitNullCheck) - { -#ifdef DEBUG - if (verbose) - { - printf("Before explicit null check morphing:\n"); - gtDispTree(tree); - } -#endif - - // - // Create the "comma" subtree - // - GenTree* asg = nullptr; - GenTree* nullchk; - - unsigned lclNum; - - if (objRef->gtOper != GT_LCL_VAR) - { - lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet())); - - // Create the "asg" node - asg = gtNewTempAssign(lclNum, objRef); - } - else - { - lclNum = objRef->AsLclVarCommon()->GetLclNum(); - } - - GenTree* lclVar = gtNewLclvNode(lclNum, objRefType); - nullchk = gtNewNullCheck(lclVar, compCurBB); - - nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections - - if (asg) - { - // Create the "comma" node. - comma = gtNewOperNode(GT_COMMA, - TYP_VOID, // We don't want to return anything from this "comma" node. - // Set the type to TYP_VOID, so we can select "cmp" instruction - // instead of "mov" instruction later on. - asg, nullchk); - } - else - { - comma = nullchk; - } - - addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node. - } - else - { - addr = objRef; - } - -#ifdef FEATURE_READYTORUN_COMPILER - if (tree->AsField()->gtFieldLookup.addr != nullptr) - { - GenTree* offsetNode = nullptr; - if (tree->AsField()->gtFieldLookup.accessType == IAT_PVALUE) - { - offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->AsField()->gtFieldLookup.addr, - GTF_ICON_CONST_PTR, true); -#ifdef DEBUG - offsetNode->gtGetOp1()->AsIntCon()->gtTargetHandle = (size_t)symHnd; -#endif - } - else - { - noway_assert(!"unexpected accessType for R2R field access"); - } - - var_types addType = (objRefType == TYP_I_IMPL) ? 
TYP_I_IMPL : TYP_BYREF; - addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode); - } -#endif - if (fldOffset != 0) - { - // Generate the "addr" node. - /* Add the member offset to the object's address */ - FieldSeqNode* fieldSeq = - fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); - addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, - gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq)); - } - - // Now let's set the "tree" as a GT_IND tree. - - tree->SetOper(GT_IND); - tree->AsOp()->gtOp1 = addr; - - tree->SetIndirExceptionFlags(this); - - if (addExplicitNullCheck) - { - // - // Create "comma2" node and link it to "tree". - // - GenTree* comma2; - comma2 = gtNewOperNode(GT_COMMA, - addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node. - comma, addr); - tree->AsOp()->gtOp1 = comma2; - } - -#ifdef DEBUG - if (verbose) - { - if (addExplicitNullCheck) - { - printf("After adding explicit null check:\n"); - gtDispTree(tree); - } - } -#endif - } - else /* This is a static data member */ - { - if (tree->gtFlags & GTF_IND_TLS_REF) - { - // Thread Local Storage static field reference - // - // Field ref is a TLS 'Thread-Local-Storage' reference - // - // Build this tree: IND(*) # - // | - // ADD(I_IMPL) - // / \. - // / CNS(fldOffset) - // / - // / - // / - // IND(I_IMPL) == [Base of this DLL's TLS] - // | - // ADD(I_IMPL) - // / \. - // / CNS(IdValue*4) or MUL - // / / \. - // IND(I_IMPL) / CNS(4) - // | / - // CNS(TLS_HDL,0x2C) IND - // | - // CNS(pIdAddr) - // - // # Denotes the orginal node - // - void** pIdAddr = nullptr; - unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr); - - // - // If we can we access the TLS DLL index ID value directly - // then pIdAddr will be NULL and - // IdValue will be the actual TLS DLL index ID - // - GenTree* dllRef = nullptr; - if (pIdAddr == nullptr) - { - if (IdValue != 0) - { - dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL); - } - } - else - { - dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_CONST_PTR, true); - - // Next we multiply by 4 - dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL)); - } - -#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides - - // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] - - GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL); - - // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS - if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) - { - tree->gtFlags &= ~GTF_FLD_INITCLASS; - tlsRef->gtFlags |= GTF_ICON_INITCLASS; - } - - tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); - - if (dllRef != nullptr) - { - /* Add the dllRef */ - tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); - } - - /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */ - tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); - - if (fldOffset != 0) - { - FieldSeqNode* fieldSeq = - fieldMayOverlap ? 
FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); - GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq); - - /* Add the TLS static field offset to the address */ - - tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode); - } - - // Final indirect to get to actual value of TLS static field - - tree->SetOper(GT_IND); - tree->AsOp()->gtOp1 = tlsRef; - - noway_assert(tree->gtFlags & GTF_IND_TLS_REF); - } - else - { - // Normal static field reference - - // - // If we can we access the static's address directly - // then pFldAddr will be NULL and - // fldAddr will be the actual address of the static field - // - void** pFldAddr = nullptr; - void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr); - - // We should always be able to access this static field address directly - // - assert(pFldAddr == nullptr); - -#ifdef TARGET_64BIT - bool isStaticReadOnlyInited = false; - bool plsSpeculative = true; - if (info.compCompHnd->getStaticFieldCurrentClass(symHnd, &plsSpeculative) != NO_CLASS_HANDLE) - { - isStaticReadOnlyInited = !plsSpeculative; - } - - // even if RelocTypeHint is REL32 let's still prefer IND over GT_CLS_VAR - // for static readonly fields of statically initialized classes - thus we can - // apply GTF_IND_INVARIANT flag and make it hoistable/CSE-friendly - if (isStaticReadOnlyInited || (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))) - { - // The address is not directly addressible, so force it into a - // constant, so we handle it properly - - GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL); - addr->gtType = TYP_I_IMPL; - FieldSeqNode* fieldSeq = - fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); - addr->AsIntCon()->gtFieldSeq = fieldSeq; - // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS - if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) - { - tree->gtFlags &= ~GTF_FLD_INITCLASS; - addr->gtFlags |= GTF_ICON_INITCLASS; - } - - tree->SetOper(GT_IND); - tree->AsOp()->gtOp1 = addr; - - if (isStaticReadOnlyInited) - { - JITDUMP("Marking initialized static read-only field '%s' as invariant.\n", eeGetFieldName(symHnd)); - - // Static readonly field is not null at this point (see getStaticFieldCurrentClass impl). - tree->gtFlags |= (GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL); - tree->gtFlags &= ~GTF_ICON_INITCLASS; - addr->gtFlags = GTF_ICON_CONST_PTR; - } - - return fgMorphSmpOp(tree); - } - else -#endif // TARGET_64BIT - { - // Only volatile or classinit could be set, and they map over - noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0); - static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE); - static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS); - tree->SetOper(GT_CLS_VAR); - tree->AsClsVar()->gtClsVarHnd = symHnd; - FieldSeqNode* fieldSeq = - fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); - tree->AsClsVar()->gtFieldSeq = fieldSeq; - } - - return tree; - } - } - noway_assert(tree->gtOper == GT_IND); - - if (fldOffset == 0) - { - GenTree* addr = tree->AsOp()->gtOp1; - - // 'addr' may be a GT_COMMA. Skip over any comma nodes - addr = addr->gtEffectiveVal(); - -#ifdef DEBUG - if (verbose) - { - printf("\nBefore calling fgAddFieldSeqForZeroOffset:\n"); - gtDispTree(tree); - } -#endif - - // We expect 'addr' to be an address at this point. 
- assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); - - // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node. - FieldSeqNode* fieldSeq = - fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); - fgAddFieldSeqForZeroOffset(addr, fieldSeq); - } - - // Pass down the current mac; if non null we are computing an address - GenTree* result = fgMorphSmpOp(tree, mac); - -#ifdef DEBUG - if (verbose) - { - printf("\nFinal value of Compiler::fgMorphField after calling fgMorphSmpOp:\n"); - gtDispTree(result); - } -#endif - - return result; -} - -//------------------------------------------------------------------------------ -// fgMorphCallInline: attempt to inline a call -// -// Arguments: -// call - call expression to inline, inline candidate -// inlineResult - result tracking and reporting -// -// Notes: -// Attempts to inline the call. -// -// If successful, callee's IR is inserted in place of the call, and -// is marked with an InlineContext. -// -// If unsuccessful, the transformations done in anticipation of a -// possible inline are undone, and the candidate flag on the call -// is cleared. - -void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) -{ - bool inliningFailed = false; - - // Is this call an inline candidate? - if (call->IsInlineCandidate()) - { - // Attempt the inline - fgMorphCallInlineHelper(call, inlineResult); - - // We should have made up our minds one way or another.... - assert(inlineResult->IsDecided()); - - // If we failed to inline, we have a bit of work to do to cleanup - if (inlineResult->IsFailure()) - { - -#ifdef DEBUG - - // Before we do any cleanup, create a failing InlineContext to - // capture details of the inlining attempt. - m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult); - -#endif - - inliningFailed = true; - - // Clear the Inline Candidate flag so we can ensure later we tried - // inlining all candidates. - // - call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE; - } - } - else - { - // This wasn't an inline candidate. So it must be a GDV candidate. - assert(call->IsGuardedDevirtualizationCandidate()); - - // We already know we can't inline this call, so don't even bother to try. - inliningFailed = true; - } - - // If we failed to inline (or didn't even try), do some cleanup. - if (inliningFailed) - { - if (call->gtReturnType != TYP_VOID) - { - JITDUMP("Inlining [%06u] failed, so bashing " FMT_STMT " to NOP\n", dspTreeID(call), fgMorphStmt->GetID()); - - // Detach the GT_CALL tree from the original statement by - // hanging a "nothing" node to it. Later the "nothing" node will be removed - // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node. - - noway_assert(fgMorphStmt->GetRootNode() == call); - fgMorphStmt->SetRootNode(gtNewNothingNode()); - } - } -} - -/***************************************************************************** - * Helper to attempt to inline a call - * Sets success/failure in inline result - * If success, modifies current method's IR with inlinee's IR - * If failed, undoes any speculative modifications to current method - */ - -void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) -{ - // Don't expect any surprises here. 
- assert(result->IsCandidate()); - - if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING) - { - // For now, attributing this to call site, though it's really - // more of a budget issue (lvaCount currently includes all - // caller and prospective callee locals). We still might be - // able to inline other callees into this caller, or inline - // this callee in other callers. - result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS); - return; - } - - if (call->IsVirtual()) - { - result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL); - return; - } - - // Re-check this because guarded devirtualization may allow these through. - if (gtIsRecursiveCall(call) && call->IsImplicitTailCall()) - { - result->NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL); - return; - } - - // impMarkInlineCandidate() is expected not to mark tail prefixed calls - // and recursive tail calls as inline candidates. - noway_assert(!call->IsTailPrefixedCall()); - noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call)); - - // - // Calling inlinee's compiler to inline the method. - // - - unsigned startVars = lvaCount; - -#ifdef DEBUG - if (verbose) - { - printf("Expanding INLINE_CANDIDATE in statement "); - printStmtID(fgMorphStmt); - printf(" in " FMT_BB ":\n", compCurBB->bbNum); - gtDispStmt(fgMorphStmt); - if (call->IsImplicitTailCall()) - { - printf("Note: candidate is implicit tail call\n"); - } - } -#endif - - impInlineRoot()->m_inlineStrategy->NoteAttempt(result); - - // - // Invoke the compiler to inline the call. - // - - fgInvokeInlineeCompiler(call, result); - - if (result->IsFailure()) - { - // Undo some changes made in anticipation of inlining... - - // Zero out the used locals - memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable)); - for (unsigned i = startVars; i < lvaCount; i++) - { - new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(); // call the constructor. - } - - lvaCount = startVars; - -#ifdef DEBUG - if (verbose) - { - // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount); - } -#endif - - return; - } - -#ifdef DEBUG - if (verbose) - { - // printf("After inlining lvaCount=%d.\n", lvaCount); - } -#endif -} - -//------------------------------------------------------------------------ -// fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp. -// -// Arguments: -// callee - The callee to check -// failReason - If this method returns false, the reason why. Can be nullptr. -// -// Return Value: -// Returns true or false based on whether the callee can be fastTailCalled -// -// Notes: -// This function is target specific and each target will make the fastTailCall -// decision differently. See the notes below. -// -// This function calls fgInitArgInfo() to initialize the arg info table, which -// is used to analyze the argument. This function can alter the call arguments -// by adding argument IR nodes for non-standard arguments. -// -// Windows Amd64: -// A fast tail call can be made whenever the number of callee arguments -// is less than or equal to the number of caller arguments, or we have four -// or fewer callee arguments. This is because, on Windows AMD64, each -// argument uses exactly one register or one 8-byte stack slot. Thus, we only -// need to count arguments, and not be concerned with the size of each -// incoming or outgoing argument. 
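// A compact illustration of the Windows x64 counting rule just described
// (illustrative helper, not part of the JIT): each argument takes exactly one
// register or one 8-byte stack slot, and the first four always travel in
// registers, so only the stack-slot counts need to be compared.
bool canFastTailCallWindowsX64(unsigned callerArgCount, unsigned calleeArgCount)
{
    unsigned callerStackSlots = (callerArgCount > 4) ? (callerArgCount - 4) : 0;
    unsigned calleeStackSlots = (calleeArgCount > 4) ? (calleeArgCount - 4) : 0;
    return calleeStackSlots <= callerStackSlots; // callee must fit in the caller's incoming arg area
}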
-//
-// Can fast tail call examples (amd64 Windows):
-//
-// -- Callee will have all register arguments --
-// caller(int, int, int, int)
-// callee(int, int, float, int)
-//
-// -- Callee requires stack space that is equal to or less than the caller --
-// caller(struct, struct, struct, struct, struct, struct)
-// callee(int, int, int, int, int, int)
-//
-// -- Callee requires stack space that is less than the caller --
-// caller(struct, double, struct, float, struct, struct)
-// callee(int, int, int, int, int)
-//
-// -- Callee will have all register arguments --
-// caller(int)
-// callee(int, int, int, int)
-//
-// Cannot fast tail call examples (amd64 Windows):
-//
-// -- Callee requires stack space that is larger than the caller --
-// caller(struct, double, struct, float, struct, struct)
-// callee(int, int, int, int, int, double, double, double)
-//
-// -- Callee has a byref struct argument --
-// caller(int, int, int)
-// callee(struct(size 3 bytes))
-//
-// Unix Amd64 && Arm64:
-// A fastTailCall decision can be made whenever the callee's stack space is
-// less than or equal to the caller's stack space. There are many permutations
-// of when the caller and callee have different stack sizes if there are
-// structs being passed to either the caller or callee.
-//
-// Exceptions:
-// 1) If the callee has structs which cannot be enregistered it will be
-// reported as cannot fast tail call. This is an implementation limitation
-// where only the callee is checked for non-enregisterable structs. This is
-// tracked with https://github.com/dotnet/runtime/issues/8492.
-//
-// 2) If the caller or callee has stack arguments and the callee has more
-// arguments than the caller, it will be reported as cannot fast tail call.
-// This is due to a bug in LowerFastTailCall which assumes that
-// nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This
-// is tracked with https://github.com/dotnet/runtime/issues/8413.
-//
-// 3) If the callee has a 9 to 16 byte struct argument and the callee has
-// stack arguments, the decision will be to not fast tail call. This is
-// because before fgMorphArgs is done, it is unknown whether the struct
-// will be placed on the stack or enregistered. Therefore, the conservative
-// decision not to fast tail call is taken. This limitation should be
-// removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
-// -// Can fast tail call examples (amd64 Unix): -// -// -- Callee will have all register arguments -- -// caller(int, int, int, int) -// callee(int, int, float, int) -// -// -- Callee requires stack space that is equal to the caller -- -// caller({ long, long }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte -// stack -// space -// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space -// -// -- Callee requires stack space that is less than the caller -- -// caller({ long, long }, int, { long, long }, int, { long, long }, { long, long }) 6 int register arguments, 32 byte -// stack -// space -// callee(int, int, int, int, int, int, { long, long } ) // 6 int register arguments, 16 byte stack space -// -// -- Callee will have all register arguments -- -// caller(int) -// callee(int, int, int, int) -// -// Cannot fast tail call examples (amd64 Unix): -// -// -- Callee requires stack space that is larger than the caller -- -// caller(float, float, float, float, float, float, float, float) -- 8 float register arguments -// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space -// -// -- Callee has structs which cannot be enregistered (Implementation Limitation) -- -// caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register -// arguments, 24 byte stack space -// callee({ double, double, double }) -- 24 bytes stack space -// -// -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) -- -// caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space -// callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space -// -// -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) -- -// caller({ double, double, double, double, double, double }) // 48 byte stack -// callee(int, int) -- 2 int registers - -bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) -{ -#if FEATURE_FASTTAILCALL - - // To reach here means that the return types of the caller and callee are tail call compatible. - // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type. 
- CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - if (callee->IsTailPrefixedCall()) - { - var_types retType = info.compRetType; - assert(impTailCallRetTypeCompatible(retType, info.compMethodInfo->args.retTypeClass, info.compCallConv, - (var_types)callee->gtReturnType, callee->gtRetClsHnd, - callee->GetUnmanagedCallConv())); - } -#endif - - assert(!callee->AreArgsComplete()); - - fgInitArgInfo(callee); - - fgArgInfo* argInfo = callee->fgArgInfo; - - unsigned calleeArgStackSize = 0; - unsigned callerArgStackSize = info.compArgStackSize; - - for (unsigned index = 0; index < argInfo->ArgCount(); ++index) - { - fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); - - calleeArgStackSize = roundUp(calleeArgStackSize, arg->GetByteAlignment()); - calleeArgStackSize += arg->GetStackByteSize(); - } - calleeArgStackSize = GetOutgoingArgByteSize(calleeArgStackSize); - - auto reportFastTailCallDecision = [&](const char* thisFailReason) { - if (failReason != nullptr) - { - *failReason = thisFailReason; - } - -#ifdef DEBUG - if ((JitConfig.JitReportFastTailCallDecisions()) == 1) - { - if (callee->gtCallType != CT_INDIRECT) - { - const char* methodName; - - methodName = eeGetMethodFullName(callee->gtCallMethHnd); - - printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ", - info.compFullName, methodName); - } - else - { - printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- " - "Decision: ", - info.compFullName); - } - - if (thisFailReason == nullptr) - { - printf("Will fast tailcall"); - } - else - { - printf("Will not fast tailcall (%s)", thisFailReason); - } - - printf(" (CallerArgStackSize: %d, CalleeArgStackSize: %d)\n\n", callerArgStackSize, calleeArgStackSize); - } - else - { - if (thisFailReason == nullptr) - { - JITDUMP("[Fast tailcall decision]: Will fast tailcall\n"); - } - else - { - JITDUMP("[Fast tailcall decision]: Will not fast tailcall (%s)\n", thisFailReason); - } - } -#endif // DEBUG - }; - - if (!opts.compFastTailCalls) - { - reportFastTailCallDecision("Configuration doesn't allow fast tail calls"); - return false; - } - - if (callee->IsStressTailCall()) - { - reportFastTailCallDecision("Fast tail calls are not performed under tail call stress"); - return false; - } - - // Note on vararg methods: - // If the caller is vararg method, we don't know the number of arguments passed by caller's caller. - // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its - // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as - // out-going area required for callee is bounded by caller's fixed argument space. - // - // Note that callee being a vararg method is not a problem since we can account the params being passed. - // - // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg - // method. This is due to the ABI differences for native vararg methods for these platforms. There is - // work required to shuffle arguments to the correct locations. 
- CLANG_FORMAT_COMMENT_ANCHOR; - -#if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) - if (info.compIsVarArgs || callee->IsVarargs()) - { - reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64"); - return false; - } -#endif // (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) - - if (compLocallocUsed) - { - reportFastTailCallDecision("Localloc used"); - return false; - } - -#ifdef TARGET_AMD64 - // Needed for Jit64 compat. - // In future, enabling fast tail calls from methods that need GS cookie - // check would require codegen side work to emit GS cookie check before a - // tail call. - if (getNeedsGSSecurityCookie()) - { - reportFastTailCallDecision("GS Security cookie check required"); - return false; - } -#endif - - // If the NextCallReturnAddress intrinsic is used we should do normal calls. - if (info.compHasNextCallRetAddr) - { - reportFastTailCallDecision("Uses NextCallReturnAddress intrinsic"); - return false; - } - - if (callee->HasRetBufArg()) // RetBuf - { - // If callee has RetBuf param, caller too must have it. - // Otherwise go the slow route. - if (info.compRetBuffArg == BAD_VAR_NUM) - { - reportFastTailCallDecision("Callee has RetBuf but caller does not."); - return false; - } - } - - // For a fast tail call the caller will use its incoming arg stack space to place - // arguments, so if the callee requires more arg stack space than is available here - // the fast tail call cannot be performed. This is common to all platforms. - // Note that the GC'ness of on stack args need not match since the arg setup area is marked - // as non-interruptible for fast tail calls. - if (calleeArgStackSize > callerArgStackSize) - { - reportFastTailCallDecision("Not enough incoming arg space"); - return false; - } - - // For Windows some struct parameters are copied on the local frame - // and then passed by reference. We cannot fast tail call in these situation - // as we need to keep our frame around. - if (fgCallHasMustCopyByrefParameter(callee)) - { - reportFastTailCallDecision("Callee has a byref parameter"); - return false; - } - - reportFastTailCallDecision(nullptr); - return true; -#else // FEATURE_FASTTAILCALL - if (failReason) - *failReason = "Fast tailcalls are not supported on this platform"; - return false; -#endif -} - -//------------------------------------------------------------------------ -// fgCallHasMustCopyByrefParameter: Check to see if this call has a byref parameter that -// requires a struct copy in the caller. -// -// Arguments: -// callee - The callee to check -// -// Return Value: -// Returns true or false based on whether this call has a byref parameter that -// requires a struct copy in the caller. - -#if FEATURE_FASTTAILCALL -bool Compiler::fgCallHasMustCopyByrefParameter(GenTreeCall* callee) -{ - fgArgInfo* argInfo = callee->fgArgInfo; - - bool hasMustCopyByrefParameter = false; - - for (unsigned index = 0; index < argInfo->ArgCount(); ++index) - { - fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); - - if (arg->isStruct) - { - if (arg->passedByRef) - { - // Generally a byref arg will block tail calling, as we have to - // make a local copy of the struct for the callee. - hasMustCopyByrefParameter = true; - - // If we're optimizing, we may be able to pass our caller's byref to our callee, - // and so still be able to avoid a struct copy. 
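// Condensed preview of the screen the code below implements (illustrative only;
// the real analysis works on fgArgTabEntry/LclVarDsc state rather than these
// booleans): an unpromoted implicit-byref struct argument may be forwarded
// without a fresh copy only when no other argument of the call could alias it.
bool argMustBeCopied(bool passedByRefStruct, bool isImplicitByrefParam, bool isPromoted,
                     unsigned refCount, unsigned argCount, bool otherArgMayAlias)
{
    if (!passedByRefStruct)
    {
        return false; // only by-ref struct args need a caller-side copy
    }
    if (!isImplicitByrefParam || isPromoted)
    {
        return true; // must materialize the local copy
    }
    if (refCount == 1)
    {
        return false; // the call is the only appearance: no alias possible
    }
    const unsigned argLimit = 6; // keep the pairwise alias scan cheap
    if (argCount > argLimit)
    {
        return true; // analysis deemed too costly; assume a copy is needed
    }
    return otherArgMayAlias; // pairwise check over the remaining arguments
}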
- if (opts.OptimizationEnabled()) - { - // First, see if this arg is an implicit byref param. - GenTreeLclVar* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); - - if (lcl != nullptr) - { - // Yes, the arg is an implicit byref param. - const unsigned lclNum = lcl->GetLclNum(); - LclVarDsc* const varDsc = lvaGetDesc(lcl); - - // The param must not be promoted; if we've promoted, then the arg will be - // a local struct assembled from the promoted fields. - if (varDsc->lvPromoted) - { - JITDUMP("Arg [%06u] is promoted implicit byref V%02u, so no tail call\n", - dspTreeID(arg->GetNode()), lclNum); - } - else - { - JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n", - dspTreeID(arg->GetNode()), lclNum); - - // We have to worry about introducing aliases if we bypass copying - // the struct at the call. We'll do some limited analysis to see if we - // can rule this out. - const unsigned argLimit = 6; - - // If this is the only appearance of the byref in the method, then - // aliasing is not possible. - // - // If no other call arg refers to this byref, and no other arg is - // a pointer which could refer to this byref, we can optimize. - // - // We only check this for calls with small numbers of arguments, - // as the analysis cost will be quadratic. - // - if (varDsc->lvRefCnt(RCS_EARLY) == 1) - { - JITDUMP("... yes, arg is the only appearance of V%02u\n", lclNum); - hasMustCopyByrefParameter = false; - } - else if (argInfo->ArgCount() <= argLimit) - { - GenTree* interferingArg = nullptr; - for (unsigned index2 = 0; index2 < argInfo->ArgCount(); ++index2) - { - if (index2 == index) - { - continue; - } - - fgArgTabEntry* const arg2 = argInfo->GetArgEntry(index2, false); - JITDUMP("... checking other arg [%06u]...\n", dspTreeID(arg2->GetNode())); - DISPTREE(arg2->GetNode()); - - // Do we pass 'lcl' more than once to the callee? - if (arg2->isStruct && arg2->passedByRef) - { - GenTreeLclVarCommon* const lcl2 = - arg2->GetNode()->IsImplicitByrefParameterValue(this); - - if ((lcl2 != nullptr) && (lclNum == lcl2->GetLclNum())) - { - // not copying would introduce aliased implicit byref structs - // in the callee ... we can't optimize. - interferingArg = arg2->GetNode(); - break; - } - else - { - JITDUMP("... arg refers to different implicit byref V%02u\n", - lcl2->GetLclNum()); - continue; - } - } - - // Do we pass a byref pointer which might point within 'lcl'? - // - // We can assume the 'lcl' is unaliased on entry to the - // method, so the only way we can have an aliasing byref pointer at - // the call is if 'lcl' is address taken/exposed in the method. - // - // Note even though 'lcl' is not promoted, we are in the middle - // of the promote->rewrite->undo->(morph)->demote cycle, and so - // might see references to promoted fields of 'lcl' that haven't yet - // been demoted (see fgMarkDemotedImplicitByRefArgs). - // - // So, we also need to scan all 'lcl's fields, if any, to see if they - // are exposed. - // - // When looking for aliases from other args, we check for both TYP_BYREF - // and TYP_I_IMPL typed args here. 
Conceptually anything that points into - // an implicit byref parameter should be TYP_BYREF, as these parameters could - // refer to boxed heap locations (say if the method is invoked by reflection) - // but there are some stack only structs (like typed references) where - // the importer/runtime code uses TYP_I_IMPL, and fgInitArgInfo will - // transiently retype all simple address-of implicit parameter args as - // TYP_I_IMPL. - // - if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL)) - { - JITDUMP("...arg is a byref, must run an alias check\n"); - bool checkExposure = true; - bool hasExposure = false; - - // See if there is any way arg could refer to a parameter struct. - GenTree* arg2Node = arg2->GetNode(); - if (arg2Node->OperIs(GT_LCL_VAR)) - { - GenTreeLclVarCommon* arg2LclNode = arg2Node->AsLclVarCommon(); - assert(arg2LclNode->GetLclNum() != lclNum); - LclVarDsc* arg2Dsc = lvaGetDesc(arg2LclNode); - - // Other params can't alias implicit byref params - if (arg2Dsc->lvIsParam) - { - checkExposure = false; - } - } - // Because we're checking TYP_I_IMPL above, at least - // screen out obvious things that can't cause aliases. - else if (arg2Node->IsIntegralConst()) - { - checkExposure = false; - } - - if (checkExposure) - { - JITDUMP( - "... not sure where byref arg points, checking if V%02u is exposed\n", - lclNum); - // arg2 might alias arg, see if we've exposed - // arg somewhere in the method. - if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) - { - // Struct as a whole is exposed, can't optimize - JITDUMP("... V%02u is exposed\n", lclNum); - hasExposure = true; - } - else if (varDsc->lvFieldLclStart != 0) - { - // This is the promoted/undone struct case. - // - // The field start is actually the local number of the promoted local, - // use it to enumerate the fields. - const unsigned promotedLcl = varDsc->lvFieldLclStart; - LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl); - JITDUMP("...promoted-unpromoted case -- also checking exposure of " - "fields of V%02u\n", - promotedLcl); - - for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt; - fieldIndex++) - { - LclVarDsc* fieldDsc = - lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex); - - if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed) - { - // Promoted and not yet demoted field is exposed, can't optimize - JITDUMP("... field V%02u is exposed\n", - promotedVarDsc->lvFieldLclStart + fieldIndex); - hasExposure = true; - break; - } - } - } - } - - if (hasExposure) - { - interferingArg = arg2->GetNode(); - break; - } - } - else - { - JITDUMP("...arg is not a byref or implicit byref (%s)\n", - varTypeName(arg2->GetNode()->TypeGet())); - } - } - - if (interferingArg != nullptr) - { - JITDUMP("... no, arg [%06u] may alias with V%02u\n", dspTreeID(interferingArg), - lclNum); - } - else - { - JITDUMP("... yes, no other arg in call can alias V%02u\n", lclNum); - hasMustCopyByrefParameter = false; - } - } - else - { - JITDUMP(" ... no, call has %u > %u args, alias analysis deemed too costly\n", - argInfo->ArgCount(), argLimit); - } - } - } - } - - if (hasMustCopyByrefParameter) - { - // This arg requires a struct copy. No reason to keep scanning the remaining args. 
- break; - } - } - } - } - - return hasMustCopyByrefParameter; -} -#endif - -//------------------------------------------------------------------------ -// fgMorphPotentialTailCall: Attempt to morph a call that the importer has -// identified as a potential tailcall to an actual tailcall and return the -// placeholder node to use in this case. -// -// Arguments: -// call - The call to morph. -// -// Return Value: -// Returns a node to use if the call was morphed into a tailcall. If this -// function returns a node the call is done being morphed and the new node -// should be used. Otherwise the call will have been demoted to a regular call -// and should go through normal morph. -// -// Notes: -// This is called only for calls that the importer has already identified as -// potential tailcalls. It will do profitability and legality checks and -// classify which kind of tailcall we are able to (or should) do, along with -// modifying the trees to perform that kind of tailcall. -// -GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) -{ - // It should either be an explicit (i.e. tail prefixed) or an implicit tail call - assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall()); - - // It cannot be an inline candidate - assert(!call->IsInlineCandidate()); - - auto failTailCall = [&](const char* reason, unsigned lclNum = BAD_VAR_NUM) { -#ifdef DEBUG - if (verbose) - { - printf("\nRejecting tail call in morph for call "); - printTreeID(call); - printf(": %s", reason); - if (lclNum != BAD_VAR_NUM) - { - printf(" V%02u", lclNum); - } - printf("\n"); - } -#endif - - // for non user funcs, we have no handles to report - info.compCompHnd->reportTailCallDecision(nullptr, - (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, - call->IsTailPrefixedCall(), TAILCALL_FAIL, reason); - - // We have checked the candidate so demote. - call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL; -#if FEATURE_TAILCALL_OPT - call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL; -#endif - }; - - if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) - { - failTailCall("Might turn into an intrinsic"); - return nullptr; - } - - // Heuristic: regular calls to noreturn methods can sometimes be - // merged, so if we have multiple such calls, we defer tail calling. - // - // TODO: re-examine this; now that we're merging before morph we - // don't need to worry about interfering with merges. - // - if (call->IsNoReturn() && (optNoReturnCallCount > 1)) - { - failTailCall("Defer tail calling throw helper; anticipating merge"); - return nullptr; - } - -#ifdef DEBUG - if (opts.compGcChecks && (info.compRetType == TYP_REF)) - { - failTailCall("COMPlus_JitGCChecks or stress might have interposed a call to CORINFO_HELP_CHECK_OBJ, " - "invalidating tailcall opportunity"); - return nullptr; - } -#endif - - // We have to ensure to pass the incoming retValBuf as the - // outgoing one. Using a temp will not do as this function will - // not regain control to do the copy. This can happen when inlining - // a tailcall which also has a potential tailcall in it: the IL looks - // like we can do a tailcall, but the trees generated use a temp for the inlinee's - // result. TODO-CQ: Fix this. 
- if (info.compRetBuffArg != BAD_VAR_NUM) - { - noway_assert(call->TypeGet() == TYP_VOID); - GenTree* retValBuf = call->gtCallArgs->GetNode(); - if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg) - { - failTailCall("Need to copy return buffer"); - return nullptr; - } - } - - // We are still not sure whether it can be a tail call. Because, when converting - // a call to an implicit tail call, we must check that there are no locals with - // their address taken. If this is the case, we have to assume that the address - // has been leaked and the current stack frame must live until after the final - // call. - - // Verify that none of vars has lvHasLdAddrOp or lvAddrExposed bit set. Note - // that lvHasLdAddrOp is much more conservative. We cannot just base it on - // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs - // during morph stage. The reason for also checking lvAddrExposed is that in case - // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp. - // The combination of lvHasLdAddrOp and lvAddrExposed though conservative allows us - // never to be incorrect. - // - // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose - // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed - // is set. This avoids the need for iterating through all lcl vars of the current - // method. Right now throughout the code base we are not consistently using 'set' - // method to set lvHasLdAddrOp and lvAddrExposed flags. - - bool isImplicitOrStressTailCall = call->IsImplicitTailCall() || call->IsStressTailCall(); - if (isImplicitOrStressTailCall && compLocallocUsed) - { - failTailCall("Localloc used"); - return nullptr; - } - - bool hasStructParam = false; - for (unsigned varNum = 0; varNum < lvaCount; varNum++) - { - LclVarDsc* varDsc = lvaTable + varNum; - // If the method is marked as an explicit tail call we will skip the - // following three hazard checks. - // We still must check for any struct parameters and set 'hasStructParam' - // so that we won't transform the recursive tail call into a loop. - // - if (isImplicitOrStressTailCall) - { - if (varDsc->lvHasLdAddrOp && !lvaIsImplicitByRefLocal(varNum)) - { - failTailCall("Local address taken", varNum); - return nullptr; - } - if (varDsc->lvAddrExposed) - { - if (lvaIsImplicitByRefLocal(varNum)) - { - // The address of the implicit-byref is a non-address use of the pointer parameter. - } - else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl)) - { - // The address of the implicit-byref's field is likewise a non-address use of the pointer - // parameter. - } - else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum)) - { - // This temp was used for struct promotion bookkeeping. It will not be used, and will have - // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs. - assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl)); - assert(fgGlobalMorph); - } - else - { - failTailCall("Local address taken", varNum); - return nullptr; - } - } - if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum)) - { - failTailCall("Has Struct Promoted Param", varNum); - return nullptr; - } - if (varDsc->lvPinned) - { - // A tail call removes the method from the stack, which means the pinning - // goes away for the callee. We can't allow that. 
- failTailCall("Has Pinned Vars", varNum); - return nullptr; - } - } - - if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam) - { - hasStructParam = true; - // This prevents transforming a recursive tail call into a loop - // but doesn't prevent tail call optimization so we need to - // look at the rest of parameters. - } - } - - if (!fgCheckStmtAfterTailCall()) - { - failTailCall("Unexpected statements after the tail call"); - return nullptr; - } - - const char* failReason = nullptr; - bool canFastTailCall = fgCanFastTailCall(call, &failReason); - - CORINFO_TAILCALL_HELPERS tailCallHelpers; - bool tailCallViaJitHelper = false; - if (!canFastTailCall) - { - if (call->IsImplicitTailCall()) - { - // Implicit or opportunistic tail calls are always dispatched via fast tail call - // mechanism and never via tail call helper for perf. - failTailCall(failReason); - return nullptr; - } - - assert(call->IsTailPrefixedCall()); - assert(call->tailCallInfo != nullptr); - - // We do not currently handle non-standard args except for VSD stubs. - if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this)) - { - failTailCall( - "Method with non-standard args passed in callee trash register cannot be tail called via helper"); - return nullptr; - } - - // On x86 we have a faster mechanism than the general one which we use - // in almost all cases. See fgCanTailCallViaJitHelper for more information. - if (fgCanTailCallViaJitHelper()) - { - tailCallViaJitHelper = true; - } - else - { - // Make sure we can get the helpers. We do this last as the runtime - // will likely be required to generate these. - CORINFO_RESOLVED_TOKEN* token = nullptr; - CORINFO_SIG_INFO* sig = call->tailCallInfo->GetSig(); - unsigned flags = 0; - if (!call->tailCallInfo->IsCalli()) - { - token = call->tailCallInfo->GetToken(); - if (call->tailCallInfo->IsCallvirt()) - { - flags |= CORINFO_TAILCALL_IS_CALLVIRT; - } - } - - if (call->gtCallThisArg != nullptr) - { - var_types thisArgType = call->gtCallThisArg->GetNode()->TypeGet(); - if (thisArgType != TYP_REF) - { - flags |= CORINFO_TAILCALL_THIS_ARG_IS_BYREF; - } - } - - if (!info.compCompHnd->getTailCallHelpers(token, sig, (CORINFO_GET_TAILCALL_HELPERS_FLAGS)flags, - &tailCallHelpers)) - { - failTailCall("Tail call help not available"); - return nullptr; - } - } - } - - // Check if we can make the tailcall a loop. - bool fastTailCallToLoop = false; -#if FEATURE_TAILCALL_OPT - // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register - // or return type is a struct that can be passed in a register. - // - // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through - // hidden generic context param or through keep alive thisptr), then while transforming a recursive - // call to such a method requires that the generic context stored on stack slot be updated. Right now, - // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming - // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the - // generic type parameters of both caller and callee generic method are the same. - if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() && - !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet())) - { - fastTailCallToLoop = true; - } -#endif - - // Ok -- now we are committed to performing a tailcall. 
Report the decision.
-    CorInfoTailCall tailCallResult;
-    if (fastTailCallToLoop)
-    {
-        tailCallResult = TAILCALL_RECURSIVE;
-    }
-    else if (canFastTailCall)
-    {
-        tailCallResult = TAILCALL_OPTIMIZED;
-    }
-    else
-    {
-        tailCallResult = TAILCALL_HELPER;
-    }
-
-    info.compCompHnd->reportTailCallDecision(nullptr,
-                                             (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
-                                             call->IsTailPrefixedCall(), tailCallResult, nullptr);
-
-    // Are we currently planning to expand the gtControlExpr as an early virtual call target?
-    //
-    if (call->IsExpandedEarly() && call->IsVirtualVtable())
-    {
-        // It isn't always profitable to expand a virtual call early
-        //
-        // We always expand the TAILCALL_HELPER type late.
-        // And we expand late when we have an optimized tail call
-        // and the this pointer needs to be evaluated into a temp.
-        //
-        if (tailCallResult == TAILCALL_HELPER)
-        {
-            // We will always expand this late in lower instead.
-            // (see LowerTailCallViaJitHelper as it needs some work
-            // for us to be able to expand this earlier in morph)
-            //
-            call->ClearExpandedEarly();
-        }
-        else if ((tailCallResult == TAILCALL_OPTIMIZED) &&
-                 ((call->gtCallThisArg->GetNode()->gtFlags & GTF_SIDE_EFFECT) != 0))
-        {
-            // We generate better code when we expand this late in lower instead.
-            //
-            call->ClearExpandedEarly();
-        }
-    }
-
-    // Now actually morph the call.
-    compTailCallUsed = true;
-    // This will prevent inlining this call.
-    call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
-    if (tailCallViaJitHelper)
-    {
-        call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_VIA_JIT_HELPER;
-    }
-
-#if FEATURE_TAILCALL_OPT
-    if (fastTailCallToLoop)
-    {
-        call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
-    }
-#endif
-
-    // Mark that this is no longer a pending tailcall. We need to do this before
-    // we call fgMorphCall again (which happens in the fast tailcall case) to
-    // avoid recursing back into this method.
-    call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
-#if FEATURE_TAILCALL_OPT
-    call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
-#endif
-
-#ifdef DEBUG
-    if (verbose)
-    {
-        printf("\nGTF_CALL_M_TAILCALL bit set for call ");
-        printTreeID(call);
-        printf("\n");
-        if (fastTailCallToLoop)
-        {
-            printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
-            printTreeID(call);
-            printf("\n");
-        }
-    }
-#endif
-
-    // If this block has a flow successor, make suitable updates.
-    //
-    BasicBlock* const nextBlock = compCurBB->GetUniqueSucc();
-
-    if (nextBlock == nullptr)
-    {
-        // No unique successor. compCurBB should be a return.
-        //
-        assert(compCurBB->bbJumpKind == BBJ_RETURN);
-    }
-    else
-    {
-        // Flow no longer reaches nextBlock from here.
-        //
-        fgRemoveRefPred(nextBlock, compCurBB);
-
-        // Adjust profile weights.
-        //
-        // Note if this is a tail call to loop, further updates
-        // are needed once we install the loop edge.
-        //
-        if (compCurBB->hasProfileWeight() && nextBlock->hasProfileWeight())
-        {
-            // Since we have linear flow we can update the next block weight.
-            //
-            BasicBlock::weight_t const blockWeight = compCurBB->bbWeight;
-            BasicBlock::weight_t const nextWeight = nextBlock->bbWeight;
-            BasicBlock::weight_t const newNextWeight = nextWeight - blockWeight;
-
-            // If the math would result in a negative weight then there's
-            // no local repair we can do; just leave things inconsistent.
-            //
-            if (newNextWeight >= 0)
-            {
-                // Note if we'd already morphed the IR in nextBlock we might
-                // have done something profile sensitive that we should arguably reconsider.
- // - JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum, - nextWeight, newNextWeight); - - nextBlock->setBBProfileWeight(newNextWeight); - } - else - { - JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT - " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", - nextBlock->bbNum, nextWeight, compCurBB->bbNum, blockWeight); - } - - // If nextBlock is not a BBJ_RETURN, it should have a unique successor that - // is a BBJ_RETURN, as we allow a little bit of flow after a tail call. - // - if (nextBlock->bbJumpKind != BBJ_RETURN) - { - BasicBlock* const nextNextBlock = nextBlock->GetUniqueSucc(); - assert(nextNextBlock->bbJumpKind == BBJ_RETURN); - - if (nextNextBlock->hasProfileWeight()) - { - // Do similar updates here. - // - BasicBlock::weight_t const nextNextWeight = nextNextBlock->bbWeight; - BasicBlock::weight_t const newNextNextWeight = nextNextWeight - blockWeight; - - // If the math would result in an negative weight then there's - // no local repair we can do; just leave things inconsistent. - // - if (newNextNextWeight >= 0) - { - JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", - nextNextBlock->bbNum, nextNextWeight, newNextNextWeight); - - nextNextBlock->setBBProfileWeight(newNextNextWeight); - } - else - { - JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT - " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", - nextNextBlock->bbNum, nextNextWeight, compCurBB->bbNum, blockWeight); - } - } - } - } - } - -#if !FEATURE_TAILCALL_OPT_SHARED_RETURN - // We enable shared-ret tail call optimization for recursive calls even if - // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. - if (gtIsRecursiveCall(call)) -#endif - { - // Many tailcalls will have call and ret in the same block, and thus be - // BBJ_RETURN, but if the call falls through to a ret, and we are doing a - // tailcall, change it here. - compCurBB->bbJumpKind = BBJ_RETURN; - } - - GenTree* stmtExpr = fgMorphStmt->GetRootNode(); - -#ifdef DEBUG - // Tail call needs to be in one of the following IR forms - // Either a call stmt or - // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..))) - // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..))) - // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP) - // In the above, - // GT_CASTS may be nested. - genTreeOps stmtOper = stmtExpr->gtOper; - if (stmtOper == GT_CALL) - { - assert(stmtExpr == call); - } - else - { - assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA); - GenTree* treeWithCall; - if (stmtOper == GT_RETURN) - { - treeWithCall = stmtExpr->gtGetOp1(); - } - else if (stmtOper == GT_COMMA) - { - // Second operation must be nop. - assert(stmtExpr->gtGetOp2()->IsNothingNode()); - treeWithCall = stmtExpr->gtGetOp1(); - } - else - { - treeWithCall = stmtExpr->gtGetOp2(); - } - - // Peel off casts - while (treeWithCall->gtOper == GT_CAST) - { - assert(!treeWithCall->gtOverflow()); - treeWithCall = treeWithCall->gtGetOp1(); - } - - assert(treeWithCall == call); - } -#endif - // Store the call type for later to introduce the correct placeholder. - var_types origCallType = call->TypeGet(); - - GenTree* result; - if (!canFastTailCall && !tailCallViaJitHelper) - { - // For tailcall via CORINFO_TAILCALL_HELPERS we transform into regular - // calls with (to the JIT) regular control flow so we do not need to do - // much special handling. 
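For illustration, the profile repair above boils down to a guarded subtraction: take the tail-call block's weight out of its former successor, but only when the result stays non-negative, otherwise leave the counts inconsistent. A minimal standalone sketch follows; the type alias and function name are invented for this example and are not part of the JIT.

#include <cstdio>

// Hypothetical stand-in for the JIT's block-weight bookkeeping.
using weight_t = float;

// Subtract a predecessor's weight from a successor, but only when the
// result stays non-negative; otherwise report that no local repair is
// possible rather than invent a negative count.
bool tryReduceWeight(weight_t& succWeight, weight_t predWeight)
{
    weight_t newWeight = succWeight - predWeight;
    if (newWeight < 0)
    {
        return false; // leave the weights as they are
    }
    succWeight = newWeight;
    return true;
}

int main()
{
    weight_t next = 100.0f;
    if (tryReduceWeight(next, 60.0f))
    {
        printf("reduced successor weight to %f\n", next);
    }
    return 0;
}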
- result = fgMorphTailCallViaHelpers(call, tailCallHelpers); - } - else - { - // Otherwise we will transform into something that does not return. For - // fast tailcalls a "jump" and for tailcall via JIT helper a call to a - // JIT helper that does not return. So peel off everything after the - // call. - Statement* nextMorphStmt = fgMorphStmt->GetNextStmt(); - JITDUMP("Remove all stmts after the call.\n"); - while (nextMorphStmt != nullptr) - { - Statement* stmtToRemove = nextMorphStmt; - nextMorphStmt = stmtToRemove->GetNextStmt(); - fgRemoveStmt(compCurBB, stmtToRemove); - } - - bool isRootReplaced = false; - GenTree* root = fgMorphStmt->GetRootNode(); - - if (root != call) - { - JITDUMP("Replace root node [%06d] with [%06d] tail call node.\n", dspTreeID(root), dspTreeID(call)); - isRootReplaced = true; - fgMorphStmt->SetRootNode(call); - } - - // Avoid potential extra work for the return (for example, vzeroupper) - call->gtType = TYP_VOID; - - // Do some target-specific transformations (before we process the args, - // etc.) for the JIT helper case. - if (tailCallViaJitHelper) - { - fgMorphTailCallViaJitHelper(call); - - // Force re-evaluating the argInfo. fgMorphTailCallViaJitHelper will modify the - // argument list, invalidating the argInfo. - call->fgArgInfo = nullptr; - } - - // Tail call via JIT helper: The VM can't use return address hijacking - // if we're not going to return and the helper doesn't have enough info - // to safely poll, so we poll before the tail call, if the block isn't - // already safe. Since tail call via helper is a slow mechanism it - // doen't matter whether we emit GC poll. his is done to be in parity - // with Jit64. Also this avoids GC info size increase if all most all - // methods are expected to be tail calls (e.g. F#). - // - // Note that we can avoid emitting GC-poll if we know that the current - // BB is dominated by a Gc-SafePoint block. But we don't have dominator - // info at this point. One option is to just add a place holder node for - // GC-poll (e.g. GT_GCPOLL) here and remove it in lowering if the block - // is dominated by a GC-SafePoint. For now it not clear whether - // optimizing slow tail calls is worth the effort. As a low cost check, - // we check whether the first and current basic blocks are - // GC-SafePoints. - // - // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, - // fgSetBlockOrder() is going to mark the method as fully interruptible - // if the block containing this tail call is reachable without executing - // any call. - BasicBlock* curBlock = compCurBB; - if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) || - (fgCreateGCPoll(GCPOLL_INLINE, compCurBB) == curBlock)) - { - // We didn't insert a poll block, so we need to morph the call now - // (Normally it will get morphed when we get to the split poll block) - GenTree* temp = fgMorphCall(call); - noway_assert(temp == call); - } - - // Fast tail call: in case of fast tail calls, we need a jmp epilog and - // hence mark it as BBJ_RETURN with BBF_JMP flag set. - noway_assert(compCurBB->bbJumpKind == BBJ_RETURN); - if (canFastTailCall) - { - compCurBB->bbFlags |= BBF_HAS_JMP; - } - else - { - // We call CORINFO_HELP_TAILCALL which does not return, so we will - // not need epilogue. 
- compCurBB->bbJumpKind = BBJ_THROW; - } - - if (isRootReplaced) - { - // We have replaced the root node of this stmt and deleted the rest, - // but we still have the deleted, dead nodes on the `fgMorph*` stack - // if the root node was an `ASG`, `RET` or `CAST`. - // Return a zero con node to exit morphing of the old trees without asserts - // and forbid POST_ORDER morphing doing something wrong with our call. - var_types callType; - if (varTypeIsStruct(origCallType)) - { - CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd; - Compiler::structPassingKind howToReturnStruct; - callType = getReturnTypeForStruct(retClsHnd, call->GetUnmanagedCallConv(), &howToReturnStruct); - assert((howToReturnStruct != SPK_Unknown) && (howToReturnStruct != SPK_ByReference)); - if (howToReturnStruct == SPK_ByValue) - { - callType = TYP_I_IMPL; - } - else if (howToReturnStruct == SPK_ByValueAsHfa || varTypeIsSIMD(callType)) - { - callType = TYP_FLOAT; - } - assert((callType != TYP_UNKNOWN) && !varTypeIsStruct(callType)); - } - else - { - callType = origCallType; - } - assert((callType != TYP_UNKNOWN) && !varTypeIsStruct(callType)); - callType = genActualType(callType); - - GenTree* zero = gtNewZeroConNode(callType); - result = fgMorphTree(zero); - } - else - { - result = call; - } - } - - return result; -} - -//------------------------------------------------------------------------ -// fgMorphTailCallViaHelpers: Transform the given GT_CALL tree for tailcall code -// generation. -// -// Arguments: -// call - The call to transform -// helpers - The tailcall helpers provided by the runtime. -// -// Return Value: -// Returns the transformed node. -// -// Notes: -// This transforms -// GT_CALL -// {callTarget} -// {this} -// {args} -// into -// GT_COMMA -// GT_CALL StoreArgsStub -// {callTarget} (depending on flags provided by the runtime) -// {this} (as a regular arg) -// {args} -// GT_COMMA -// GT_CALL Dispatcher -// GT_ADDR ReturnAddress -// {CallTargetStub} -// GT_ADDR ReturnValue -// GT_LCL ReturnValue -// whenever the call node returns a value. If the call node does not return a -// value the last comma will not be there. -// -GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL_HELPERS& help) -{ - // R2R requires different handling but we don't support tailcall via - // helpers in R2R yet, so just leave it for now. - // TODO: R2R: TailCallViaHelper - assert(!opts.IsReadyToRun()); - - JITDUMP("fgMorphTailCallViaHelpers (before):\n"); - DISPTREE(call); - - // Don't support tail calling helper methods - assert(call->gtCallType != CT_HELPER); - - // We come this route only for tail prefixed calls that cannot be dispatched as - // fast tail calls - assert(!call->IsImplicitTailCall()); - - // We want to use the following assert, but it can modify the IR in some cases, so we - // can't do that in an assert. - // assert(!fgCanFastTailCall(call, nullptr)); - - bool virtualCall = call->IsVirtual(); - - // If VSD then get rid of arg to VSD since we turn this into a direct call. - // The extra arg will be the first arg so this needs to be done before we - // handle the retbuf below. - if (call->IsVirtualStub()) - { - JITDUMP("This is a VSD\n"); -#if FEATURE_FASTTAILCALL - call->ResetArgInfo(); -#endif - - call->gtFlags &= ~GTF_CALL_VIRT_STUB; - } - - GenTree* callDispatcherAndGetResult = fgCreateCallDispatcherAndGetResult(call, help.hCallTarget, help.hDispatcher); - - // Change the call to a call to the StoreArgs stub. 
- if (call->HasRetBufArg()) - { - JITDUMP("Removing retbuf"); - call->gtCallArgs = call->gtCallArgs->GetNext(); - call->gtCallMoreFlags &= ~GTF_CALL_M_RETBUFFARG; - - // We changed args so recompute info. - call->fgArgInfo = nullptr; - } - - const bool stubNeedsTargetFnPtr = (help.flags & CORINFO_TAILCALL_STORE_TARGET) != 0; - - GenTree* doBeforeStoreArgsStub = nullptr; - GenTree* thisPtrStubArg = nullptr; - - // Put 'this' in normal param list - if (call->gtCallThisArg != nullptr) - { - JITDUMP("Moving this pointer into arg list\n"); - GenTree* objp = call->gtCallThisArg->GetNode(); - GenTree* thisPtr = nullptr; - call->gtCallThisArg = nullptr; - - // JIT will need one or two copies of "this" in the following cases: - // 1) the call needs null check; - // 2) StoreArgs stub needs the target function pointer address and if the call is virtual - // the stub also needs "this" in order to evalute the target. - - const bool callNeedsNullCheck = call->NeedsNullCheck(); - const bool stubNeedsThisPtr = stubNeedsTargetFnPtr && virtualCall; - - // TODO-Review: The following transformation is implemented under assumption that - // both conditions can be true. However, I could not construct such example - // where a virtual tail call would require null check. In case, if the conditions - // are mutually exclusive the following could be simplified. - - if (callNeedsNullCheck || stubNeedsThisPtr) - { - // Clone "this" if "this" has no side effects. - if ((objp->gtFlags & GTF_SIDE_EFFECT) == 0) - { - thisPtr = gtClone(objp, true); - } - - // Create a temp and spill "this" to the temp if "this" has side effects or "this" was too complex to clone. - if (thisPtr == nullptr) - { - const unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); - - // tmp = "this" - doBeforeStoreArgsStub = gtNewTempAssign(lclNum, objp); - - if (callNeedsNullCheck) - { - // COMMA(tmp = "this", deref(tmp)) - GenTree* tmp = gtNewLclvNode(lclNum, objp->TypeGet()); - GenTree* nullcheck = gtNewNullCheck(tmp, compCurBB); - doBeforeStoreArgsStub = gtNewOperNode(GT_COMMA, TYP_VOID, doBeforeStoreArgsStub, nullcheck); - } - - thisPtr = gtNewLclvNode(lclNum, objp->TypeGet()); - - if (stubNeedsThisPtr) - { - thisPtrStubArg = gtNewLclvNode(lclNum, objp->TypeGet()); - } - } - else - { - if (callNeedsNullCheck) - { - // deref("this") - doBeforeStoreArgsStub = gtNewNullCheck(objp, compCurBB); - - if (stubNeedsThisPtr) - { - thisPtrStubArg = gtClone(objp, true); - } - } - else - { - assert(stubNeedsThisPtr); - - thisPtrStubArg = objp; - } - } - - call->gtFlags &= ~GTF_CALL_NULLCHECK; - - assert((thisPtrStubArg != nullptr) == stubNeedsThisPtr); - } - else - { - thisPtr = objp; - } - - // During rationalization tmp="this" and null check will be materialized - // in the right execution order. - assert(thisPtr != nullptr); - call->gtCallArgs = gtPrependNewCallArg(thisPtr, call->gtCallArgs); - call->fgArgInfo = nullptr; - } - - // We may need to pass the target, for instance for calli or generic methods - // where we pass instantiating stub. 
- if (stubNeedsTargetFnPtr) - { - JITDUMP("Adding target since VM requested it\n"); - GenTree* target; - if (!virtualCall) - { - if (call->gtCallType == CT_INDIRECT) - { - noway_assert(call->gtCallAddr != nullptr); - target = call->gtCallAddr; - } - else - { - CORINFO_CONST_LOOKUP addrInfo; - info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo); - - CORINFO_GENERIC_HANDLE handle = nullptr; - void* pIndirection = nullptr; - assert(addrInfo.accessType != IAT_PPVALUE && addrInfo.accessType != IAT_RELPVALUE); - - if (addrInfo.accessType == IAT_VALUE) - { - handle = addrInfo.handle; - } - else if (addrInfo.accessType == IAT_PVALUE) - { - pIndirection = addrInfo.addr; - } - target = gtNewIconEmbHndNode(handle, pIndirection, GTF_ICON_FTN_ADDR, call->gtCallMethHnd); - } - } - else - { - assert(!call->tailCallInfo->GetSig()->hasTypeArg()); - - CORINFO_CALL_INFO callInfo; - unsigned flags = CORINFO_CALLINFO_LDFTN; - if (call->tailCallInfo->IsCallvirt()) - { - flags |= CORINFO_CALLINFO_CALLVIRT; - } - - eeGetCallInfo(call->tailCallInfo->GetToken(), nullptr, (CORINFO_CALLINFO_FLAGS)flags, &callInfo); - target = getVirtMethodPointerTree(thisPtrStubArg, call->tailCallInfo->GetToken(), &callInfo); - } - - // Insert target as last arg - GenTreeCall::Use** newArgSlot = &call->gtCallArgs; - while (*newArgSlot != nullptr) - { - newArgSlot = &(*newArgSlot)->NextRef(); - } - - *newArgSlot = gtNewCallArgs(target); - - call->fgArgInfo = nullptr; - } - - // This is now a direct call to the store args stub and not a tailcall. - call->gtCallType = CT_USER_FUNC; - call->gtCallMethHnd = help.hStoreArgs; - call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; - call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_DELEGATE_INV | GTF_CALL_M_WRAPPER_DELEGATE_INV); - - // The store-args stub returns no value. - call->gtRetClsHnd = nullptr; - call->gtType = TYP_VOID; - call->gtReturnType = TYP_VOID; - - GenTree* callStoreArgsStub = call; - - if (doBeforeStoreArgsStub != nullptr) - { - callStoreArgsStub = gtNewOperNode(GT_COMMA, TYP_VOID, doBeforeStoreArgsStub, callStoreArgsStub); - } - - GenTree* finalTree = - gtNewOperNode(GT_COMMA, callDispatcherAndGetResult->TypeGet(), callStoreArgsStub, callDispatcherAndGetResult); - - finalTree = fgMorphTree(finalTree); - - JITDUMP("fgMorphTailCallViaHelpers (after):\n"); - DISPTREE(finalTree); - return finalTree; -} - -//------------------------------------------------------------------------ -// fgCreateCallDispatcherAndGetResult: Given a call -// CALL -// {callTarget} -// {retbuf} -// {this} -// {args} -// create a similarly typed node that calls the tailcall dispatcher and returns -// the result, as in the following: -// COMMA -// CALL TailCallDispatcher -// ADDR ReturnAddress -// &CallTargetFunc -// ADDR RetValue -// RetValue -// If the call has type TYP_VOID, only create the CALL node. -// -// Arguments: -// origCall - the call -// callTargetStubHnd - the handle of the CallTarget function (this is a special -// IL stub created by the runtime) -// dispatcherHnd - the handle of the tailcall dispatcher function -// -// Return Value: -// A node that can be used in place of the original call. 
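To make the store-args/dispatcher shape described in the comments above concrete: at the source level the mechanism amounts to stashing the arguments, then letting a dispatcher invoke a call-target thunk that writes its result through a pointer, after which the caller reads that result. The sketch below is a plain C++ approximation under that assumption; every name in it (StoreArgsStub, CallTargetThunk, DispatchTailCalls, RealTarget) is hypothetical, and the real dispatcher additionally unwinds frames via the caller's return-address slot, which is only a placeholder here.

#include <cstdio>

// Hypothetical stand-ins for the runtime-provided pieces.
static int s_storedArg;

void StoreArgsStub(int arg)          { s_storedArg = arg; }                       // stash the arguments
int  RealTarget(int x)               { return x * 2; }                            // the method being tail called
void CallTargetThunk(void* retValue) { *static_cast<int*>(retValue) = RealTarget(s_storedArg); }

void DispatchTailCalls(void* callersRetAddrSlot, void (*callTarget)(void*), void* retValue)
{
    (void)callersRetAddrSlot; // the real dispatcher unwinds to this slot; ignored in the sketch
    callTarget(retValue);
}

int Caller(int x)
{
    void* retAddrSlot = nullptr; // placeholder for the caller's return-address slot
    int   retVal      = 0;

    // Shape of the morphed tree: COMMA(CALL StoreArgs, COMMA(CALL Dispatcher, retVal))
    StoreArgsStub(x);
    DispatchTailCalls(&retAddrSlot, CallTargetThunk, &retVal);
    return retVal;
}

int main() { printf("%d\n", Caller(21)); return 0; }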
-// -GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* origCall, - CORINFO_METHOD_HANDLE callTargetStubHnd, - CORINFO_METHOD_HANDLE dispatcherHnd) -{ - GenTreeCall* callDispatcherNode = - gtNewCallNode(CT_USER_FUNC, dispatcherHnd, TYP_VOID, nullptr, fgMorphStmt->GetILOffsetX()); - // The dispatcher has signature - // void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, void* retValue) - - // Add return value arg. - GenTree* retValArg; - GenTree* retVal = nullptr; - unsigned int newRetLcl = BAD_VAR_NUM; - GenTree* copyToRetBufNode = nullptr; - - if (origCall->HasRetBufArg()) - { - JITDUMP("Transferring retbuf\n"); - GenTree* retBufArg = origCall->gtCallArgs->GetNode(); - - assert(info.compRetBuffArg != BAD_VAR_NUM); - assert(retBufArg->OperIsLocal()); - assert(retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg); - - if (info.compRetBuffDefStack) - { - // Use existing retbuf. - retValArg = retBufArg; - } - else - { - // Caller return buffer argument retBufArg can point to GC heap while the dispatcher expects - // the return value argument retValArg to point to the stack. - // We use a temporary stack allocated return buffer to hold the value during the dispatcher call - // and copy the value back to the caller return buffer after that. - unsigned int tmpRetBufNum = lvaGrabTemp(true DEBUGARG("substitute local for return buffer")); - - constexpr bool unsafeValueClsCheck = false; - lvaSetStruct(tmpRetBufNum, origCall->gtRetClsHnd, unsafeValueClsCheck); - lvaSetVarAddrExposed(tmpRetBufNum); - - var_types tmpRetBufType = lvaGetDesc(tmpRetBufNum)->TypeGet(); - - retValArg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(tmpRetBufNum, tmpRetBufType)); - - var_types callerRetBufType = lvaGetDesc(info.compRetBuffArg)->TypeGet(); - - GenTree* dstAddr = gtNewLclvNode(info.compRetBuffArg, callerRetBufType); - GenTree* dst = gtNewObjNode(info.compMethodInfo->args.retTypeClass, dstAddr); - GenTree* src = gtNewLclvNode(tmpRetBufNum, tmpRetBufType); - - constexpr bool isVolatile = false; - constexpr bool isCopyBlock = true; - copyToRetBufNode = gtNewBlkOpNode(dst, src, isVolatile, isCopyBlock); - } - - if (origCall->gtType != TYP_VOID) - { - retVal = gtClone(retBufArg); - } - } - else if (origCall->gtType != TYP_VOID) - { - JITDUMP("Creating a new temp for the return value\n"); - newRetLcl = lvaGrabTemp(false DEBUGARG("Return value for tail call dispatcher")); - if (varTypeIsStruct(origCall->gtType)) - { - lvaSetStruct(newRetLcl, origCall->gtRetClsHnd, false); - } - else - { - // Since we pass a reference to the return value to the dispatcher - // we need to use the real return type so we can normalize it on - // load when we return it. 
- lvaTable[newRetLcl].lvType = (var_types)origCall->gtReturnType; - } - - lvaSetVarAddrExposed(newRetLcl); - - retValArg = - gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(newRetLcl, genActualType(lvaTable[newRetLcl].lvType))); - retVal = gtNewLclvNode(newRetLcl, genActualType(lvaTable[newRetLcl].lvType)); - - if (varTypeIsStruct(origCall->gtType)) - { - retVal = impFixupStructReturnType(retVal, origCall->gtRetClsHnd, origCall->GetUnmanagedCallConv()); - } - } - else - { - JITDUMP("No return value so using null pointer as arg\n"); - retValArg = gtNewZeroConNode(TYP_I_IMPL); - } - - callDispatcherNode->gtCallArgs = gtPrependNewCallArg(retValArg, callDispatcherNode->gtCallArgs); - - // Add callTarget - callDispatcherNode->gtCallArgs = - gtPrependNewCallArg(new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, callTargetStubHnd), - callDispatcherNode->gtCallArgs); - - // Add the caller's return address slot. - if (lvaRetAddrVar == BAD_VAR_NUM) - { - lvaRetAddrVar = lvaGrabTemp(false DEBUGARG("Return address")); - lvaTable[lvaRetAddrVar].lvType = TYP_I_IMPL; - lvaSetVarAddrExposed(lvaRetAddrVar); - } - - GenTree* retAddrSlot = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaRetAddrVar, TYP_I_IMPL)); - callDispatcherNode->gtCallArgs = gtPrependNewCallArg(retAddrSlot, callDispatcherNode->gtCallArgs); - - GenTree* finalTree = callDispatcherNode; - - if (copyToRetBufNode != nullptr) - { - finalTree = gtNewOperNode(GT_COMMA, TYP_VOID, callDispatcherNode, copyToRetBufNode); - } - - if (origCall->gtType == TYP_VOID) - { - return finalTree; - } - - assert(retVal != nullptr); - finalTree = gtNewOperNode(GT_COMMA, origCall->TypeGet(), finalTree, retVal); - - // The JIT seems to want to CSE this comma and messes up multi-reg ret - // values in the process. Just avoid CSE'ing this tree entirely in that - // case. - if (origCall->HasMultiRegRetVal()) - { - finalTree->gtFlags |= GTF_DONT_CSE; - } - - return finalTree; -} - -//------------------------------------------------------------------------ -// getLookupTree: get a lookup tree -// -// Arguments: -// pResolvedToken - resolved token of the call -// pLookup - the lookup to get the tree for -// handleFlags - flags to set on the result node -// compileTimeHandle - compile-time handle corresponding to the lookup -// -// Return Value: -// A node representing the lookup tree -// -GenTree* Compiler::getLookupTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, - CORINFO_LOOKUP* pLookup, - unsigned handleFlags, - void* compileTimeHandle) -{ - if (!pLookup->lookupKind.needsRuntimeLookup) - { - // No runtime lookup is required. 
- // Access is direct or memory-indirect (of a fixed address) reference - - CORINFO_GENERIC_HANDLE handle = nullptr; - void* pIndirection = nullptr; - assert(pLookup->constLookup.accessType != IAT_PPVALUE && pLookup->constLookup.accessType != IAT_RELPVALUE); - - if (pLookup->constLookup.accessType == IAT_VALUE) - { - handle = pLookup->constLookup.handle; - } - else if (pLookup->constLookup.accessType == IAT_PVALUE) - { - pIndirection = pLookup->constLookup.addr; - } - - return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, compileTimeHandle); - } - - return getRuntimeLookupTree(pResolvedToken, pLookup, compileTimeHandle); -} - -//------------------------------------------------------------------------ -// getRuntimeLookupTree: get a tree for a runtime lookup -// -// Arguments: -// pResolvedToken - resolved token of the call -// pLookup - the lookup to get the tree for -// compileTimeHandle - compile-time handle corresponding to the lookup -// -// Return Value: -// A node representing the runtime lookup tree -// -GenTree* Compiler::getRuntimeLookupTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, - CORINFO_LOOKUP* pLookup, - void* compileTimeHandle) -{ - assert(!compIsForInlining()); - - CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup; - - // If pRuntimeLookup->indirections is equal to CORINFO_USEHELPER, it specifies that a run-time helper should be - // used; otherwise, it specifies the number of indirections via pRuntimeLookup->offsets array. - if ((pRuntimeLookup->indirections == CORINFO_USEHELPER) || pRuntimeLookup->testForNull || - pRuntimeLookup->testForFixup) - { - // If the first condition is true, runtime lookup tree is available only via the run-time helper function. - // TODO-CQ If the second or third condition is true, we are always using the slow path since we can't - // introduce control flow at this point. See impRuntimeLookupToTree for the logic to avoid calling the helper. - // The long-term solution is to introduce a new node representing a runtime lookup, create instances - // of that node both in the importer and here, and expand the node in lower (introducing control flow if - // necessary). 
- return gtNewRuntimeLookupHelperCallNode(pRuntimeLookup, - getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind), - compileTimeHandle); - } - - GenTree* result = getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind); - - ArrayStack stmts(getAllocator(CMK_ArrayStack)); - - auto cloneTree = [&](GenTree** tree DEBUGARG(const char* reason)) { - if (!((*tree)->gtFlags & GTF_GLOB_EFFECT)) - { - GenTree* clone = gtClone(*tree, true); - - if (clone) - { - return clone; - } - } - - unsigned temp = lvaGrabTemp(true DEBUGARG(reason)); - stmts.Push(gtNewTempAssign(temp, *tree)); - *tree = gtNewLclvNode(temp, lvaGetActualType(temp)); - return gtNewLclvNode(temp, lvaGetActualType(temp)); - }; - - // Apply repeated indirections - for (WORD i = 0; i < pRuntimeLookup->indirections; i++) - { - GenTree* preInd = nullptr; - if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset)) - { - preInd = cloneTree(&result DEBUGARG("getRuntimeLookupTree indirectOffset")); - } - - if (i != 0) - { - result = gtNewOperNode(GT_IND, TYP_I_IMPL, result); - result->gtFlags |= GTF_IND_NONFAULTING; - result->gtFlags |= GTF_IND_INVARIANT; - } - - if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset)) - { - result = gtNewOperNode(GT_ADD, TYP_I_IMPL, preInd, result); - } - - if (pRuntimeLookup->offsets[i] != 0) - { - result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewIconNode(pRuntimeLookup->offsets[i], TYP_I_IMPL)); - } - } - - assert(!pRuntimeLookup->testForNull); - if (pRuntimeLookup->indirections > 0) - { - assert(!pRuntimeLookup->testForFixup); - result = gtNewOperNode(GT_IND, TYP_I_IMPL, result); - result->gtFlags |= GTF_IND_NONFAULTING; - } - - // Produces GT_COMMA(stmt1, GT_COMMA(stmt2, ... GT_COMMA(stmtN, result))) - - while (!stmts.Empty()) - { - result = gtNewOperNode(GT_COMMA, TYP_I_IMPL, stmts.Pop(), result); - } - - DISPTREE(result); - return result; -} - -//------------------------------------------------------------------------ -// getVirtMethodPointerTree: get a tree for a virtual method pointer -// -// Arguments: -// thisPtr - tree representing `this` pointer -// pResolvedToken - pointer to the resolved token of the method -// pCallInfo - pointer to call info -// -// Return Value: -// A node representing the virtual method pointer - -GenTree* Compiler::getVirtMethodPointerTree(GenTree* thisPtr, - CORINFO_RESOLVED_TOKEN* pResolvedToken, - CORINFO_CALL_INFO* pCallInfo) -{ - GenTree* exactTypeDesc = getTokenHandleTree(pResolvedToken, true); - GenTree* exactMethodDesc = getTokenHandleTree(pResolvedToken, false); - - GenTreeCall::Use* helpArgs = gtNewCallArgs(thisPtr, exactTypeDesc, exactMethodDesc); - return gtNewHelperCallNode(CORINFO_HELP_VIRTUAL_FUNC_PTR, TYP_I_IMPL, helpArgs); -} - -//------------------------------------------------------------------------ -// getTokenHandleTree: get a handle tree for a token -// -// Arguments: -// pResolvedToken - token to get a handle for -// parent - whether parent should be imported -// -// Return Value: -// A node representing the virtual method pointer - -GenTree* Compiler::getTokenHandleTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool parent) -{ - CORINFO_GENERICHANDLE_RESULT embedInfo; - info.compCompHnd->embedGenericHandle(pResolvedToken, parent ? 
TRUE : FALSE, &embedInfo); - - GenTree* result = getLookupTree(pResolvedToken, &embedInfo.lookup, gtTokenToIconFlags(pResolvedToken->token), - embedInfo.compileTimeHandle); - - // If we have a result and it requires runtime lookup, wrap it in a runtime lookup node. - if ((result != nullptr) && embedInfo.lookup.lookupKind.needsRuntimeLookup) - { - result = gtNewRuntimeLookup(embedInfo.compileTimeHandle, embedInfo.handleType, result); - } - - return result; -} - -/***************************************************************************** - * - * Transform the given GT_CALL tree for tail call via JIT helper. - */ -void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call) -{ - JITDUMP("fgMorphTailCallViaJitHelper (before):\n"); - DISPTREE(call); - - // The runtime requires that we perform a null check on the `this` argument before - // tail calling to a virtual dispatch stub. This requirement is a consequence of limitations - // in the runtime's ability to map an AV to a NullReferenceException if - // the AV occurs in a dispatch stub that has unmanaged caller. - if (call->IsVirtualStub()) - { - call->gtFlags |= GTF_CALL_NULLCHECK; - } - - // For the helper-assisted tail calls, we need to push all the arguments - // into a single list, and then add a few extra at the beginning or end. - // - // For x86, the tailcall helper is defined as: - // - // JIT_TailCall(, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* - // callTarget) - // - // Note that the special arguments are on the stack, whereas the function arguments follow - // the normal convention: there might be register arguments in ECX and EDX. The stack will - // look like (highest address at the top): - // first normal stack argument - // ... - // last normal stack argument - // numberOfOldStackArgs - // numberOfNewStackArgs - // flags - // callTarget - // - // Each special arg is 4 bytes. - // - // 'flags' is a bitmask where: - // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all - // callee-saved registers for tailcall functions. Note that the helper assumes - // that the callee-saved registers live immediately below EBP, and must have been - // pushed in this order: EDI, ESI, EBX. - // 2 == call target is a virtual stub dispatch. - // - // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details - // on the custom calling convention. - - // Check for PInvoke call types that we don't handle in codegen yet. - assert(!call->IsUnmanaged()); - assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr)); - - // Don't support tail calling helper methods - assert(call->gtCallType != CT_HELPER); - - // We come this route only for tail prefixed calls that cannot be dispatched as - // fast tail calls - assert(!call->IsImplicitTailCall()); - - // We want to use the following assert, but it can modify the IR in some cases, so we - // can't do that in an assert. - // assert(!fgCanFastTailCall(call, nullptr)); - - // First move the 'this' pointer (if any) onto the regular arg list. We do this because - // we are going to prepend special arguments onto the argument list (for non-x86 platforms), - // and thus shift where the 'this' pointer will be passed to a later argument slot. In - // addition, for all platforms, we are going to change the call into a helper call. Our code - // generation code for handling calls to helpers does not handle 'this' pointers. 
So, when we - // do this transformation, we must explicitly create a null 'this' pointer check, if required, - // since special 'this' pointer handling will no longer kick in. - // - // Some call types, such as virtual vtable calls, require creating a call address expression - // that involves the "this" pointer. Lowering will sometimes create an embedded statement - // to create a temporary that is assigned to the "this" pointer expression, and then use - // that temp to create the call address expression. This temp creation embedded statement - // will occur immediately before the "this" pointer argument, and then will be used for both - // the "this" pointer argument as well as the call address expression. In the normal ordering, - // the embedded statement establishing the "this" pointer temp will execute before both uses - // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the - // normal call argument list, and insert a placeholder which will hold the call address - // expression. For non-x86, things are ok, because the order of execution of these is not - // altered. However, for x86, the call address expression is inserted as the *last* argument - // in the argument list, *after* the "this" pointer. It will be put on the stack, and be - // evaluated first. To ensure we don't end up with out-of-order temp definition and use, - // for those cases where call lowering creates an embedded form temp of "this", we will - // create a temp here, early, that will later get morphed correctly. - - if (call->gtCallThisArg != nullptr) - { - GenTree* thisPtr = nullptr; - GenTree* objp = call->gtCallThisArg->GetNode(); - call->gtCallThisArg = nullptr; - - if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal()) - { - // tmp = "this" - unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); - GenTree* asg = gtNewTempAssign(lclNum, objp); - - // COMMA(tmp = "this", tmp) - var_types vt = objp->TypeGet(); - GenTree* tmp = gtNewLclvNode(lclNum, vt); - thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp); - - objp = thisPtr; - } - - if (call->NeedsNullCheck()) - { - // clone "this" if "this" has no side effects. - if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT)) - { - thisPtr = gtClone(objp, true); - } - - var_types vt = objp->TypeGet(); - if (thisPtr == nullptr) - { - // create a temp if either "this" has side effects or "this" is too complex to clone. - - // tmp = "this" - unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); - GenTree* asg = gtNewTempAssign(lclNum, objp); - - // COMMA(tmp = "this", deref(tmp)) - GenTree* tmp = gtNewLclvNode(lclNum, vt); - GenTree* nullcheck = gtNewNullCheck(tmp, compCurBB); - asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, nullcheck); - - // COMMA(COMMA(tmp = "this", deref(tmp)), tmp) - thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); - } - else - { - // thisPtr = COMMA(deref("this"), "this") - GenTree* nullcheck = gtNewNullCheck(thisPtr, compCurBB); - thisPtr = gtNewOperNode(GT_COMMA, vt, nullcheck, gtClone(objp, true)); - } - - call->gtFlags &= ~GTF_CALL_NULLCHECK; - } - else - { - thisPtr = objp; - } - - // TODO-Cleanup: we leave it as a virtual stub call to - // use logic in `LowerVirtualStubCall`, clear GTF_CALL_VIRT_KIND_MASK here - // and change `LowerCall` to recognize it as a direct call. - - // During rationalization tmp="this" and null check will - // materialize as embedded stmts in right execution order. 
- assert(thisPtr != nullptr); - call->gtCallArgs = gtPrependNewCallArg(thisPtr, call->gtCallArgs); - } - - // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will - // append to the list. - GenTreeCall::Use** ppArg = &call->gtCallArgs; - for (GenTreeCall::Use& use : call->Args()) - { - ppArg = &use.NextRef(); - } - assert(ppArg != nullptr); - assert(*ppArg == nullptr); - - unsigned nOldStkArgsWords = - (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; - GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); - *ppArg = gtNewCallArgs(arg3); // numberOfOldStackArgs - ppArg = &((*ppArg)->NextRef()); - - // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. - // The constant will be replaced. - GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL); - *ppArg = gtNewCallArgs(arg2); // numberOfNewStackArgs - ppArg = &((*ppArg)->NextRef()); - - // Inject a placeholder for the flags. - // The constant will be replaced. - GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL); - *ppArg = gtNewCallArgs(arg1); - ppArg = &((*ppArg)->NextRef()); - - // Inject a placeholder for the real call target that the Lowering phase will generate. - // The constant will be replaced. - GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL); - *ppArg = gtNewCallArgs(arg0); - - // It is now a varargs tail call. - call->gtCallMoreFlags |= GTF_CALL_M_VARARGS; - call->gtFlags &= ~GTF_CALL_POP_ARGS; - - // The function is responsible for doing explicit null check when it is necessary. - assert(!call->NeedsNullCheck()); - - JITDUMP("fgMorphTailCallViaJitHelper (after):\n"); - DISPTREE(call); -} - -//------------------------------------------------------------------------ -// fgGetStubAddrArg: Return the virtual stub address for the given call. -// -// Notes: -// the JIT must place the address of the stub used to load the call target, -// the "stub indirection cell", in special call argument with special register. -// -// Arguments: -// call - a call that needs virtual stub dispatching. -// -// Return Value: -// addr tree with set resister requirements. -// -GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) -{ - assert(call->IsVirtualStub()); - GenTree* stubAddrArg; - if (call->gtCallType == CT_INDIRECT) - { - stubAddrArg = gtClone(call->gtCallAddr, true); - } - else - { - assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT); - ssize_t addr = ssize_t(call->gtStubCallStubAddr); - stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); -#ifdef DEBUG - stubAddrArg->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; -#endif - } - assert(stubAddrArg != nullptr); - stubAddrArg->SetRegNum(virtualStubParamInfo->GetReg()); - return stubAddrArg; -} - -//------------------------------------------------------------------------------ -// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop. -// -// -// Arguments: -// block - basic block ending with a recursive fast tail call -// recursiveTailCall - recursive tail call to transform -// -// Notes: -// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop. - -void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall) -{ - assert(recursiveTailCall->IsTailCallConvertibleToLoop()); - Statement* lastStmt = block->lastStmt(); - assert(recursiveTailCall == lastStmt->GetRootNode()); - - // Transform recursive tail call into a loop. 
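As a rough source-level picture of what "transform recursive tail call into a loop" means: the parameters are re-assigned and control jumps back to the start of the method, with the new argument values computed into temps first so that a later argument still sees the old parameter values, which is exactly why the block dumps that follow route arguments through tmp locals. A hedged sketch in plain C++ (function names invented):

#include <cstdio>

// Recursive form: the method ends in a recursive tail call.
int SumRec(int n, int acc)
{
    if (n == 0)
        return acc;
    return SumRec(n - 1, acc + n); // recursive tail call
}

// Loop form the transformation aims for: compute the new argument values
// into temps, then overwrite the parameters and loop back.
int SumLoop(int n, int acc)
{
    while (true)
    {
        if (n == 0)
            return acc;
        int tmpN   = n - 1;   // temps capture the new argument values...
        int tmpAcc = acc + n;
        n   = tmpN;           // ...before the parameters are overwritten
        acc = tmpAcc;
    }
}

int main() { printf("%d %d\n", SumRec(10, 0), SumLoop(10, 0)); return 0; }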
- - Statement* earlyArgInsertionPoint = lastStmt; - IL_OFFSETX callILOffset = lastStmt->GetILOffsetX(); - - // Hoist arg setup statement for the 'this' argument. - GenTreeCall::Use* thisArg = recursiveTailCall->gtCallThisArg; - if ((thisArg != nullptr) && !thisArg->GetNode()->IsNothingNode() && !thisArg->GetNode()->IsArgPlaceHolderNode()) - { - Statement* thisArgStmt = gtNewStmt(thisArg->GetNode(), callILOffset); - fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt); - } - - // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first; - // then the temps need to be assigned to the method parameters. This is done so that the caller - // parameters are not re-assigned before call arguments depending on them are evaluated. - // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of - // where the next temp or parameter assignment should be inserted. - - // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first - // while the second call argument (const 1) doesn't. - // Basic block before tail recursion elimination: - // ***** BB04, stmt 1 (top level) - // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013) - // [000033] --C - G------ - \--* call void RecursiveMethod - // [000030] ------------ | / --* const int - 1 - // [000031] ------------arg0 in rcx + --* +int - // [000029] ------------ | \--* lclVar int V00 arg1 - // [000032] ------------arg1 in rdx \--* const int 1 - // - // - // Basic block after tail recursion elimination : - // ***** BB04, stmt 1 (top level) - // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) - // [000030] ------------ | / --* const int - 1 - // [000031] ------------ | / --* +int - // [000029] ------------ | | \--* lclVar int V00 arg1 - // [000050] - A---------- \--* = int - // [000049] D------N---- \--* lclVar int V02 tmp0 - // - // ***** BB04, stmt 2 (top level) - // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) - // [000052] ------------ | / --* lclVar int V02 tmp0 - // [000054] - A---------- \--* = int - // [000053] D------N---- \--* lclVar int V00 arg0 - - // ***** BB04, stmt 3 (top level) - // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) - // [000032] ------------ | / --* const int 1 - // [000057] - A---------- \--* = int - // [000056] D------N---- \--* lclVar int V01 arg1 - - Statement* tmpAssignmentInsertionPoint = lastStmt; - Statement* paramAssignmentInsertionPoint = lastStmt; - - // Process early args. They may contain both setup statements for late args and actual args. - // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum - // below has the correct second argument. - int earlyArgIndex = (thisArg == nullptr) ? 0 : 1; - for (GenTreeCall::Use& use : recursiveTailCall->Args()) - { - GenTree* earlyArg = use.GetNode(); - if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode()) - { - if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0) - { - // This is a setup node so we need to hoist it. - Statement* earlyArgStmt = gtNewStmt(earlyArg, callILOffset); - fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt); - } - else - { - // This is an actual argument that needs to be assigned to the corresponding caller parameter. 
- fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); - Statement* paramAssignStmt = - fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset, - tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); - if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) - { - // All temp assignments will happen before the first param assignment. - tmpAssignmentInsertionPoint = paramAssignStmt; - } - } - } - earlyArgIndex++; - } - - // Process late args. - int lateArgIndex = 0; - for (GenTreeCall::Use& use : recursiveTailCall->LateArgs()) - { - // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. - GenTree* lateArg = use.GetNode(); - fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); - Statement* paramAssignStmt = - fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset, - tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); - - if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) - { - // All temp assignments will happen before the first param assignment. - tmpAssignmentInsertionPoint = paramAssignStmt; - } - lateArgIndex++; - } - - // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that - // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that - // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here. - if (!info.compIsStatic && (lvaArg0Var != info.compThisArg)) - { - var_types thisType = lvaTable[info.compThisArg].TypeGet(); - GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType); - GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType)); - Statement* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset); - fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt); - } - - // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog - // but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization - // for all non-parameter IL locals as well as temp structs with GC fields. - // Liveness phase will remove unnecessary initializations. 
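The zero-initialization handled just below has a simple hand-written analogue: in the recursive form every invocation gets freshly zero-initialized locals from the prolog, but once the call becomes a loop back-edge the prolog no longer runs per "call", so the transformed body must re-initialize the affected locals itself at the top of each iteration. A rough sketch under that assumption (all names invented):

#include <cstdio>
#include <cstring>

struct Node { Node* next; int value; };   // stands in for a struct local with a GC field

int Walk(int depth)
{
    int total = 0;
    while (true)                          // was: a recursive tail call
    {
        Node local;                       // would be zeroed by the prolog in the recursive form
        memset(&local, 0, sizeof(local)); // re-initialize on every iteration instead
        local.value = depth;
        total += local.value;
        if (depth == 0)
            return total;
        depth -= 1;                       // parameter re-assignment for the next "call"
    }
}

int main() { printf("%d\n", Walk(3)); return 0; }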
- if (info.compInitMem || compSuppressedZeroInit) - { - unsigned varNum; - LclVarDsc* varDsc; - for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) - { -#if FEATURE_FIXED_OUT_ARGS - if (varNum == lvaOutgoingArgSpaceVar) - { - continue; - } -#endif // FEATURE_FIXED_OUT_ARGS - if (!varDsc->lvIsParam) - { - var_types lclType = varDsc->TypeGet(); - bool isUserLocal = (varNum < info.compLocalsCount); - bool structWithGCFields = ((lclType == TYP_STRUCT) && varDsc->GetLayout()->HasGCPtr()); - bool hadSuppressedInit = varDsc->lvSuppressedZeroInit; - if ((info.compInitMem && (isUserLocal || structWithGCFields)) || hadSuppressedInit) - { - GenTree* lcl = gtNewLclvNode(varNum, lclType); - GenTree* init = nullptr; - if (varTypeIsStruct(lclType)) - { - const bool isVolatile = false; - const bool isCopyBlock = false; - init = gtNewBlkOpNode(lcl, gtNewIconNode(0), isVolatile, isCopyBlock); - init = fgMorphInitBlock(init); - } - else - { - GenTree* zero = gtNewZeroConNode(genActualType(lclType)); - init = gtNewAssignNode(lcl, zero); - } - Statement* initStmt = gtNewStmt(init, callILOffset); - fgInsertStmtBefore(block, lastStmt, initStmt); - } - } - } - } - - // Remove the call - fgRemoveStmt(block, lastStmt); - - // Set the loop edge. - if (opts.IsOSR()) - { - // Todo: this may not look like a viable loop header. - // Might need the moral equivalent of a scratch BB. - block->bbJumpDest = fgEntryBB; - } - else - { - // Ensure we have a scratch block and then target the next - // block. Loop detection needs to see a pred out of the loop, - // so mark the scratch block BBF_DONT_REMOVE to prevent empty - // block removal on it. - fgEnsureFirstBBisScratch(); - fgFirstBB->bbFlags |= BBF_DONT_REMOVE; - block->bbJumpDest = fgFirstBB->bbNext; - } - - // Finish hooking things up. - block->bbJumpKind = BBJ_ALWAYS; - block->bbJumpDest->bbFlags |= BBF_JMP_TARGET; - fgAddRefPred(block->bbJumpDest, block); - block->bbFlags &= ~BBF_HAS_JMP; -} - -//------------------------------------------------------------------------------ -// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter. -// -// -// Arguments: -// arg - argument to assign -// argTabEntry - argument table entry corresponding to arg -// block --- basic block the call is in -// callILOffset - IL offset of the call -// tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary) -// paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted -// -// Return Value: -// parameter assignment statement if one was inserted; nullptr otherwise. - -Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, - fgArgTabEntry* argTabEntry, - BasicBlock* block, - IL_OFFSETX callILOffset, - Statement* tmpAssignmentInsertionPoint, - Statement* paramAssignmentInsertionPoint) -{ - // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because - // some argument trees may reference parameters directly. - - GenTree* argInTemp = nullptr; - unsigned originalArgNum = argTabEntry->argNum; - bool needToAssignParameter = true; - - // TODO-CQ: enable calls with struct arguments passed in registers. - noway_assert(!varTypeIsStruct(arg->TypeGet())); - - if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) - { - // The argument is already assigned to a temp or is a const. 
- argInTemp = arg; - } - else if (arg->OperGet() == GT_LCL_VAR) - { - unsigned lclNum = arg->AsLclVar()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[lclNum]; - if (!varDsc->lvIsParam) - { - // The argument is a non-parameter local so it doesn't need to be assigned to a temp. - argInTemp = arg; - } - else if (lclNum == originalArgNum) - { - // The argument is the same parameter local that we were about to assign so - // we can skip the assignment. - needToAssignParameter = false; - } - } - - // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve - // any caller parameters. Some common cases are handled above but we may be able to eliminate - // more temp assignments. - - Statement* paramAssignStmt = nullptr; - if (needToAssignParameter) - { - if (argInTemp == nullptr) - { - // The argument is not assigned to a temp. We need to create a new temp and insert an assignment. - // TODO: we can avoid a temp assignment if we can prove that the argument tree - // doesn't involve any caller parameters. - unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp")); - lvaTable[tmpNum].lvType = arg->gtType; - GenTree* tempSrc = arg; - GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType); - GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc); - Statement* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset); - fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt); - argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType); - } - - // Now assign the temp to the parameter. - LclVarDsc* paramDsc = lvaTable + originalArgNum; - assert(paramDsc->lvIsParam); - GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType); - GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp); - paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset); - - fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt); - } - return paramAssignStmt; -} - -/***************************************************************************** - * - * Transform the given GT_CALL tree for code generation. - */ - -GenTree* Compiler::fgMorphCall(GenTreeCall* call) -{ - if (call->CanTailCall()) - { - GenTree* newNode = fgMorphPotentialTailCall(call); - if (newNode != nullptr) - { - return newNode; - } - - assert(!call->CanTailCall()); - -#if FEATURE_MULTIREG_RET - if (fgGlobalMorph && call->HasMultiRegRetVal() && varTypeIsStruct(call->TypeGet())) - { - // The tail call has been rejected so we must finish the work deferred - // by impFixupCallStructReturn for multi-reg-returning calls and transform - // ret call - // into - // temp = call - // ret temp - - // Force re-evaluating the argInfo as the return argument has changed. - call->ResetArgInfo(); - - // Create a new temp. - unsigned tmpNum = - lvaGrabTemp(false DEBUGARG("Return value temp for multi-reg return (rejected tail call).")); - lvaTable[tmpNum].lvIsMultiRegRet = true; - - CORINFO_CLASS_HANDLE structHandle = call->gtRetClsHnd; - assert(structHandle != NO_CLASS_HANDLE); - const bool unsafeValueClsCheck = false; - lvaSetStruct(tmpNum, structHandle, unsafeValueClsCheck); - var_types structType = lvaTable[tmpNum].lvType; - GenTree* dst = gtNewLclvNode(tmpNum, structType); - GenTree* assg = gtNewAssignNode(dst, call); - assg = fgMorphTree(assg); - - // Create the assignment statement and insert it before the current statement. - Statement* assgStmt = gtNewStmt(assg, compCurStmt->GetILOffsetX()); - fgInsertStmtBefore(compCurBB, compCurStmt, assgStmt); - - // Return the temp. 
- GenTree* result = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType); - result->gtFlags |= GTF_DONT_CSE; - - compCurBB->bbFlags |= BBF_HAS_CALL; // This block has a call - -#ifdef DEBUG - if (verbose) - { - printf("\nInserting assignment of a multi-reg call result to a temp:\n"); - gtDispStmt(assgStmt); - } - result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - return result; - } -#endif - } - - if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 && - (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR) -#ifdef FEATURE_READYTORUN_COMPILER - || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR) -#endif - ) && - (call == fgMorphStmt->GetRootNode())) - { - // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result. - // Transform it into a null check. - - GenTree* thisPtr = call->gtCallArgs->GetNode(); - - GenTree* nullCheck = gtNewNullCheck(thisPtr, compCurBB); - - return fgMorphTree(nullCheck); - } - - noway_assert(call->gtOper == GT_CALL); - - // - // Only count calls once (only in the global morph phase) - // - if (fgGlobalMorph) - { - if (call->gtCallType == CT_INDIRECT) - { - optCallCount++; - optIndirectCallCount++; - } - else if (call->gtCallType == CT_USER_FUNC) - { - optCallCount++; - if (call->IsVirtual()) - { - optIndirectCallCount++; - } - } - } - - // Couldn't inline - remember that this BB contains method calls - - // Mark the block as a GC safe point for the call if possible. - // In the event the call indicates the block isn't a GC safe point - // and the call is unmanaged with a GC transition suppression request - // then insert a GC poll. - CLANG_FORMAT_COMMENT_ANCHOR; - - if (IsGcSafePoint(call)) - { - compCurBB->bbFlags |= BBF_GC_SAFE_POINT; - } - - // Regardless of the state of the basic block with respect to GC safe point, - // we will always insert a GC Poll for scenarios involving a suppressed GC - // transition. Only mark the block for GC Poll insertion on the first morph. - if (fgGlobalMorph && call->IsUnmanaged() && call->IsSuppressGCTransition()) - { - compCurBB->bbFlags |= (BBF_HAS_SUPPRESSGC_CALL | BBF_GC_SAFE_POINT); - optMethodFlags |= OMF_NEEDS_GCPOLLS; - } - - // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag - // - // We need to do these before the arguments are morphed - if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)) - { - // See if this is foldable - GenTree* optTree = gtFoldExprCall(call); - - // If we optimized, morph the result - if (optTree != call) - { - return fgMorphTree(optTree); - } - } - - compCurBB->bbFlags |= BBF_HAS_CALL; // This block has a call - - // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack. - GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require - // copy-back). - unsigned retValTmpNum = BAD_VAR_NUM; - CORINFO_CLASS_HANDLE structHnd = nullptr; - if (call->HasRetBufArg() && - call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null). - { - // We're enforcing the invariant that return buffers pointers (at least for - // struct return types containing GC pointers) are never pointers into the heap. - // The large majority of cases are address of local variables, which are OK. - // Otherwise, allocate a local of the given struct type, pass its address, - // then assign from that into the proper destination. 
(We don't need to do this - // if we're passing the caller's ret buff arg to the callee, since the caller's caller - // will maintain the same invariant.) - - GenTree* dest = call->gtCallArgs->GetNode(); - assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above. - if (dest->TypeIs(TYP_BYREF) && !dest->IsLocalAddrExpr()) - { - // We'll exempt helper calls from this, assuming that the helper implementation - // follows the old convention, and does whatever barrier is required. - if (call->gtCallType != CT_HELPER) - { - structHnd = call->gtRetClsHnd; - if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) && - !(dest->OperGet() == GT_LCL_VAR && dest->AsLclVar()->GetLclNum() == info.compRetBuffArg)) - { - // Force re-evaluating the argInfo as the return argument has changed. - call->fgArgInfo = nullptr; - origDest = dest; - - retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg")); - lvaSetStruct(retValTmpNum, structHnd, true); - dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); - } - } - } - - call->gtCallArgs->SetNode(dest); - } - - /* Process the "normal" argument list */ - call = fgMorphArgs(call); - noway_assert(call->gtOper == GT_CALL); - - // Should we expand this virtual method call target early here? - // - if (call->IsExpandedEarly() && call->IsVirtualVtable()) - { - // We only expand the Vtable Call target once in the global morph phase - if (fgGlobalMorph) - { - assert(call->gtControlExpr == nullptr); // We only call this method and assign gtControlExpr once - call->gtControlExpr = fgExpandVirtualVtableCallTarget(call); - } - // We always have to morph or re-morph the control expr - // - call->gtControlExpr = fgMorphTree(call->gtControlExpr); - - // Propogate any gtFlags into the call - call->gtFlags |= call->gtControlExpr->gtFlags; - } - - // Morph stelem.ref helper call to store a null value, into a store into an array without the helper. - // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place. - if (opts.OptimizationEnabled() && (call->gtCallType == CT_HELPER) && - (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST))) - { - GenTree* value = gtArgEntryByArgNum(call, 2)->GetNode(); - if (value->IsIntegralConst(0)) - { - assert(value->OperGet() == GT_CNS_INT); - - GenTree* arr = gtArgEntryByArgNum(call, 0)->GetNode(); - GenTree* index = gtArgEntryByArgNum(call, 1)->GetNode(); - - // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy - // the spill trees as well if necessary. 
- GenTreeOp* argSetup = nullptr; - for (GenTreeCall::Use& use : call->Args()) - { - GenTree* const arg = use.GetNode(); - if (arg->OperGet() != GT_ASG) - { - continue; - } - - assert(arg != arr); - assert(arg != index); - - arg->gtFlags &= ~GTF_LATE_ARG; - - GenTree* op1 = argSetup; - if (op1 == nullptr) - { - op1 = gtNewNothingNode(); -#if DEBUG - op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - } - - argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg); - -#if DEBUG - argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - } - -#ifdef DEBUG - auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult { - (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; - return WALK_CONTINUE; - }; - - fgWalkTreePost(&arr, resetMorphedFlag); - fgWalkTreePost(&index, resetMorphedFlag); - fgWalkTreePost(&value, resetMorphedFlag); -#endif // DEBUG - - GenTree* const nullCheckedArr = impCheckForNullPointer(arr); - GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index); - GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value); - arrStore->gtFlags |= GTF_ASG; - - GenTree* result = fgMorphTree(arrStore); - if (argSetup != nullptr) - { - result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result); -#if DEBUG - result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - } - - return result; - } - } - - if (origDest != nullptr) - { - GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); - // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused - // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the - // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to - // be correct. - if (origDest->OperGet() == GT_ASG) - { - if (origDest->AsOp()->gtOp1->OperGet() == GT_LCL_VAR) - { - GenTree* var = origDest->AsOp()->gtOp1; - origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest, - gtNewLclvNode(var->AsLclVar()->GetLclNum(), var->TypeGet())); - } - } - GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false); - copyBlk = fgMorphTree(copyBlk); - GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk); -#ifdef DEBUG - result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - return result; - } - - if (call->IsNoReturn()) - { - // - // If we know that the call does not return then we can set fgRemoveRestOfBlock - // to remove all subsequent statements and change the call's basic block to BBJ_THROW. - // As a result the compiler won't need to preserve live registers across the call. - // - // This isn't need for tail calls as there shouldn't be any code after the call anyway. - // Besides, the tail call code is part of the epilog and converting the block to - // BBJ_THROW would result in the tail call being dropped as the epilog is generated - // only for BBJ_RETURN blocks. 
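The no-return handling described above has a familiar counterpart at the C++ source level: once the compiler knows a callee never returns, everything after the call is dead and the block effectively ends in a throw, so nothing needs to stay live across the call. A small illustrative sketch (names invented):

#include <cstdio>
#include <cstdlib>

[[noreturn]] void FailFast(const char* msg)
{
    fprintf(stderr, "%s\n", msg);
    abort();
}

int Checked(int x)
{
    if (x < 0)
    {
        FailFast("negative input");
        // anything placed here is unreachable and can be discarded
    }
    return x + 1;
}

int main() { printf("%d\n", Checked(1)); return 0; }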
- // - - if (!call->IsTailCall()) - { - fgRemoveRestOfBlock = true; - } - } - - return call; -} - -/***************************************************************************** - * - * Expand and return the call target address for a VirtualCall - * The code here should match that generated by LowerVirtualVtableCall - */ - -GenTree* Compiler::fgExpandVirtualVtableCallTarget(GenTreeCall* call) -{ - GenTree* result; - - JITDUMP("Expanding virtual call target for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); - - noway_assert(call->gtCallType == CT_USER_FUNC); - - // get a reference to the thisPtr being passed - fgArgTabEntry* thisArgTabEntry = gtArgEntryByArgNum(call, 0); - GenTree* thisPtr = thisArgTabEntry->GetNode(); - - // fgMorphArgs must enforce this invariant by creating a temp - // - assert(thisPtr->OperIsLocal()); - - // Make a copy of the thisPtr by cloning - // - thisPtr = gtClone(thisPtr, true); - - noway_assert(thisPtr != nullptr); - - // Get hold of the vtable offset - unsigned vtabOffsOfIndirection; - unsigned vtabOffsAfterIndirection; - bool isRelative; - info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection, - &isRelative); - - // Dereference the this pointer to obtain the method table, it is called vtab below - GenTree* vtab; - assert(VPTR_OFFS == 0); // We have to add this value to the thisPtr to get the methodTable - vtab = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr); - vtab->gtFlags |= GTF_IND_INVARIANT; - - // Get the appropriate vtable chunk - if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK) - { - // Note this isRelative code path is currently never executed - // as the VM doesn't ever return: isRelative == true - // - if (isRelative) - { - // MethodTable offset is a relative pointer. - // - // Additional temporary variable is used to store virtual table pointer. 
- // Address of method is obtained by the next computations: - // - // Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of - // vtable-1st-level-indirection): - // tmp = vtab - // - // Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection): - // result = [tmp + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp + vtabOffsOfIndirection]] - // - // - // When isRelative is true we need to setup two temporary variables - // var1 = vtab - // var2 = var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection] - // result = [var2] + var2 - // - unsigned varNum1 = lvaGrabTemp(true DEBUGARG("var1 - vtab")); - unsigned varNum2 = lvaGrabTemp(true DEBUGARG("var2 - relative")); - GenTree* asgVar1 = gtNewTempAssign(varNum1, vtab); // var1 = vtab - - // [tmp + vtabOffsOfIndirection] - GenTree* tmpTree1 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL), - gtNewIconNode(vtabOffsOfIndirection, TYP_INT)); - tmpTree1 = gtNewOperNode(GT_IND, TYP_I_IMPL, tmpTree1, false); - tmpTree1->gtFlags |= GTF_IND_NONFAULTING; - tmpTree1->gtFlags |= GTF_IND_INVARIANT; - - // var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection - GenTree* tmpTree2 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL), - gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_INT)); - - // var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection] - tmpTree2 = gtNewOperNode(GT_ADD, TYP_I_IMPL, tmpTree2, tmpTree1); - GenTree* asgVar2 = gtNewTempAssign(varNum2, tmpTree2); // var2 = - - // This last indirection is not invariant, but is non-faulting - result = gtNewOperNode(GT_IND, TYP_I_IMPL, gtNewLclvNode(varNum2, TYP_I_IMPL), false); // [var2] - result->gtFlags |= GTF_IND_NONFAULTING; - - result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewLclvNode(varNum2, TYP_I_IMPL)); // [var2] + var2 - - // Now stitch together the two assignment and the calculation of result into a single tree - GenTree* commaTree = gtNewOperNode(GT_COMMA, TYP_I_IMPL, asgVar2, result); - result = gtNewOperNode(GT_COMMA, TYP_I_IMPL, asgVar1, commaTree); - } - else - { - // result = [vtab + vtabOffsOfIndirection] - result = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtab, gtNewIconNode(vtabOffsOfIndirection, TYP_INT)); - result = gtNewOperNode(GT_IND, TYP_I_IMPL, result, false); - result->gtFlags |= GTF_IND_NONFAULTING; - result->gtFlags |= GTF_IND_INVARIANT; - } - } - else - { - result = vtab; - assert(!isRelative); - } - - if (!isRelative) - { - // Load the function address - // result = [result + vtabOffsAfterIndirection] - result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewIconNode(vtabOffsAfterIndirection, TYP_INT)); - // This last indirection is not invariant, but is non-faulting - result = gtNewOperNode(GT_IND, TYP_I_IMPL, result, false); - result->gtFlags |= GTF_IND_NONFAULTING; - } - - return result; -} - -/***************************************************************************** - * - * Transform the given GTK_CONST tree for code generation. - */ - -GenTree* Compiler::fgMorphConst(GenTree* tree) -{ - assert(tree->OperKind() & GTK_CONST); - - /* Clear any exception flags or other unnecessary flags - * that may have been set before folding this node to a constant */ - - tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS); - - if (tree->OperGet() != GT_CNS_STR) - { - return tree; - } - - // TODO-CQ: Do this for compCurBB->isRunRarely(). 
Doing that currently will - // guarantee slow performance for that block. Instead cache the return value - // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf. - - bool useLazyStrCns = false; - if (compCurBB->bbJumpKind == BBJ_THROW) - { - useLazyStrCns = true; - } - else if (fgGlobalMorph && compCurStmt->GetRootNode()->IsCall()) - { - // Quick check: if the root node of the current statement happens to be a noreturn call. - GenTreeCall* call = compCurStmt->GetRootNode()->AsCall(); - useLazyStrCns = call->IsNoReturn() || fgIsThrow(call); - } - - if (useLazyStrCns) - { - CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->AsStrCon()->gtScpHnd); - if (helper != CORINFO_HELP_UNDEF) - { - // For un-important blocks, we want to construct the string lazily - - GenTreeCall::Use* args; - if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE) - { - args = gtNewCallArgs(gtNewIconNode(RidFromToken(tree->AsStrCon()->gtSconCPX), TYP_INT)); - } - else - { - args = gtNewCallArgs(gtNewIconNode(RidFromToken(tree->AsStrCon()->gtSconCPX), TYP_INT), - gtNewIconEmbScpHndNode(tree->AsStrCon()->gtScpHnd)); - } - - tree = gtNewHelperCallNode(helper, TYP_REF, args); - return fgMorphTree(tree); - } - } - - assert(tree->AsStrCon()->gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->AsStrCon()->gtScpHnd)); - - LPVOID pValue; - InfoAccessType iat = - info.compCompHnd->constructStringLiteral(tree->AsStrCon()->gtScpHnd, tree->AsStrCon()->gtSconCPX, &pValue); - - tree = gtNewStringLiteralNode(iat, pValue); - - return fgMorphTree(tree); -} - -//------------------------------------------------------------------------ -// fgMorphTryFoldObjAsLclVar: try to fold an Obj node as a LclVar. -// -// Arguments: -// obj - the obj node. -// -// Return value: -// GenTreeLclVar if the obj can be replaced by it, null otherwise. -// -// Notes: -// TODO-CQ: currently this transformation is done only under copy block, -// but it is benefitial to do for each OBJ node. However, `PUT_ARG_STACK` -// for some platforms does not expect struct `LCL_VAR` as a source, so -// it needs more work. -// -GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj) -{ - if (opts.OptimizationEnabled()) - { - GenTree* op1 = obj->Addr(); - if (op1->OperIs(GT_ADDR)) - { - GenTreeUnOp* addr = op1->AsUnOp(); - GenTree* addrOp = addr->gtGetOp1(); - if (addrOp->TypeIs(obj->TypeGet()) && addrOp->OperIs(GT_LCL_VAR)) - { - GenTreeLclVar* lclVar = addrOp->AsLclVar(); - - ClassLayout* lclVarLayout = lvaGetDesc(lclVar)->GetLayout(); - ClassLayout* objLayout = obj->GetLayout(); - if (ClassLayout::AreCompatible(lclVarLayout, objLayout)) - { -#ifdef DEBUG - CORINFO_CLASS_HANDLE objClsHandle = obj->GetLayout()->GetClassHandle(); - assert(objClsHandle != NO_CLASS_HANDLE); - if (verbose) - { - CORINFO_CLASS_HANDLE lclClsHnd = gtGetStructHandle(lclVar); - printf("fold OBJ(ADDR(X)) [%06u] into X [%06u], ", dspTreeID(obj), dspTreeID(lclVar)); - printf("with %s handles\n", ((lclClsHnd == objClsHandle) ? "matching" : "different")); - } -#endif - // Keep the DONT_CSE flag in sync - // (as the addr always marks it for its op1) - lclVar->gtFlags &= ~GTF_DONT_CSE; - lclVar->gtFlags |= (obj->gtFlags & GTF_DONT_CSE); - - DEBUG_DESTROY_NODE(obj); - DEBUG_DESTROY_NODE(addr); - return lclVar; - } - } - } - } - return nullptr; -} - -/***************************************************************************** - * - * Transform the given GTK_LEAF tree for code generation. 
- */ - -GenTree* Compiler::fgMorphLeaf(GenTree* tree) -{ - assert(tree->OperKind() & GTK_LEAF); - - if (tree->gtOper == GT_LCL_VAR) - { - const bool forceRemorph = false; - return fgMorphLocalVar(tree, forceRemorph); - } - else if (tree->gtOper == GT_LCL_FLD) - { - if (lvaGetDesc(tree->AsLclFld())->lvAddrExposed) - { - tree->gtFlags |= GTF_GLOB_REF; - } - -#ifdef TARGET_X86 - if (info.compIsVarArgs) - { - GenTree* newTree = fgMorphStackArgForVarArgs(tree->AsLclFld()->GetLclNum(), tree->TypeGet(), - tree->AsLclFld()->GetLclOffs()); - if (newTree != nullptr) - { - if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) - { - newTree->SetOper(GT_IND); - } - return newTree; - } - } -#endif // TARGET_X86 - } - else if (tree->gtOper == GT_FTN_ADDR) - { - CORINFO_CONST_LOOKUP addrInfo; - -#ifdef FEATURE_READYTORUN_COMPILER - if (tree->AsFptrVal()->gtEntryPoint.addr != nullptr) - { - addrInfo = tree->AsFptrVal()->gtEntryPoint; - } - else -#endif - { - info.compCompHnd->getFunctionFixedEntryPoint(tree->AsFptrVal()->gtFptrMethod, &addrInfo); - } - - GenTree* indNode = nullptr; - switch (addrInfo.accessType) - { - case IAT_PPVALUE: - indNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)addrInfo.handle, GTF_ICON_CONST_PTR, true); - - // Add the second indirection - indNode = gtNewOperNode(GT_IND, TYP_I_IMPL, indNode); - // This indirection won't cause an exception. - indNode->gtFlags |= GTF_IND_NONFAULTING; - // This indirection also is invariant. - indNode->gtFlags |= GTF_IND_INVARIANT; - break; - - case IAT_PVALUE: - indNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)addrInfo.handle, GTF_ICON_FTN_ADDR, true); - break; - - case IAT_VALUE: - // Refer to gtNewIconHandleNode() as the template for constructing a constant handle - // - tree->SetOper(GT_CNS_INT); - tree->AsIntConCommon()->SetIconValue(ssize_t(addrInfo.handle)); - tree->gtFlags |= GTF_ICON_FTN_ADDR; - break; - - default: - noway_assert(!"Unknown addrInfo.accessType"); - } - - if (indNode != nullptr) - { - DEBUG_DESTROY_NODE(tree); - tree = fgMorphTree(indNode); - } - } - - return tree; -} - -void Compiler::fgAssignSetVarDef(GenTree* tree) -{ - GenTreeLclVarCommon* lclVarCmnTree; - bool isEntire = false; - if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire)) - { - if (isEntire) - { - lclVarCmnTree->gtFlags |= GTF_VAR_DEF; - } - else - { - // We consider partial definitions to be modeled as uses followed by definitions. - // This captures the idea that precedings defs are not necessarily made redundant - // by this definition. - lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG); - } - } -} - -//------------------------------------------------------------------------ -// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment -// -// Arguments: -// tree - The block assignment to be possibly morphed -// -// Return Value: -// The modified tree if successful, nullptr otherwise. -// -// Assumptions: -// 'tree' must be a block assignment. -// -// Notes: -// If successful, this method always returns the incoming tree, modifying only -// its arguments. - -GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) -{ - // This must be a block assignment. 
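A self-contained sketch (ordinary C++, assuming an 8-byte register size and a struct with no GC fields) of the equivalence this function exploits: copying a register-sized struct as an opaque block gives the same result as copying it as one scalar, so the block assignment can be retyped as a scalar assignment.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    struct Pair // 8 bytes, no GC references
    {
        int32_t a;
        int32_t b;
    };

    int main()
    {
        Pair src{1, 2};
        Pair viaBlock{};
        Pair viaScalar{};

        // The block-op form: copy the struct as an opaque run of bytes.
        std::memcpy(&viaBlock, &src, sizeof(Pair));

        // The scalar form preferred here: a single register-sized (TYP_I_IMPL) move.
        uint64_t bits;
        std::memcpy(&bits, &src, sizeof(bits));
        std::memcpy(&viaScalar, &bits, sizeof(bits));

        assert(std::memcmp(&viaBlock, &viaScalar, sizeof(Pair)) == 0);
        return 0;
    }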
- noway_assert(tree->OperIsBlkOp()); - var_types asgType = tree->TypeGet(); - - GenTree* asg = tree; - GenTree* dest = asg->gtGetOp1(); - GenTree* src = asg->gtGetOp2(); - unsigned destVarNum = BAD_VAR_NUM; - LclVarDsc* destVarDsc = nullptr; - GenTree* destLclVarTree = nullptr; - bool isCopyBlock = asg->OperIsCopyBlkOp(); - bool isInitBlock = !isCopyBlock; - - unsigned size = 0; - CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; - - if (dest->gtEffectiveVal()->OperIsBlk()) - { - GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk(); - size = lhsBlk->Size(); - if (impIsAddressInLocal(lhsBlk->Addr(), &destLclVarTree)) - { - destVarNum = destLclVarTree->AsLclVarCommon()->GetLclNum(); - destVarDsc = &(lvaTable[destVarNum]); - } - if (lhsBlk->OperGet() == GT_OBJ) - { - clsHnd = lhsBlk->AsObj()->GetLayout()->GetClassHandle(); - } - } - else - { - // Is this an enregisterable struct that is already a simple assignment? - // This can happen if we are re-morphing. - // Note that we won't do this straightaway if this is a SIMD type, since it - // may be a promoted lclVar (sometimes we promote the individual float fields of - // fixed-size SIMD). - if (dest->OperGet() == GT_IND) - { - noway_assert(asgType != TYP_STRUCT); - if (varTypeIsStruct(asgType)) - { - destLclVarTree = fgIsIndirOfAddrOfLocal(dest); - } - if (isCopyBlock && destLclVarTree == nullptr && !src->OperIs(GT_LCL_VAR)) - { - fgMorphBlockOperand(src, asgType, genTypeSize(asgType), false /*isBlkReqd*/); - return tree; - } - } - else - { - noway_assert(dest->OperIsLocal()); - destLclVarTree = dest; - } - if (destLclVarTree != nullptr) - { - destVarNum = destLclVarTree->AsLclVarCommon()->GetLclNum(); - destVarDsc = &(lvaTable[destVarNum]); - if (asgType == TYP_STRUCT) - { - clsHnd = destVarDsc->GetStructHnd(); - size = destVarDsc->lvExactSize; - } - } - if (asgType != TYP_STRUCT) - { - size = genTypeSize(asgType); - } - } - if (size == 0) - { - return nullptr; - } - - if ((destVarDsc != nullptr) && varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted) - { - // Let fgMorphCopyBlock handle it. - return nullptr; - } - - if (src->IsCall() || src->OperIsSIMD()) - { - // Can't take ADDR from these nodes, let fgMorphCopyBlock handle it, #11413. - return nullptr; - } - - if ((destVarDsc != nullptr) && !varTypeIsStruct(destVarDsc->TypeGet())) - { - - // - // See if we can do a simple transformation: - // - // GT_ASG - // / \. - // GT_IND GT_IND or CNS_INT - // | | - // [dest] [src] - // - - if (asgType == TYP_STRUCT) - { - // It is possible to use `initobj` to init a primitive type on the stack, - // like `ldloca.s 1; initobj 1B000003` where `V01` has type `ref`; - // in this case we generate `ASG struct(BLK<8> struct(ADDR byref(LCL_VAR ref)), 0)` - // and this code path transforms it into `ASG ref(LCL_VARref, 0)` because it is not a real - // struct assignment. - - if (size == REGSIZE_BYTES) - { - if (clsHnd == NO_CLASS_HANDLE) - { - // A register-sized cpblk can be treated as an integer asignment. 
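A condensed model of the retyping decision made in the code below (assuming a 64-bit target; the strings stand in for var_types values, and the GC-pointer case that picks a GC type via getJitGCType is deliberately omitted): the block size alone selects the scalar type used for the assignment, and anything else stays a block operation.

    #include <cassert>
    #include <cstring>

    // Illustrative only; merges the register-sized case with the small-size switch below.
    static const char* ScalarTypeForBlockSize(unsigned size)
    {
        switch (size)
        {
            case 1:  return "TYP_BYTE";
            case 2:  return "TYP_SHORT";
            case 4:  return "TYP_INT";    // the switch below guards this with TARGET_64BIT
            case 8:  return "TYP_I_IMPL"; // REGSIZE_BYTES with no GC pointer in the slot
            default: return nullptr;      // otherwise keep it as a block operation
        }
    }

    int main()
    {
        assert(std::strcmp(ScalarTypeForBlockSize(8), "TYP_I_IMPL") == 0);
        assert(ScalarTypeForBlockSize(3) == nullptr);
        return 0;
    }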
- asgType = TYP_I_IMPL; - } - else - { - BYTE gcPtr; - info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); - asgType = getJitGCType(gcPtr); - } - } - else - { - switch (size) - { - case 1: - asgType = TYP_BYTE; - break; - case 2: - asgType = TYP_SHORT; - break; - -#ifdef TARGET_64BIT - case 4: - asgType = TYP_INT; - break; -#endif // TARGET_64BIT - } - } - } - } - - GenTree* srcLclVarTree = nullptr; - LclVarDsc* srcVarDsc = nullptr; - if (isCopyBlock) - { - if (src->OperGet() == GT_LCL_VAR) - { - srcLclVarTree = src; - srcVarDsc = &(lvaTable[src->AsLclVarCommon()->GetLclNum()]); - } - else if (src->OperIsIndir() && impIsAddressInLocal(src->AsOp()->gtOp1, &srcLclVarTree)) - { - srcVarDsc = &(lvaTable[srcLclVarTree->AsLclVarCommon()->GetLclNum()]); - } - if ((srcVarDsc != nullptr) && varTypeIsStruct(srcLclVarTree) && srcVarDsc->lvPromoted) - { - // Let fgMorphCopyBlock handle it. - return nullptr; - } - } - - if (asgType != TYP_STRUCT) - { - noway_assert((size <= REGSIZE_BYTES) || varTypeIsSIMD(asgType)); - - // For initBlk, a non constant source is not going to allow us to fiddle - // with the bits to create a single assigment. - // Nor do we (for now) support transforming an InitBlock of SIMD type, unless - // it is a direct assignment to a lclVar and the value is zero. - if (isInitBlock) - { - if (!src->IsConstInitVal()) - { - return nullptr; - } - if (varTypeIsSIMD(asgType) && (!src->IsIntegralConst(0) || (destVarDsc == nullptr))) - { - return nullptr; - } - } - - if (destVarDsc != nullptr) - { -#if LOCAL_ASSERTION_PROP - // Kill everything about dest - if (optLocalAssertionProp) - { - if (optAssertionCount > 0) - { - fgKillDependentAssertions(destVarNum DEBUGARG(tree)); - } - } -#endif // LOCAL_ASSERTION_PROP - - // A previous incarnation of this code also required the local not to be - // address-exposed(=taken). That seems orthogonal to the decision of whether - // to do field-wise assignments: being address-exposed will cause it to be - // "dependently" promoted, so it will be in the right memory location. One possible - // further reason for avoiding field-wise stores is that the struct might have alignment-induced - // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid - // concern, then we could compromise, and say that address-exposed + fields do not completely cover the - // memory of the struct prevent field-wise assignments. Same situation exists for the "src" decision. - if (varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted) - { - // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.) - return nullptr; - } - else if (!varTypeIsFloating(destLclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc))) - { - // Use the dest local var directly, as well as its type. - dest = destLclVarTree; - asgType = destVarDsc->lvType; - - // If the block operation had been a write to a local var of a small int type, - // of the exact size of the small int type, and the var is NormalizeOnStore, - // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't - // have done that normalization. If we're now making it into an assignment, - // the NormalizeOnStore will work, and it can be a full def. - if (destVarDsc->lvNormalizeOnStore()) - { - dest->gtFlags &= (~GTF_VAR_USEASG); - } - } - else - { - // Could be a non-promoted struct, or a floating point type local, or - // an int subject to a partial write. Don't enregister. 
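A small standalone illustration (plain C++, assuming a little-endian target) of the partial-write case named just above, and of why the code below marks such a store with GTF_VAR_USEASG, treating it as a use followed by a definition: the bytes the store does not touch still come from the local's previous value, so earlier definitions are not made dead.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main()
    {
        uint32_t lcl  = 0xAABBCCDD;                 // an earlier definition of the local
        uint16_t part = 0x1122;
        std::memcpy(&lcl, &part, sizeof(part));     // a partial (2-byte) store into it

        // On a little-endian target the upper two bytes still hold the old value,
        // so the earlier definition remains observable after the partial store.
        assert((lcl & 0xFFFF0000u) == 0xAABB0000u);
        assert((lcl & 0x0000FFFFu) == 0x00001122u);
        return 0;
    }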
- lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField)); - - // Mark the local var tree as a definition point of the local. - destLclVarTree->gtFlags |= GTF_VAR_DEF; - if (size < destVarDsc->lvExactSize) - { // If it's not a full-width assignment.... - destLclVarTree->gtFlags |= GTF_VAR_USEASG; - } - - if (dest == destLclVarTree) - { - GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); - dest = gtNewIndir(asgType, addr); - } - } - } - - // Check to ensure we don't have a reducible *(& ... ) - if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR) - { - // If dest is an Indir or Block, and it has a child that is a Addr node - // - GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR - - // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'? - // - GenTree* destOp = addrNode->gtGetOp1(); - var_types destOpType = destOp->TypeGet(); - - // We can if we have a primitive integer type and the sizes are exactly the same. - // - if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType)))) - { - dest = destOp; - asgType = destOpType; - } - } - - if (dest->gtEffectiveVal()->OperIsIndir()) - { - // If we have no information about the destination, we have to assume it could - // live anywhere (not just in the GC heap). - // Mark the GT_IND node so that we use the correct write barrier helper in case - // the field is a GC ref. - - if (!fgIsIndirOfAddrOfLocal(dest)) - { - dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); - tree->gtFlags |= GTF_GLOB_REF; - } - - dest->SetIndirExceptionFlags(this); - tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT); - } - - if (isCopyBlock) - { - if (srcVarDsc != nullptr) - { - // Handled above. - assert(!varTypeIsStruct(srcLclVarTree) || !srcVarDsc->lvPromoted); - if (!varTypeIsFloating(srcLclVarTree->TypeGet()) && - size == genTypeSize(genActualType(srcLclVarTree->TypeGet()))) - { - // Use the src local var directly. - src = srcLclVarTree; - } - else - { - // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar)) - // or indir(lclVarAddr) so it must be on the stack. - unsigned lclVarNum = srcLclVarTree->AsLclVarCommon()->GetLclNum(); - lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_BlockOp)); - GenTree* srcAddr; - if (src == srcLclVarTree) - { - srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src); - src = gtNewOperNode(GT_IND, asgType, srcAddr); - } - else - { - assert(src->OperIsIndir()); - } - } - } - - if (src->OperIsIndir()) - { - if (!fgIsIndirOfAddrOfLocal(src)) - { - // If we have no information about the src, we have to assume it could - // live anywhere (not just in the GC heap). - // Mark the GT_IND node so that we use the correct write barrier helper in case - // the field is a GC ref. - src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); - } - - src->SetIndirExceptionFlags(this); - } - } - else // InitBlk - { -#if FEATURE_SIMD - if (varTypeIsSIMD(asgType)) - { - assert(!isCopyBlock); // Else we would have returned the tree above. - noway_assert(src->IsIntegralConst(0)); - noway_assert(destVarDsc != nullptr); - - src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size); - } - else -#endif - { - if (src->OperIsInitVal()) - { - src = src->gtGetOp1(); - } - assert(src->IsCnsIntOrI()); - // This will mutate the integer constant, in place, to be the correct - // value for the type we are using in the assignment. 
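The FixupInitBlkValue call that follows widens the initblk fill byte by replicating it across the width of asgType. A standalone sketch of that widening (the helper name here is illustrative, and the GC-type restrictions the real method also enforces are omitted):

    #include <cassert>
    #include <cstdint>

    static int64_t ReplicateFillByte(uint8_t fill, unsigned sizeInBytes)
    {
        int64_t pattern = 0;
        for (unsigned i = 0; i < sizeInBytes; i++)
        {
            pattern |= static_cast<int64_t>(fill) << (8 * i);
        }
        return pattern;
    }

    int main()
    {
        assert(ReplicateFillByte(0x2A, 4) == 0x2A2A2A2A); // fill 42 as a TYP_INT constant
        assert(ReplicateFillByte(0xFF, 2) == 0xFFFF);
        assert(ReplicateFillByte(0x00, 8) == 0);
        return 0;
    }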
- src->AsIntCon()->FixupInitBlkValue(asgType); - } - } - - // Ensure that the dest is setup appropriately. - if (dest->gtEffectiveVal()->OperIsIndir()) - { - dest = fgMorphBlockOperand(dest, asgType, size, false /*isBlkReqd*/); - } - - // Ensure that the rhs is setup appropriately. - if (isCopyBlock) - { - src = fgMorphBlockOperand(src, asgType, size, false /*isBlkReqd*/); - } - - // Set the lhs and rhs on the assignment. - if (dest != tree->AsOp()->gtOp1) - { - asg->AsOp()->gtOp1 = dest; - } - if (src != asg->AsOp()->gtOp2) - { - asg->AsOp()->gtOp2 = src; - } - - asg->ChangeType(asgType); - dest->gtFlags |= GTF_DONT_CSE; - asg->gtFlags &= ~GTF_EXCEPT; - asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT); - // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate. - asg->gtFlags &= ~GTF_REVERSE_OPS; - -#ifdef DEBUG - if (verbose) - { - printf("fgMorphOneAsgBlock (after):\n"); - gtDispTree(tree); - } -#endif - return tree; - } - - return nullptr; -} - -//------------------------------------------------------------------------ -// fgMorphInitBlock: Morph a block initialization assignment tree. -// -// Arguments: -// tree - A GT_ASG tree that performs block initialization -// -// Return Value: -// A single assignment, when fgMorphOneAsgBlockOp transforms it. -// -// If the destination is a promoted struct local variable then we will try to -// perform a field by field assignment for each of the promoted struct fields. -// This is not always possible (e.g. if the struct has holes and custom layout). -// -// Otherwise the orginal GT_ASG tree is returned unmodified (always correct but -// least desirable because it prevents enregistration and/or blocks independent -// struct promotion). -// -// Assumptions: -// GT_ASG's children have already been morphed. -// -GenTree* Compiler::fgMorphInitBlock(GenTree* tree) -{ - // We must have the GT_ASG form of InitBlkOp. - noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp()); -#ifdef DEBUG - bool morphed = false; -#endif // DEBUG - - GenTree* src = tree->gtGetOp2(); - GenTree* origDest = tree->gtGetOp1(); - - GenTree* dest = fgMorphBlkNode(origDest, true); - if (dest != origDest) - { - tree->AsOp()->gtOp1 = dest; - } - tree->gtType = dest->TypeGet(); - JITDUMP("\nfgMorphInitBlock:"); - - GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); - if (oneAsgTree) - { - JITDUMP(" using oneAsgTree.\n"); - tree = oneAsgTree; - } - else - { - GenTreeLclVarCommon* destLclNode = nullptr; - unsigned destLclNum = BAD_VAR_NUM; - LclVarDsc* destLclVar = nullptr; - GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src; - unsigned blockSize = 0; - - if (dest->IsLocal()) - { - destLclNode = dest->AsLclVarCommon(); - destLclNum = destLclNode->GetLclNum(); - destLclVar = lvaGetDesc(destLclNum); - blockSize = varTypeIsStruct(destLclVar) ? 
destLclVar->lvExactSize : genTypeSize(destLclVar->TypeGet()); - } - else - { - blockSize = dest->AsBlk()->Size(); - - FieldSeqNode* destFldSeq = nullptr; - if (dest->AsIndir()->Addr()->IsLocalAddrExpr(this, &destLclNode, &destFldSeq)) - { - destLclNum = destLclNode->GetLclNum(); - destLclVar = lvaGetDesc(destLclNum); - } - } - - bool destDoFldAsg = false; - - if (destLclNum != BAD_VAR_NUM) - { -#if LOCAL_ASSERTION_PROP - // Kill everything about destLclNum (and its field locals) - if (optLocalAssertionProp && (optAssertionCount > 0)) - { - fgKillDependentAssertions(destLclNum DEBUGARG(tree)); - } -#endif // LOCAL_ASSERTION_PROP - - // If we have already determined that a promoted TYP_STRUCT lclVar will not be enregistered, - // we are better off doing a block init. - if (destLclVar->lvPromoted && (!destLclVar->lvDoNotEnregister || !destLclNode->TypeIs(TYP_STRUCT))) - { - GenTree* newTree = fgMorphPromoteLocalInitBlock(destLclNode->AsLclVar(), initVal, blockSize); - - if (newTree != nullptr) - { - tree = newTree; - destDoFldAsg = true; - INDEBUG(morphed = true); - } - } - - // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister. - if (!destDoFldAsg && !destLclVar->lvRegStruct) - { - lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); - } - } - - if (!destDoFldAsg) - { - // For an InitBlock we always require a block operand. - dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockSize, true /*isBlkReqd*/); - tree->AsOp()->gtOp1 = dest; - tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); - } - } - -#ifdef DEBUG - if (morphed) - { - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; - - if (verbose) - { - printf("fgMorphInitBlock (after):\n"); - gtDispTree(tree); - } - } -#endif - - return tree; -} - -//------------------------------------------------------------------------ -// fgMorphPromoteLocalInitBlock: Attempts to promote a local block init tree -// to a tree of promoted field initialization assignments. -// -// Arguments: -// destLclNode - The destination LclVar node -// initVal - The initialization value -// blockSize - The amount of bytes to initialize -// -// Return Value: -// A tree that performs field by field initialization of the destination -// struct variable if various conditions are met, nullptr otherwise. 
-// -// Notes: -// This transforms a single block initialization assignment like: -// -// * ASG struct (init) -// +--* BLK(12) struct -// | \--* ADDR long -// | \--* LCL_VAR struct(P) V02 loc0 -// | \--* int V02.a (offs=0x00) -> V06 tmp3 -// | \--* ubyte V02.c (offs=0x04) -> V07 tmp4 -// | \--* float V02.d (offs=0x08) -> V08 tmp5 -// \--* INIT_VAL int -// \--* CNS_INT int 42 -// -// into a COMMA tree of assignments that initialize each promoted struct -// field: -// -// * COMMA void -// +--* COMMA void -// | +--* ASG int -// | | +--* LCL_VAR int V06 tmp3 -// | | \--* CNS_INT int 0x2A2A2A2A -// | \--* ASG ubyte -// | +--* LCL_VAR ubyte V07 tmp4 -// | \--* CNS_INT int 42 -// \--* ASG float -// +--* LCL_VAR float V08 tmp5 -// \--* CNS_DBL float 1.5113661732714390e-13 -// -GenTree* Compiler::fgMorphPromoteLocalInitBlock(GenTreeLclVar* destLclNode, GenTree* initVal, unsigned blockSize) -{ - assert(destLclNode->OperIs(GT_LCL_VAR)); - - LclVarDsc* destLclVar = lvaGetDesc(destLclNode); - assert(varTypeIsStruct(destLclVar->TypeGet())); - assert(destLclVar->lvPromoted); - - if (blockSize == 0) - { - JITDUMP(" size is zero or unknown.\n"); - return nullptr; - } - - if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles) - { - JITDUMP(" dest is address exposed and contains holes.\n"); - return nullptr; - } - - if (destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) - { - JITDUMP(" dest has custom layout and contains holes.\n"); - return nullptr; - } - - if (destLclVar->lvExactSize != blockSize) - { - JITDUMP(" dest size mismatch.\n"); - return nullptr; - } - - if (!initVal->OperIs(GT_CNS_INT)) - { - JITDUMP(" source is not constant.\n"); - return nullptr; - } - - const int64_t initPattern = (initVal->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; - - if (initPattern != 0) - { - for (unsigned i = 0; i < destLclVar->lvFieldCnt; ++i) - { - LclVarDsc* fieldDesc = lvaGetDesc(destLclVar->lvFieldLclStart + i); - - if (varTypeIsSIMD(fieldDesc->TypeGet()) || varTypeIsGC(fieldDesc->TypeGet())) - { - // Cannot initialize GC or SIMD types with a non-zero constant. - // The former is completly bogus. The later restriction could be - // lifted by supporting non-zero SIMD constants or by generating - // field initialization code that converts an integer constant to - // the appropiate SIMD value. Unlikely to be very useful, though. - JITDUMP(" dest contains GC and/or SIMD fields and source constant is not 0.\n"); - return nullptr; - } - } - } - - JITDUMP(" using field by field initialization.\n"); - - GenTree* tree = nullptr; - - for (unsigned i = 0; i < destLclVar->lvFieldCnt; ++i) - { - unsigned fieldLclNum = destLclVar->lvFieldLclStart + i; - LclVarDsc* fieldDesc = lvaGetDesc(fieldLclNum); - GenTree* dest = gtNewLclvNode(fieldLclNum, fieldDesc->TypeGet()); - // If it had been labeled a "USEASG", assignments to the individual promoted fields are not. - dest->gtFlags |= (destLclNode->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG)); - - GenTree* src; - - switch (dest->TypeGet()) - { - case TYP_BOOL: - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - case TYP_USHORT: - // Promoted fields are expected to be "normalize on load". If that changes then - // we may need to adjust this code to widen the constant correctly. 
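The switch that continues below materializes initPattern as a constant of each promoted field's type. For the documented example above (fill value 42), a standalone computation of those constants (plain C++; every byte of initPattern is the fill byte, so the four bytes copied into the float are 0x2A2A2A2A regardless of endianness):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        // Same computation as above, for the example fill value 42 (0x2A).
        const int64_t initPattern = (42 & 0xFF) * 0x0101010101010101LL;

        const int32_t intField  = static_cast<int32_t>(initPattern & 0xFFFFFFFF); // 0x2A2A2A2A
        const uint8_t byteField = static_cast<uint8_t>(initPattern & 0xFF);       // 42

        float floatField; // the bit pattern 0x2A2A2A2A reinterpreted as a float
        std::memcpy(&floatField, &initPattern, sizeof(floatField));

        // Prints: 0x2a2a2a2a 42 1.51137e-13, matching the constants in the tree above.
        std::printf("0x%08x %d %g\n", static_cast<unsigned>(intField),
                    static_cast<int>(byteField), floatField);
        return 0;
    }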
- assert(fieldDesc->lvNormalizeOnLoad()); - FALLTHROUGH; - case TYP_INT: - { - int64_t mask = (int64_t(1) << (genTypeSize(dest->TypeGet()) * 8)) - 1; - src = gtNewIconNode(static_cast(initPattern & mask)); - break; - } - case TYP_LONG: - src = gtNewLconNode(initPattern); - break; - case TYP_FLOAT: - float floatPattern; - memcpy(&floatPattern, &initPattern, sizeof(floatPattern)); - src = gtNewDconNode(floatPattern, dest->TypeGet()); - break; - case TYP_DOUBLE: - double doublePattern; - memcpy(&doublePattern, &initPattern, sizeof(doublePattern)); - src = gtNewDconNode(doublePattern, dest->TypeGet()); - break; - case TYP_REF: - case TYP_BYREF: -#ifdef FEATURE_SIMD - case TYP_SIMD8: - case TYP_SIMD12: - case TYP_SIMD16: - case TYP_SIMD32: -#endif // FEATURE_SIMD - assert(initPattern == 0); - src = gtNewIconNode(0, dest->TypeGet()); - break; - default: - unreached(); - } - - GenTree* asg = gtNewAssignNode(dest, src); - -#if LOCAL_ASSERTION_PROP - if (optLocalAssertionProp) - { - optAssertionGen(asg); - } -#endif // LOCAL_ASSERTION_PROP - - if (tree != nullptr) - { - tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg); - } - else - { - tree = asg; - } - } - - return tree; -} - -//------------------------------------------------------------------------ -// fgMorphGetStructAddr: Gets the address of a struct object -// -// Arguments: -// pTree - the parent's pointer to the struct object node -// clsHnd - the class handle for the struct type -// isRValue - true if this is a source (not dest) -// -// Return Value: -// Returns the address of the struct value, possibly modifying the existing tree to -// sink the address below any comma nodes (this is to canonicalize for value numbering). -// If this is a source, it will morph it to an GT_IND before taking its address, -// since it may not be remorphed (and we don't want blk nodes as rvalues). - -GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue) -{ - GenTree* addr; - GenTree* tree = *pTree; - // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we - // need to hang onto that for the purposes of value numbering. - if (tree->OperIsIndir()) - { - if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0) - { - addr = tree->AsOp()->gtOp1; - } - else - { - if (isRValue && tree->OperIsBlk()) - { - tree->ChangeOper(GT_IND); - } - addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree); - } - } - else if (tree->gtOper == GT_COMMA) - { - // If this is a comma, we're going to "sink" the GT_ADDR below it. 
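A minimal C++ analogy for the "sink the GT_ADDR below the COMMA" canonicalization performed here: taking the address of a comma expression's result is equivalent to evaluating the side effect first and then taking the address, so the morphed shape computes the same address while keeping the COMMA as the address expression that value numbering expects.

    #include <cassert>

    int main()
    {
        int sideEffects = 0;
        int x = 7;

        int* addrOverComma = &(++sideEffects, x); // original shape:  ADDR(COMMA(effect, x))
        int* commaOverAddr = (++sideEffects, &x); // canonical shape: COMMA(effect, ADDR(x))

        // Both forms run the side effect and yield the same address.
        assert(addrOverComma == commaOverAddr);
        assert(sideEffects == 2);
        return 0;
    }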
- (void)fgMorphGetStructAddr(&(tree->AsOp()->gtOp2), clsHnd, isRValue); - tree->gtType = TYP_BYREF; - addr = tree; - } - else - { - switch (tree->gtOper) - { - case GT_LCL_FLD: - case GT_LCL_VAR: - case GT_INDEX: - case GT_FIELD: - case GT_ARR_ELEM: - addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree); - break; - case GT_INDEX_ADDR: - addr = tree; - break; - default: - { - // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're - // not going to use "temp" - GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd); - unsigned lclNum = temp->gtEffectiveVal()->AsLclVar()->GetLclNum(); - lvaSetVarDoNotEnregister(lclNum DEBUG_ARG(DNER_VMNeedsStackAddr)); - addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue); - break; - } - } - } - *pTree = addr; - return addr; -} - -//------------------------------------------------------------------------ -// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment -// -// Arguments: -// tree - The struct type node -// isDest - True if this is the destination of the assignment -// -// Return Value: -// Returns the possibly-morphed node. The caller is responsible for updating -// the parent of this node.. - -GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest) -{ - JITDUMP("fgMorphBlkNode for %s tree, before:\n", (isDest ? "dst" : "src")); - DISPTREE(tree); - GenTree* handleTree = nullptr; - GenTree* addr = nullptr; - if (tree->OperIs(GT_COMMA)) - { - // In order to CSE and value number array index expressions and bounds checks, - // the commas in which they are contained need to match. - // The pattern is that the COMMA should be the address expression. - // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind. - // TODO-1stClassStructs: Consider whether this can be improved. 
- // Example: - // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct - // after: [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct - - addr = tree; - GenTree* effectiveVal = tree->gtEffectiveVal(); - - GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); - for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) - { - commas.Push(comma); - } - - GenTree* lastComma = commas.Top(); - noway_assert(lastComma->gtGetOp2() == effectiveVal); - GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); -#ifdef DEBUG - effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - lastComma->AsOp()->gtOp2 = effectiveValAddr; - - while (!commas.Empty()) - { - GenTree* comma = commas.Pop(); - comma->gtType = TYP_BYREF; - gtUpdateNodeSideEffects(comma); - } - - handleTree = effectiveVal; - } - else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR)) - { - handleTree = tree; - addr = tree->AsIndir()->Addr(); - } - - if (addr != nullptr) - { - var_types structType = handleTree->TypeGet(); - if (structType == TYP_STRUCT) - { - CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree); - if (structHnd == NO_CLASS_HANDLE) - { - tree = gtNewOperNode(GT_IND, structType, addr); - } - else - { - tree = gtNewObjNode(structHnd, addr); - gtSetObjGcInfo(tree->AsObj()); - } - } - else - { - tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, typGetBlkLayout(genTypeSize(structType))); - } - - gtUpdateNodeSideEffects(tree); -#ifdef DEBUG - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - } - - if (!tree->OperIsBlk()) - { - JITDUMP("fgMorphBlkNode after:\n"); - DISPTREE(tree); - return tree; - } - GenTreeBlk* blkNode = tree->AsBlk(); - if (blkNode->OperGet() == GT_DYN_BLK) - { - if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI()) - { - unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue(); - // A GT_BLK with size of zero is not supported, - // so if we encounter such a thing we just leave it as a GT_DYN_BLK - if (size != 0) - { - blkNode->AsDynBlk()->gtDynamicSize = nullptr; - blkNode->ChangeOper(GT_BLK); - blkNode->SetLayout(typGetBlkLayout(size)); - } - else - { - JITDUMP("fgMorphBlkNode after, DYN_BLK with zero size can't be morphed:\n"); - DISPTREE(blkNode); - return blkNode; - } - } - else - { - JITDUMP("fgMorphBlkNode after, DYN_BLK with non-const size can't be morphed:\n"); - DISPTREE(blkNode); - return blkNode; - } - } - GenTree* blkSrc = blkNode->Addr(); - assert(blkSrc != nullptr); - if (!blkNode->TypeIs(TYP_STRUCT) && blkSrc->OperIs(GT_ADDR) && blkSrc->gtGetOp1()->OperIs(GT_LCL_VAR)) - { - GenTreeLclVarCommon* lclVarNode = blkSrc->gtGetOp1()->AsLclVarCommon(); - if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode))) - { - lvaSetVarDoNotEnregister(lclVarNode->GetLclNum() DEBUG_ARG(DNER_VMNeedsStackAddr)); - } - } - - JITDUMP("fgMorphBlkNode after:\n"); - DISPTREE(tree); - return tree; -} - -//------------------------------------------------------------------------ -// fgMorphBlockOperand: Canonicalize an operand of a block assignment -// -// Arguments: -// tree - The block operand -// asgType - The type of the assignment -// blockWidth - The size of the block -// isBlkReqd - true iff this operand must remain a block node -// -// Return Value: -// Returns the morphed block operand -// -// Notes: -// This does the following: -// - Ensures that a struct operand is 
a block node or lclVar. -// - Ensures that any COMMAs are above ADDR nodes. -// Although 'tree' WAS an operand of a block assignment, the assignment -// may have been retyped to be a scalar assignment. - -GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isBlkReqd) -{ - GenTree* effectiveVal = tree->gtEffectiveVal(); - - if (asgType != TYP_STRUCT) - { - if (effectiveVal->OperIsIndir()) - { - if (!isBlkReqd) - { - GenTree* addr = effectiveVal->AsIndir()->Addr(); - if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType)) - { - effectiveVal = addr->gtGetOp1(); - } - else if (effectiveVal->OperIsBlk()) - { - effectiveVal->SetOper(GT_IND); - } - } - effectiveVal->gtType = asgType; - } - else if (effectiveVal->TypeGet() != asgType) - { - if (effectiveVal->IsCall()) - { -#ifdef DEBUG - GenTreeCall* call = effectiveVal->AsCall(); - assert(call->TypeGet() == TYP_STRUCT); - assert(blockWidth == info.compCompHnd->getClassSize(call->gtRetClsHnd)); -#endif - } - else - { - GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); - effectiveVal = gtNewIndir(asgType, addr); - } - } - } - else - { - GenTreeIndir* indirTree = nullptr; - GenTreeLclVarCommon* lclNode = nullptr; - bool needsIndirection = true; - - if (effectiveVal->OperIsIndir()) - { - indirTree = effectiveVal->AsIndir(); - GenTree* addr = effectiveVal->AsIndir()->Addr(); - if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR)) - { - lclNode = addr->gtGetOp1()->AsLclVarCommon(); - } - } - else if (effectiveVal->OperGet() == GT_LCL_VAR) - { - lclNode = effectiveVal->AsLclVarCommon(); - } - else if (effectiveVal->IsCall()) - { - needsIndirection = false; -#ifdef DEBUG - GenTreeCall* call = effectiveVal->AsCall(); - assert(call->TypeGet() == TYP_STRUCT); - assert(blockWidth == info.compCompHnd->getClassSize(call->gtRetClsHnd)); -#endif - } - - if (lclNode != nullptr) - { - LclVarDsc* varDsc = &(lvaTable[lclNode->GetLclNum()]); - if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType)) - { - if (effectiveVal != lclNode) - { - JITDUMP("Replacing block node [%06d] with lclVar V%02u\n", dspTreeID(tree), lclNode->GetLclNum()); - effectiveVal = lclNode; - } - needsIndirection = false; - } - else - { - // This may be a lclVar that was determined to be address-exposed. - effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT); - } - } - if (needsIndirection) - { - if (indirTree != nullptr) - { - if (indirTree->OperIsBlk() && !isBlkReqd) - { - effectiveVal->SetOper(GT_IND); - } - else - { - // If we have an indirection and a block is required, it should already be a block. 
- assert(indirTree->OperIsBlk() || !isBlkReqd); - } - effectiveVal->gtType = asgType; - } - else - { - GenTree* newTree; - GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); - if (isBlkReqd) - { - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal); - if (clsHnd == NO_CLASS_HANDLE) - { - newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, typGetBlkLayout(blockWidth)); - } - else - { - newTree = gtNewObjNode(clsHnd, addr); - gtSetObjGcInfo(newTree->AsObj()); - } - } - else - { - newTree = gtNewIndir(asgType, addr); - } - effectiveVal = newTree; - } - } - } - assert(effectiveVal->TypeIs(asgType) || (varTypeIsSIMD(asgType) && varTypeIsStruct(effectiveVal))); - tree = effectiveVal; - return tree; -} - -//------------------------------------------------------------------------ -// fgMorphCopyBlock: Perform the Morphing of block copy -// -// Arguments: -// tree - a block copy (i.e. an assignment with a block op on the lhs). -// -// Return Value: -// We can return the orginal block copy unmodified (least desirable, but always correct) -// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable). -// If we have performed struct promotion of the Source() or the Dest() then we will try to -// perform a field by field assignment for each of the promoted struct fields. -// -// Assumptions: -// The child nodes for tree have already been Morphed. -// -// Notes: -// If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest(). -// When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes -// and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes. -// if the Source() or Dest() is a a struct that has a "CustomLayout" and "ConstainsHoles" then we -// can not use a field by field assignment and must leave the orginal block copy unmodified. - -GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) -{ - noway_assert(tree->OperIsCopyBlkOp()); - - JITDUMP("fgMorphCopyBlock:\n"); - - bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0; - - GenTreeOp* asg = tree->AsOp(); - GenTree* src = asg->gtGetOp2(); - GenTree* dest = asg->gtGetOp1(); - -#if FEATURE_MULTIREG_RET - // If this is a multi-reg return, we will not do any morphing of this node. - if (src->IsMultiRegCall()) - { - assert(dest->OperGet() == GT_LCL_VAR); - JITDUMP(" not morphing a multireg call return\n"); - return tree; - } - else if (dest->IsMultiRegLclVar() && !src->IsMultiRegNode()) - { - dest->AsLclVar()->ClearMultiReg(); - } -#endif // FEATURE_MULTIREG_RET - - if (src->IsCall()) - { - if (dest->OperIs(GT_OBJ)) - { - GenTreeLclVar* lclVar = fgMorphTryFoldObjAsLclVar(dest->AsObj()); - if (lclVar != nullptr) - { - dest = lclVar; - asg->gtOp1 = lclVar; - } - } - - if (dest->OperIs(GT_LCL_VAR)) - { - LclVarDsc* varDsc = lvaGetDesc(dest->AsLclVar()); - if (varTypeIsStruct(varDsc) && varDsc->CanBeReplacedWithItsField(this)) - { - JITDUMP(" not morphing a single reg call return\n"); - return tree; - } - } - } - - // If we have an array index on the lhs, we need to create an obj node. 
- - dest = fgMorphBlkNode(dest, true); - if (dest != asg->gtGetOp1()) - { - asg->gtOp1 = dest; - if (dest->IsLocal()) - { - dest->gtFlags |= GTF_VAR_DEF; - } - } -#ifdef DEBUG - if (asg->TypeGet() != dest->TypeGet()) - { - JITDUMP("changing type of dest from %-6s to %-6s\n", varTypeName(asg->TypeGet()), varTypeName(dest->TypeGet())); - } -#endif - asg->ChangeType(dest->TypeGet()); - src = fgMorphBlkNode(src, false); - - asg->gtOp2 = src; - - GenTree* oldTree = tree; - GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); - - if (oneAsgTree) - { - JITDUMP(" using oneAsgTree.\n"); - tree = oneAsgTree; - } - else - { - unsigned blockWidth; - bool blockWidthIsConst = false; - GenTreeLclVarCommon* lclVarTree = nullptr; - GenTreeLclVarCommon* srcLclVarTree = nullptr; - unsigned destLclNum = BAD_VAR_NUM; - unsigned modifiedLclNum = BAD_VAR_NUM; - LclVarDsc* destLclVar = nullptr; - FieldSeqNode* destFldSeq = nullptr; - unsigned destLclOffset = 0; - bool destDoFldAsg = false; - GenTree* destAddr = nullptr; - GenTree* srcAddr = nullptr; - bool destOnStack = false; - bool hasGCPtrs = false; - - JITDUMP("block assignment to morph:\n"); - DISPTREE(asg); - - if (dest->IsLocal()) - { - blockWidthIsConst = true; - destOnStack = true; - modifiedLclNum = dest->AsLclVarCommon()->GetLclNum(); - if (dest->gtOper == GT_LCL_VAR) - { - lclVarTree = dest->AsLclVarCommon(); - destLclNum = modifiedLclNum; - destLclVar = &lvaTable[destLclNum]; - if (destLclVar->lvType == TYP_STRUCT) - { - // It would be nice if lvExactSize always corresponded to the size of the struct, - // but it doesn't always for the temps that the importer creates when it spills side - // effects. - // TODO-Cleanup: Determine when this happens, and whether it can be changed. - blockWidth = info.compCompHnd->getClassSize(destLclVar->GetStructHnd()); - } - else - { - blockWidth = genTypeSize(destLclVar->lvType); - } - hasGCPtrs = destLclVar->HasGCPtr(); - } - else - { - assert(dest->TypeGet() != TYP_STRUCT); - assert(dest->gtOper == GT_LCL_FLD); - GenTreeLclFld* destFld = dest->AsLclFld(); - blockWidth = genTypeSize(destFld->TypeGet()); - destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, destFld); - destFldSeq = destFld->GetFieldSeq(); - destLclOffset = destFld->GetLclOffs(); - } - } - else - { - GenTree* effectiveDest = dest->gtEffectiveVal(); - if (effectiveDest->OperGet() == GT_IND) - { - assert(dest->TypeGet() != TYP_STRUCT); - blockWidth = genTypeSize(effectiveDest->TypeGet()); - blockWidthIsConst = true; - if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) - { - destAddr = dest->gtGetOp1(); - } - } - else - { - assert(effectiveDest->OperIsBlk()); - GenTreeBlk* blk = effectiveDest->AsBlk(); - - blockWidth = blk->Size(); - blockWidthIsConst = (blk->gtOper != GT_DYN_BLK); - if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) - { - destAddr = blk->Addr(); - } - } - if (destAddr != nullptr) - { - noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL); - if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq)) - { - destOnStack = true; - destLclNum = lclVarTree->GetLclNum(); - modifiedLclNum = destLclNum; - destLclVar = &lvaTable[destLclNum]; - destLclOffset = lclVarTree->GetLclOffs(); - } - } - } - -#if LOCAL_ASSERTION_PROP - // Kill everything about modifiedLclNum (and its field locals) - if ((modifiedLclNum != BAD_VAR_NUM) && optLocalAssertionProp) - { - if (optAssertionCount > 0) - { - fgKillDependentAssertions(modifiedLclNum DEBUGARG(tree)); - } - } -#endif // 
LOCAL_ASSERTION_PROP - - if (destLclVar != nullptr) - { - if (destLclVar->lvPromoted && blockWidthIsConst) - { - noway_assert(varTypeIsStruct(destLclVar)); - noway_assert(!opts.MinOpts()); - - if (blockWidth == destLclVar->lvExactSize) - { - JITDUMP(" (destDoFldAsg=true)"); - // We may decide later that a copyblk is required when this struct has holes - destDoFldAsg = true; - } - else - { - JITDUMP(" with mismatched dest size"); - } - } - } - - FieldSeqNode* srcFldSeq = nullptr; - unsigned srcLclNum = BAD_VAR_NUM; - LclVarDsc* srcLclVar = nullptr; - unsigned srcLclOffset = 0; - bool srcDoFldAsg = false; - - bool srcUseLclFld = false; - bool destUseLclFld = false; - - if (src->IsLocal()) - { - srcLclVarTree = src->AsLclVarCommon(); - srcLclNum = srcLclVarTree->GetLclNum(); - if (src->OperGet() == GT_LCL_FLD) - { - srcFldSeq = src->AsLclFld()->GetFieldSeq(); - } - } - else if (src->OperIsIndir()) - { - if (src->AsOp()->gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq)) - { - srcLclNum = srcLclVarTree->GetLclNum(); - } - else - { - srcAddr = src->AsOp()->gtOp1; - } - } - - if (srcLclNum != BAD_VAR_NUM) - { - srcLclOffset = srcLclVarTree->GetLclOffs(); - srcLclVar = &lvaTable[srcLclNum]; - - if (srcLclVar->lvPromoted && blockWidthIsConst) - { - noway_assert(varTypeIsStruct(srcLclVar)); - noway_assert(!opts.MinOpts()); - - if (blockWidth == srcLclVar->lvExactSize) - { - JITDUMP(" (srcDoFldAsg=true)"); - // We may decide later that a copyblk is required when this struct has holes - srcDoFldAsg = true; - } - else - { - JITDUMP(" with mismatched src size"); - } - } - } - - // Check to see if we are doing a copy to/from the same local block. - // If so, morph it to a nop. - if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq) && - destFldSeq != FieldSeqStore::NotAField()) - { - JITDUMP("Self-copy; replaced with a NOP.\n"); - GenTree* nop = gtNewNothingNode(); - INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return nop; - } - - // Check to see if we are required to do a copy block because the struct contains holes - // and either the src or dest is externally visible - // - bool requiresCopyBlock = false; - bool srcSingleLclVarAsg = false; - bool destSingleLclVarAsg = false; - - // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock. - if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct)) - { - requiresCopyBlock = true; - } - - // Can we use field by field assignment for the dest? - if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) - { - JITDUMP(" dest contains custom layout and contains holes"); - // C++ style CopyBlock with holes - requiresCopyBlock = true; - } - - // Can we use field by field assignment for the src? - if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles) - { - JITDUMP(" src contains custom layout and contains holes"); - // C++ style CopyBlock with holes - requiresCopyBlock = true; - } - -#if defined(TARGET_ARM) - if ((src->OperIsIndir()) && (src->gtFlags & GTF_IND_UNALIGNED)) - { - JITDUMP(" src is unaligned"); - requiresCopyBlock = true; - } - - if (asg->gtFlags & GTF_BLK_UNALIGNED) - { - JITDUMP(" asg is unaligned"); - requiresCopyBlock = true; - } -#endif // TARGET_ARM - - // Don't use field by field assignment if the src is a call, - // lowering will handle it without spilling the call result into memory - // to access the individual fields. 
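Condensing the conditions gathered in this region (including the call-source case tested just below) into a single predicate; this is a simplified standalone sketch, not the actual control flow, and it leaves out the profitability heuristics applied afterwards:

    #include <cassert>

    static bool RequiresCopyBlock(bool destOrSrcIsRegStruct,
                                  bool destHasCustomLayoutWithHoles,
                                  bool srcHasCustomLayoutWithHoles,
                                  bool srcIsUnalignedOnArm,
                                  bool srcIsCall)
    {
        return destOrSrcIsRegStruct || destHasCustomLayoutWithHoles ||
               srcHasCustomLayoutWithHoles || srcIsUnalignedOnArm || srcIsCall;
    }

    int main()
    {
        // An ordinary promoted-struct copy may be split into field assignments...
        assert(!RequiresCopyBlock(false, false, false, false, false));
        // ...but a C++-style struct with custom layout and holes stays a block copy.
        assert(RequiresCopyBlock(false, true, false, false, false));
        return 0;
    }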
- // - if (src->OperGet() == GT_CALL) - { - JITDUMP(" src is a call"); - requiresCopyBlock = true; - } - - // If we passed the above checks, then we will check these two - if (!requiresCopyBlock) - { - // It is not always profitable to do field by field init for structs that are allocated to memory. - // A struct with 8 bool fields will require 8 moves instead of one if we do this transformation. - // A simple heuristic when field by field copy is prefered: - // - if fields can be enregistered; - // - if the struct has GCPtrs (block copy would be done via helper that is expensive); - // - if the struct has only one field. - bool dstFldIsProfitable = - ((destLclVar != nullptr) && - (!destLclVar->lvDoNotEnregister || destLclVar->HasGCPtr() || (destLclVar->lvFieldCnt == 1))); - bool srcFldIsProfitable = - ((srcLclVar != nullptr) && - (!srcLclVar->lvDoNotEnregister || srcLclVar->HasGCPtr() || (srcLclVar->lvFieldCnt == 1))); - // Are both dest and src promoted structs? - if (destDoFldAsg && srcDoFldAsg && (dstFldIsProfitable || srcFldIsProfitable)) - { - // Both structs should be of the same type, or have the same number of fields of the same type. - // If not we will use a copy block. - bool misMatchedTypes = false; - if (destLclVar->GetStructHnd() != srcLclVar->GetStructHnd()) - { - if (destLclVar->lvFieldCnt != srcLclVar->lvFieldCnt) - { - misMatchedTypes = true; - } - else - { - for (int i = 0; i < destLclVar->lvFieldCnt; i++) - { - LclVarDsc* destFieldVarDsc = lvaGetDesc(destLclVar->lvFieldLclStart + i); - LclVarDsc* srcFieldVarDsc = lvaGetDesc(srcLclVar->lvFieldLclStart + i); - if ((destFieldVarDsc->lvType != srcFieldVarDsc->lvType) || - (destFieldVarDsc->lvFldOffset != srcFieldVarDsc->lvFldOffset)) - { - misMatchedTypes = true; - break; - } - } - } - if (misMatchedTypes) - { - requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock - JITDUMP(" with mismatched types"); - } - } - } - else if (destDoFldAsg && dstFldIsProfitable) - { - // Match the following kinds of trees: - // fgMorphTree BB01, stmt 9 (before) - // [000052] ------------ const int 8 - // [000053] -A--G------- copyBlk void - // [000051] ------------ addr byref - // [000050] ------------ lclVar long V07 loc5 - // [000054] --------R--- void - // [000049] ------------ addr byref - // [000048] ------------ lclVar struct(P) V06 loc4 - // long V06.h (offs=0x00) -> V17 tmp9 - // Yields this transformation - // fgMorphCopyBlock (after): - // [000050] ------------ lclVar long V07 loc5 - // [000085] -A---------- = long - // [000083] D------N---- lclVar long V17 tmp9 - // - if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) && - (blockWidth == genTypeSize(srcLclVar->TypeGet()))) - { - // Reject the following tree: - // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe - // - // fgMorphTree BB01, stmt 6 (before) - // [000038] ------------- const int 4 - // [000039] -A--G-------- copyBlk void - // [000037] ------------- addr byref - // [000036] ------------- lclVar int V05 loc3 - // [000040] --------R---- void - // [000035] ------------- addr byref - // [000034] ------------- lclVar struct(P) V04 loc2 - // float V04.f1 (offs=0x00) -> V13 tmp6 - // As this would framsform into - // float V13 = int V05 - // - unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart; - var_types destType = lvaTable[fieldLclNum].TypeGet(); - if (srcLclVar->TypeGet() == destType) - { - srcSingleLclVarAsg = true; - } - } - } - else if (srcDoFldAsg && srcFldIsProfitable) - { - // Check for the symmetric case (which 
happens for the _pointer field of promoted spans): - // - // [000240] -----+------ /--* lclVar struct(P) V18 tmp9 - // /--* byref V18._value (offs=0x00) -> V30 tmp21 - // [000245] -A------R--- * = struct (copy) - // [000244] -----+------ \--* obj(8) struct - // [000243] -----+------ \--* addr byref - // [000242] D----+-N---- \--* lclVar byref V28 tmp19 - // - if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) && - (blockWidth == genTypeSize(destLclVar->TypeGet()))) - { - // Check for type agreement - unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart; - var_types srcType = lvaTable[fieldLclNum].TypeGet(); - if (destLclVar->TypeGet() == srcType) - { - destSingleLclVarAsg = true; - } - } - } - // Are neither dest or src promoted structs? - else - { - assert(!(destDoFldAsg && dstFldIsProfitable) && !(srcDoFldAsg && srcFldIsProfitable)); - requiresCopyBlock = true; // Leave as a CopyBlock - JITDUMP(" with no promoted structs"); - } - } - - // If we require a copy block the set both of the field assign bools to false - if (requiresCopyBlock) - { - // If a copy block is required then we won't do field by field assignments - destDoFldAsg = false; - srcDoFldAsg = false; - } - - JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n"); - - // Mark the dest/src structs as DoNotEnreg when they are not being fully referenced as the same type. - // - if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg) - { - if (!destLclVar->lvRegStruct || (destLclVar->lvType != dest->TypeGet())) - { - if (!dest->IsMultiRegLclVar() || (blockWidth != destLclVar->lvExactSize) || - (destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)) - { - // Mark it as DoNotEnregister. - lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); - } - else if (dest->IsMultiRegLclVar()) - { - // Handle this as lvIsMultiRegRet; this signals to SSA that it can't consider these fields - // SSA candidates (we don't have a way to represent multiple SSANums on MultiRegLclVar nodes). - destLclVar->lvIsMultiRegRet = true; - } - } - } - - if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg) - { - if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet())) - { - lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp)); - } - } - - var_types asgType = dest->TypeGet(); - if (requiresCopyBlock) - { - bool isBlkReqd = (asgType == TYP_STRUCT); - dest = fgMorphBlockOperand(dest, asgType, blockWidth, isBlkReqd); - asg->AsOp()->gtOp1 = dest; - asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); - - // Eliminate the "OBJ or BLK" node on the src. - src = fgMorphBlockOperand(src, asgType, blockWidth, false /*!isBlkReqd*/); - asg->AsOp()->gtOp2 = src; - - goto _Done; - } - - // - // Otherwise we convert this CopyBlock into individual field by field assignments - // - tree = nullptr; - - GenTree* addrSpill = nullptr; - unsigned addrSpillTemp = BAD_VAR_NUM; - bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame - - unsigned fieldCnt = DUMMY_INIT(0); - - if (destDoFldAsg && srcDoFldAsg) - { - // To do fieldwise assignments for both sides. - // The structs do not have to be the same exact types but have to have same field types - // at the same offsets. 
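A concrete illustration, in ordinary C++, of the requirement stated above: the two structs need not be the same type, only field-for-field compatible (same field types at the same offsets), and the copy then lowers to one scalar assignment per promoted field.

    #include <cassert>
    #include <cstddef>

    struct A { int i; float f; };
    struct B { int i; float f; }; // a different type with an identical field layout

    static_assert(offsetof(A, i) == offsetof(B, i), "fields must line up");
    static_assert(offsetof(A, f) == offsetof(B, f), "fields must line up");

    int main()
    {
        A src{3, 1.5f};
        B dst{};
        dst.i = src.i; // one assignment per promoted field
        dst.f = src.f;
        assert(dst.i == 3 && dst.f == 1.5f);
        return 0;
    }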
- assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM); - assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt); - - fieldCnt = destLclVar->lvFieldCnt; - goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field - // assignments. - } - else if (destDoFldAsg) - { - fieldCnt = destLclVar->lvFieldCnt; - src = fgMorphBlockOperand(src, asgType, blockWidth, false /*isBlkReqd*/); - - srcUseLclFld = fgMorphCanUseLclFldForCopy(destLclNum, srcLclNum); - - if (!srcUseLclFld && srcAddr == nullptr) - { - srcAddr = fgMorphGetStructAddr(&src, destLclVar->GetStructHnd(), true /* rValue */); - } - } - else - { - assert(srcDoFldAsg); - fieldCnt = srcLclVar->lvFieldCnt; - dest = fgMorphBlockOperand(dest, asgType, blockWidth, false /*isBlkReqd*/); - if (dest->OperIsBlk()) - { - dest->SetOper(GT_IND); - dest->gtType = TYP_STRUCT; - } - destUseLclFld = fgMorphCanUseLclFldForCopy(srcLclNum, destLclNum); - if (!destUseLclFld) - { - destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); - } - } - - if (destDoFldAsg) - { - noway_assert(!srcDoFldAsg); - if (!srcUseLclFld) - { - if (gtClone(srcAddr)) - { - // srcAddr is simple expression. No need to spill. - noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); - } - else - { - // srcAddr is complex expression. Clone and spill it (unless the destination is - // a struct local that only has one field, in which case we'd only use the - // address value once...) - if (destLclVar->lvFieldCnt > 1) - { - // We will spill srcAddr (i.e. assign to a temp "BlockOp address local") - // no need to clone a new copy as it is only used once - // - addrSpill = srcAddr; // addrSpill represents the 'srcAddr' - } - } - } - } - - if (srcDoFldAsg) - { - noway_assert(!destDoFldAsg); - - // If we're doing field-wise stores, to an address within a local, and we copy - // the address into "addrSpill", do *not* declare the original local var node in the - // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the - // field-wise assignments as an "indirect" assignment to the local. - // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before - // we clone it.) - if (lclVarTree != nullptr) - { - lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG); - } - - if (!destUseLclFld) - { - if (gtClone(destAddr)) - { - // destAddr is simple expression. No need to spill - noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); - } - else - { - // destAddr is complex expression. Clone and spill it (unless - // the source is a struct local that only has one field, in which case we'd only - // use the address value once...) - if (srcLclVar->lvFieldCnt > 1) - { - // We will spill destAddr (i.e. assign to a temp "BlockOp address local") - // no need to clone a new copy as it is only used once - // - addrSpill = destAddr; // addrSpill represents the 'destAddr' - } - } - } - } - - // TODO-CQ: this should be based on a more general - // "BaseAddress" method, that handles fields of structs, before or after - // morphing. - if ((addrSpill != nullptr) && addrSpill->OperIs(GT_ADDR)) - { - GenTree* addrSpillOp = addrSpill->AsOp()->gtGetOp1(); - if (addrSpillOp->IsLocal()) - { - // We will *not* consider this to define the local, but rather have each individual field assign - // be a definition. 
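// Aside: the addrSpill logic in this hunk boils down to evaluating a complex, possibly
// side-effecting address expression exactly once into a BYREF temp and then reusing that
// temp for every per-field store. A plain C++ analogue of that pattern; Payload and
// computeBase are invented for this sketch.
#include <cstdint>

struct Payload
{
    int32_t a;
    int64_t b;
};

Payload* computeBase()
{
    static Payload storage{};
    return &storage; // stands in for a complex address computation
}

void copyFieldwise(const Payload& src)
{
    Payload* base = computeBase(); // "spill" the address once (the BlockOp address local)
    base->a = src.a;               // every field store reuses the spilled address
    base->b = src.b;               // rather than re-evaluating computeBase() per field
}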
- addrSpillOp->gtFlags &= ~(GTF_LIVENESS_MASK); - assert(lvaGetPromotionType(addrSpillOp->AsLclVarCommon()->GetLclNum()) != PROMOTION_TYPE_INDEPENDENT); - addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our - // local stack frame - } - } - - if (addrSpill != nullptr) - { - // 'addrSpill' is already morphed - - // Spill the (complex) address to a BYREF temp. - // Note, at most one address may need to be spilled. - addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local")); - - lvaTable[addrSpillTemp].lvType = TYP_BYREF; - - if (addrSpillIsStackDest) - { - lvaTable[addrSpillTemp].lvStackByref = true; - } - - tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill); - - // If we are assigning the address of a LclVar here - // liveness does not account for this kind of address taken use. - // - // We have to mark this local as address exposed so - // that we don't delete the definition for this LclVar - // as a dead store later on. - // - if (addrSpill->OperGet() == GT_ADDR) - { - GenTree* addrOp = addrSpill->AsOp()->gtOp1; - if (addrOp->IsLocal()) - { - unsigned lclVarNum = addrOp->AsLclVarCommon()->GetLclNum(); - lvaTable[lclVarNum].lvAddrExposed = true; - lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed)); - } - } - } - - _AssignFields: - - // We may have allocated a temp above, and that may have caused the lvaTable to be expanded. - // So, beyond this point we cannot rely on the old values of 'srcLclVar' and 'destLclVar'. - for (unsigned i = 0; i < fieldCnt; ++i) - { - GenTree* dstFld; - if (destDoFldAsg) - { - noway_assert(destLclNum != BAD_VAR_NUM); - unsigned dstFieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; - dstFld = gtNewLclvNode(dstFieldLclNum, lvaTable[dstFieldLclNum].TypeGet()); - // If it had been labeled a "USEASG", assignments to the individual promoted fields are not. - if (destAddr != nullptr) - { - noway_assert(destAddr->AsOp()->gtOp1->gtOper == GT_LCL_VAR); - dstFld->gtFlags |= destAddr->AsOp()->gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); - } - else - { - noway_assert(lclVarTree != nullptr); - dstFld->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); - } - // Don't CSE the lhs of an assignment. - dstFld->gtFlags |= GTF_DONT_CSE; - } - else - { - noway_assert(srcDoFldAsg); - - if (destSingleLclVarAsg) - { - noway_assert(fieldCnt == 1); - noway_assert(destLclVar != nullptr); - noway_assert(addrSpill == nullptr); - - dstFld = gtNewLclvNode(destLclNum, destLclVar->TypeGet()); - } - else - { - GenTree* dstAddrClone = nullptr; - if (!destUseLclFld) - { - // Need address of the destination. - if (addrSpill) - { - assert(addrSpillTemp != BAD_VAR_NUM); - dstAddrClone = gtNewLclvNode(addrSpillTemp, TYP_BYREF); - } - else - { - if (i == 0) - { - // Use the orginal destAddr tree when i == 0 - dstAddrClone = destAddr; - } - else - { - // We can't clone multiple copies of a tree with persistent side effects - noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); - - dstAddrClone = gtCloneExpr(destAddr); - noway_assert(dstAddrClone != nullptr); - - JITDUMP("dstAddr - Multiple Fields Clone created:\n"); - DISPTREE(dstAddrClone); - - // Morph the newly created tree - dstAddrClone = fgMorphTree(dstAddrClone); - } - - // Is the address of a local? - GenTreeLclVarCommon* lclVarTree = nullptr; - bool isEntire = false; - bool* pIsEntire = (blockWidthIsConst ? 
&isEntire : nullptr); - if (dstAddrClone->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire)) - { - lclVarTree->gtFlags |= GTF_VAR_DEF; - if (!isEntire) - { - lclVarTree->gtFlags |= GTF_VAR_USEASG; - } - } - } - } - - LclVarDsc* srcVarDsc = lvaGetDesc(srcLclNum); - unsigned srcFieldLclNum = srcVarDsc->lvFieldLclStart + i; - LclVarDsc* srcFieldVarDsc = lvaGetDesc(srcFieldLclNum); - - // Have to set the field sequence -- which means we need the field handle. - CORINFO_CLASS_HANDLE classHnd = srcVarDsc->GetStructHnd(); - CORINFO_FIELD_HANDLE fieldHnd = - info.compCompHnd->getFieldInClass(classHnd, srcFieldVarDsc->lvFldOrdinal); - FieldSeqNode* curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); - - unsigned srcFieldOffset = lvaGetDesc(srcFieldLclNum)->lvFldOffset; - var_types srcType = srcFieldVarDsc->TypeGet(); - - if (!destUseLclFld) - { - - if (srcFieldOffset == 0) - { - fgAddFieldSeqForZeroOffset(dstAddrClone, curFieldSeq); - } - else - { - GenTree* fieldOffsetNode = gtNewIconNode(srcFieldVarDsc->lvFldOffset, curFieldSeq); - dstAddrClone = gtNewOperNode(GT_ADD, TYP_BYREF, dstAddrClone, fieldOffsetNode); - } - - dstFld = gtNewIndir(srcType, dstAddrClone); - } - else - { - assert(dstAddrClone == nullptr); - assert((destLclOffset == 0) || (destFldSeq != nullptr)); - // If the dst was a struct type field "B" in a struct "A" then we add - // add offset of ("B" in "A") + current offset in "B". - unsigned summOffset = destLclOffset + srcFieldOffset; - dstFld = gtNewLclFldNode(destLclNum, srcType, summOffset); - FieldSeqNode* dstFldFldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq); - dstFld->AsLclFld()->SetFieldSeq(dstFldFldSeq); - - // TODO-1stClassStructs: remove this and implement storing to a field in a struct in a reg. - lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_LocalField)); - } - - // !!! The destination could be on stack. !!! - // This flag will let us choose the correct write barrier. - dstFld->gtFlags |= GTF_IND_TGTANYWHERE; - } - } - - GenTree* srcFld = nullptr; - if (srcDoFldAsg) - { - noway_assert(srcLclNum != BAD_VAR_NUM); - unsigned srcFieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i; - srcFld = gtNewLclvNode(srcFieldLclNum, lvaTable[srcFieldLclNum].TypeGet()); - - noway_assert(srcLclVarTree != nullptr); - srcFld->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK; - } - else - { - noway_assert(destDoFldAsg); - noway_assert(destLclNum != BAD_VAR_NUM); - unsigned dstFieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; - - if (srcSingleLclVarAsg) - { - noway_assert(fieldCnt == 1); - noway_assert(srcLclNum != BAD_VAR_NUM); - noway_assert(addrSpill == nullptr); - - srcFld = gtNewLclvNode(srcLclNum, lvaGetDesc(srcLclNum)->TypeGet()); - } - else - { - GenTree* srcAddrClone = nullptr; - if (!srcUseLclFld) - { - // Need address of the source. 
- if (addrSpill) - { - assert(addrSpillTemp != BAD_VAR_NUM); - srcAddrClone = gtNewLclvNode(addrSpillTemp, TYP_BYREF); - } - else - { - if (i == 0) - { - // Use the orginal srcAddr tree when i == 0 - srcAddrClone = srcAddr; - } - else - { - // We can't clone multiple copies of a tree with persistent side effects - noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); - - srcAddrClone = gtCloneExpr(srcAddr); - noway_assert(srcAddrClone != nullptr); - - JITDUMP("srcAddr - Multiple Fields Clone created:\n"); - DISPTREE(srcAddrClone); - - // Morph the newly created tree - srcAddrClone = fgMorphTree(srcAddrClone); - } - } - } - - CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].GetStructHnd(); - CORINFO_FIELD_HANDLE fieldHnd = - info.compCompHnd->getFieldInClass(classHnd, lvaTable[dstFieldLclNum].lvFldOrdinal); - FieldSeqNode* curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); - var_types destType = lvaGetDesc(dstFieldLclNum)->lvType; - - bool done = false; - if (lvaGetDesc(dstFieldLclNum)->lvFldOffset == 0) - { - // If this is a full-width use of the src via a different type, we need to create a GT_LCL_FLD. - // (Note that if it was the same type, 'srcSingleLclVarAsg' would be true.) - if (srcLclNum != BAD_VAR_NUM) - { - noway_assert(srcLclVarTree != nullptr); - assert(destType != TYP_STRUCT); - unsigned destSize = genTypeSize(destType); - srcLclVar = lvaGetDesc(srcLclNum); - unsigned srcSize = - (srcLclVar->lvType == TYP_STRUCT) ? srcLclVar->lvExactSize : genTypeSize(srcLclVar); - if (destSize == srcSize) - { - srcLclVarTree->gtFlags |= GTF_VAR_CAST; - srcLclVarTree->ChangeOper(GT_LCL_FLD); - srcLclVarTree->gtType = destType; - srcLclVarTree->AsLclFld()->SetFieldSeq(curFieldSeq); - srcFld = srcLclVarTree; - done = true; - } - } - } - if (!done) - { - unsigned fldOffset = lvaGetDesc(dstFieldLclNum)->lvFldOffset; - if (!srcUseLclFld) - { - assert(srcAddrClone != nullptr); - if (fldOffset == 0) - { - fgAddFieldSeqForZeroOffset(srcAddrClone, curFieldSeq); - } - else - { - GenTreeIntCon* fldOffsetNode = gtNewIconNode(fldOffset, curFieldSeq); - srcAddrClone = gtNewOperNode(GT_ADD, TYP_BYREF, srcAddrClone, fldOffsetNode); - } - srcFld = gtNewIndir(destType, srcAddrClone); - } - else - { - assert((srcLclOffset == 0) || (srcFldSeq != 0)); - // If the src was a struct type field "B" in a struct "A" then we add - // add offset of ("B" in "A") + current offset in "B". - unsigned summOffset = srcLclOffset + fldOffset; - srcFld = gtNewLclFldNode(srcLclNum, destType, summOffset); - FieldSeqNode* srcFldFldSeq = GetFieldSeqStore()->Append(srcFldSeq, curFieldSeq); - srcFld->AsLclFld()->SetFieldSeq(srcFldFldSeq); - // TODO-1stClassStructs: remove this and implement reading a field from a struct in a reg. - lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_LocalField)); - } - } - } - } - assert(srcFld != nullptr); - noway_assert(dstFld->TypeGet() == srcFld->TypeGet()); - - asg = gtNewAssignNode(dstFld, srcFld); - - // If we spilled the address, and we didn't do individual field assignments to promoted fields, - // and it was of a local, ensure that the destination local variable has been marked as address - // exposed. Neither liveness nor SSA are able to track this kind of indirect assignments. 
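// Aside: why the comment above insists on address exposure. Once stores flow through a
// spilled pointer, the definition of the underlying local is indirect, and liveness/SSA
// cannot attribute it to the variable; marking the local address-exposed keeps such an
// indirect def from being deleted as a dead store. A plain C++ analogue:
#include <cstdio>

int main()
{
    int  local       = 0;
    int* spilledAddr = &local;  // the address escapes into a temp
    *spilledAddr     = 42;      // indirect def: no direct assignment to 'local' is visible
    std::printf("%d\n", local); // prints 42, so the indirect def must be preserved
    return 0;
}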
- if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM) - { - noway_assert(lvaGetDesc(destLclNum)->lvAddrExposed); - } - -#if LOCAL_ASSERTION_PROP - if (optLocalAssertionProp) - { - optAssertionGen(asg); - } -#endif // LOCAL_ASSERTION_PROP - - if (tree) - { - tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg); - } - else - { - tree = asg; - } - } - } - - if (isLateArg) - { - tree->gtFlags |= GTF_LATE_ARG; - } - -#ifdef DEBUG - if (tree != oldTree) - { - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; - } -#endif - -_Done: - - JITDUMP("\nfgMorphCopyBlock (after):\n"); - DISPTREE(tree); - - return tree; -} - -//------------------------------------------------------------------------ -// fgMorphCanUseLclFldForCopy: check if we can access LclVar2 using LclVar1's fields. -// -// Arguments: -// lclNum1 - a promoted lclVar that is used in fieldwise assignment; -// lclNum2 - the local variable on the other side of ASG, can be BAD_VAR_NUM. -// -// Return Value: -// True if the second local is valid and has the same struct handle as the first, -// false otherwise. -// -// Notes: -// This check is needed to avoid accesing LCL_VARs with incorrect -// CORINFO_FIELD_HANDLE that would confuse VN optimizations. -// -bool Compiler::fgMorphCanUseLclFldForCopy(unsigned lclNum1, unsigned lclNum2) -{ - assert(lclNum1 != BAD_VAR_NUM); - if (lclNum2 == BAD_VAR_NUM) - { - return false; - } - const LclVarDsc* varDsc1 = lvaGetDesc(lclNum1); - const LclVarDsc* varDsc2 = lvaGetDesc(lclNum2); - assert(varTypeIsStruct(varDsc1)); - if (!varTypeIsStruct(varDsc2)) - { - return false; - } - CORINFO_CLASS_HANDLE struct1 = varDsc1->GetStructHnd(); - CORINFO_CLASS_HANDLE struct2 = varDsc2->GetStructHnd(); - assert(struct1 != NO_CLASS_HANDLE); - assert(struct2 != NO_CLASS_HANDLE); - if (struct1 != struct2) - { - return false; - } - return true; -} - -// insert conversions and normalize to make tree amenable to register -// FP architectures -GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree) -{ - if (tree->OperIsArithmetic()) - { - if (varTypeIsFloating(tree)) - { - GenTree* op1 = tree->AsOp()->gtOp1; - GenTree* op2 = tree->gtGetOp2(); - - assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet())); - - if (op1->TypeGet() != tree->TypeGet()) - { - tree->AsOp()->gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet()); - } - if (op2->TypeGet() != tree->TypeGet()) - { - tree->AsOp()->gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet()); - } - } - } - else if (tree->OperIsCompare()) - { - GenTree* op1 = tree->AsOp()->gtOp1; - - if (varTypeIsFloating(op1)) - { - GenTree* op2 = tree->gtGetOp2(); - assert(varTypeIsFloating(op2)); - - if (op1->TypeGet() != op2->TypeGet()) - { - // both had better be floating, just one bigger than other - if (op1->TypeGet() == TYP_FLOAT) - { - assert(op2->TypeGet() == TYP_DOUBLE); - tree->AsOp()->gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); - } - else if (op2->TypeGet() == TYP_FLOAT) - { - assert(op1->TypeGet() == TYP_DOUBLE); - tree->AsOp()->gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); - } - } - } - } - - return tree; -} - -#ifdef FEATURE_SIMD - -//-------------------------------------------------------------------------------------------------------------- -// getSIMDStructFromField: -// Checking whether the field belongs to a simd struct or not. If it is, return the GenTree* for -// the struct node, also base type, field index and simd size. If it is not, just return nullptr. 
-// Usually if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, then we -// should return nullptr, since in this case we should treat SIMD struct as a regular struct. -// However if no matter what, you just want get simd struct node, you can set the ignoreUsedInSIMDIntrinsic -// as true. Then there will be no IsUsedInSIMDIntrinsic checking, and it will return SIMD struct node -// if the struct is a SIMD struct. -// -// Arguments: -// tree - GentreePtr. This node will be checked to see this is a field which belongs to a simd -// struct used for simd intrinsic or not. -// pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut -// to simd lclvar's base type. -// indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut -// equals to the index number of this field. -// simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut -// equals to the simd struct size which this tree belongs to. -// ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore -// the UsedInSIMDIntrinsic check. -// -// return value: -// A GenTree* which points the simd lclvar tree belongs to. If the tree is not the simd -// instrinic related field, return nullptr. -// - -GenTree* Compiler::getSIMDStructFromField(GenTree* tree, - var_types* pBaseTypeOut, - unsigned* indexOut, - unsigned* simdSizeOut, - bool ignoreUsedInSIMDIntrinsic /*false*/) -{ - GenTree* ret = nullptr; - if (tree->OperGet() == GT_FIELD) - { - GenTree* objRef = tree->AsField()->gtFldObj; - if (objRef != nullptr) - { - GenTree* obj = nullptr; - if (objRef->gtOper == GT_ADDR) - { - obj = objRef->AsOp()->gtOp1; - } - else if (ignoreUsedInSIMDIntrinsic) - { - obj = objRef; - } - else - { - return nullptr; - } - - if (isSIMDTypeLocal(obj)) - { - unsigned lclNum = obj->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[lclNum]; - if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic) - { - *simdSizeOut = varDsc->lvExactSize; - *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj); - ret = obj; - } - } - else if (obj->OperGet() == GT_SIMD) - { - ret = obj; - GenTreeSIMD* simdNode = obj->AsSIMD(); - *simdSizeOut = simdNode->gtSIMDSize; - *pBaseTypeOut = simdNode->gtSIMDBaseType; - } -#ifdef FEATURE_HW_INTRINSICS - else if (obj->OperIsHWIntrinsic()) - { - ret = obj; - GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic(); - *simdSizeOut = simdNode->gtSIMDSize; - *pBaseTypeOut = simdNode->gtSIMDBaseType; - } -#endif // FEATURE_HW_INTRINSICS - } - } - if (ret != nullptr) - { - unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut); - *indexOut = tree->AsField()->gtFldOffset / BaseTypeSize; - } - return ret; -} - -/***************************************************************************** -* If a read operation tries to access simd struct field, then transform the -* operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree. -* Otherwise, return the old tree. -* Argument: -* tree - GenTree*. If this pointer points to simd struct which is used for simd -* intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem. -* Return: -* A GenTree* which points to the new tree. If the tree is not for simd intrinsic, -* return nullptr. 
-*/ - -GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree) -{ - unsigned index = 0; - var_types baseType = TYP_UNKNOWN; - unsigned simdSize = 0; - GenTree* simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize); - if (simdStructNode != nullptr) - { - assert(simdSize >= ((index + 1) * genTypeSize(baseType))); - GenTree* op2 = gtNewIconNode(index); - tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize); -#ifdef DEBUG - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - } - return tree; -} - -/***************************************************************************** -* Transform an assignment of a SIMD struct field to SIMD intrinsic -* SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment, -* then return the old tree. -* Argument: -* tree - GenTree*. If this pointer points to simd struct which is used for simd -* intrinsic, we will morph it as simd intrinsic set. -* Return: -* A GenTree* which points to the new tree. If the tree is not for simd intrinsic, -* return nullptr. -*/ - -GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree) -{ - assert(tree->OperGet() == GT_ASG); - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); - - unsigned index = 0; - var_types baseType = TYP_UNKNOWN; - unsigned simdSize = 0; - GenTree* simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize); - if (simdOp1Struct != nullptr) - { - // Generate the simd set intrinsic - assert(simdSize >= ((index + 1) * genTypeSize(baseType))); - - SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid; - switch (index) - { - case 0: - simdIntrinsicID = SIMDIntrinsicSetX; - break; - case 1: - simdIntrinsicID = SIMDIntrinsicSetY; - break; - case 2: - simdIntrinsicID = SIMDIntrinsicSetZ; - break; - case 3: - simdIntrinsicID = SIMDIntrinsicSetW; - break; - default: - noway_assert(!"There is no set intrinsic for index bigger than 3"); - } - - GenTree* target = gtClone(simdOp1Struct); - assert(target != nullptr); - var_types simdType = target->gtType; - GenTree* simdTree = gtNewSIMDNode(simdType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize); - - tree->AsOp()->gtOp1 = target; - tree->AsOp()->gtOp2 = simdTree; - - // fgMorphTree has already called fgMorphImplicitByRefArgs() on this assignment, but the source - // and target have not yet been morphed. - // Therefore, in case the source and/or target are now implicit byrefs, we need to call it again. - if (fgMorphImplicitByRefArgs(tree)) - { - if (tree->gtGetOp1()->OperIsBlk()) - { - assert(tree->gtGetOp1()->TypeGet() == simdType); - tree->gtGetOp1()->SetOper(GT_IND); - tree->gtGetOp1()->gtType = simdType; - } - } -#ifdef DEBUG - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - } - - return tree; -} - -#endif // FEATURE_SIMD - -//------------------------------------------------------------------------------ -// fgMorphCommutative : Try to simplify "(X op C1) op C2" to "X op C3" -// for commutative operators. -// -// Arguments: -// tree - node to fold -// -// return value: -// A folded GenTree* instance or nullptr if something prevents folding. -// - -GenTree* Compiler::fgMorphCommutative(GenTreeOp* tree) -{ - assert(varTypeIsIntegralOrI(tree->TypeGet())); - assert(tree->OperIs(GT_ADD, GT_MUL, GT_OR, GT_AND, GT_XOR)); - - // op1 can be GT_COMMA, in this case we're going to fold - // "(op (COMMA(... (op X C1))) C2)" to "(COMMA(... 
(op X C3)))" - GenTree* op1 = tree->gtGetOp1()->gtEffectiveVal(true); - genTreeOps oper = tree->OperGet(); - - if (!op1->OperIs(oper) || !tree->gtGetOp2()->IsCnsIntOrI() || !op1->gtGetOp2()->IsCnsIntOrI() || - op1->gtGetOp1()->IsCnsIntOrI() || gtIsActiveCSE_Candidate(op1)) - { - return nullptr; - } - - if (tree->OperMayOverflow() && (tree->gtOverflow() || op1->gtOverflow())) - { - return nullptr; - } - - GenTreeIntCon* cns1 = op1->gtGetOp2()->AsIntCon(); - GenTreeIntCon* cns2 = tree->gtGetOp2()->AsIntCon(); - - if (!varTypeIsIntegralOrI(tree->TypeGet()) || cns1->TypeIs(TYP_REF) || !cns1->TypeIs(cns2->TypeGet())) - { - return nullptr; - } - - GenTree* foldedCns = gtFoldExprConst(gtNewOperNode(oper, cns1->TypeGet(), cns1, cns2)); - if (!foldedCns->IsCnsIntOrI()) - { - // Give up if we can't fold "C1 op C2" - return nullptr; - } - - cns1->gtIconVal = foldedCns->AsIntCon()->IconValue(); - if ((oper == GT_ADD) && foldedCns->IsCnsIntOrI()) - { - cns1->AsIntCon()->gtFieldSeq = - GetFieldSeqStore()->Append(cns1->AsIntCon()->gtFieldSeq, cns2->AsIntCon()->gtFieldSeq); - } - - GenTreeOp* newTree = tree->gtGetOp1()->AsOp(); - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(cns2); - DEBUG_DESTROY_NODE(foldedCns); - INDEBUG(newTree->gtOp2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return newTree; -} - -/***************************************************************************** - * - * Transform the given GTK_SMPOP tree for code generation. - */ - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function -#endif -GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) -{ - ALLOCA_CHECK(); - assert(tree->OperKind() & GTK_SMPOP); - - /* The steps in this function are : - o Perform required preorder processing - o Process the first, then second operand, if any - o Perform required postorder morphing - o Perform optional postorder morphing if optimizing - */ - - bool isQmarkColon = false; - -#if LOCAL_ASSERTION_PROP - AssertionIndex origAssertionCount = DUMMY_INIT(0); - AssertionDsc* origAssertionTab = DUMMY_INIT(NULL); - - AssertionIndex thenAssertionCount = DUMMY_INIT(0); - AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL); -#endif - - if (fgGlobalMorph) - { - tree = fgMorphForRegisterFP(tree); - } - - genTreeOps oper = tree->OperGet(); - var_types typ = tree->TypeGet(); - GenTree* op1 = tree->AsOp()->gtOp1; - GenTree* op2 = tree->gtGetOp2IfPresent(); - - /*------------------------------------------------------------------------- - * First do any PRE-ORDER processing - */ - - switch (oper) - { - // Some arithmetic operators need to use a helper call to the EE - int helper; - - case GT_ASG: - tree = fgDoNormalizeOnStore(tree); - /* fgDoNormalizeOnStore can change op2 */ - noway_assert(op1 == tree->AsOp()->gtOp1); - op2 = tree->AsOp()->gtOp2; - -#ifdef FEATURE_SIMD - { - // We should check whether op2 should be assigned to a SIMD field or not. - // If it is, we should tranlate the tree to simd intrinsic. - assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0)); - GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree); - typ = tree->TypeGet(); - op1 = tree->gtGetOp1(); - op2 = tree->gtGetOp2(); -#ifdef DEBUG - assert((tree == newTree) && (tree->OperGet() == oper)); - if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0) - { - tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; - } -#endif // DEBUG - } -#endif - - // We can't CSE the LHS of an assignment. Only r-values can be CSEed. 
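// Aside: restating what fgMorphCommutative (earlier in this hunk) achieves in plain
// arithmetic. For a commutative, non-overflowing integer operator, (x op c1) op c2
// re-associates to x op (c1 op c2), so the two constants fold into one. A standalone
// sketch of the ADD case; foldAddChain is invented for this illustration.
#include <cassert>

int foldAddChain(int x, int c1, int c2)
{
    int c3 = c1 + c2; // the "C1 op C2" part is folded once, up front
    return x + c3;    // a single runtime add remains instead of two
}

int main()
{
    assert(foldAddChain(10, 3, 4) == (10 + 3) + 4);
    return 0;
}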
- // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former - // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type) - // TODO-1stClassStructs: improve this. - if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) - { - op1->gtFlags |= GTF_DONT_CSE; - } - break; - - case GT_ADDR: - - /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ - op1->gtFlags |= GTF_DONT_CSE; - break; - - case GT_QMARK: - case GT_JTRUE: - - noway_assert(op1); - - if (op1->OperKind() & GTK_RELOP) - { - noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK)); - /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does - not need to materialize the result as a 0 or 1. */ - - /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */ - op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - - // Request that the codegen for op1 sets the condition flags - // when it generates the code for op1. - // - // Codegen for op1 must set the condition flags if - // this method returns true. - // - op1->gtRequestSetFlags(); - } - else - { - GenTree* effOp1 = op1->gtEffectiveVal(); - noway_assert((effOp1->gtOper == GT_CNS_INT) && - (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); - } - break; - - case GT_COLON: -#if LOCAL_ASSERTION_PROP - if (optLocalAssertionProp) -#endif - { - isQmarkColon = true; - } - break; - - case GT_INDEX: - return fgMorphArrayIndex(tree); - - case GT_CAST: - return fgMorphCast(tree); - - case GT_MUL: - if (opts.OptimizationEnabled() && !optValnumCSE_phase && !tree->gtOverflow()) - { - // MUL(NEG(a), C) => MUL(a, NEG(C)) - if (op1->OperIs(GT_NEG) && !op1->gtGetOp1()->IsCnsIntOrI() && op2->IsCnsIntOrI() && - !op2->IsIconHandle()) - { - GenTree* newOp1 = op1->gtGetOp1(); - GenTree* newConst = gtNewIconNode(-op2->AsIntCon()->IconValue(), op2->TypeGet()); - DEBUG_DESTROY_NODE(op1); - DEBUG_DESTROY_NODE(op2); - tree->AsOp()->gtOp1 = newOp1; - tree->AsOp()->gtOp2 = newConst; - return fgMorphSmpOp(tree, mac); - } - } - -#ifndef TARGET_64BIT - if (typ == TYP_LONG) - { - /* For (long)int1 * (long)int2, we dont actually do the - casts, and just multiply the 32 bit values, which will - give us the 64 bit result in edx:eax */ - - noway_assert(op2); - if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST && - genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) && - !op1->gtOverflow() && !op2->gtOverflow()) - { - // The casts have to be of the same signedness. - if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED)) - { - // We see if we can force an int constant to change its signedness - GenTree* constOp; - if (op1->AsCast()->CastOp()->gtOper == GT_CNS_INT) - constOp = op1; - else if (op2->AsCast()->CastOp()->gtOper == GT_CNS_INT) - constOp = op2; - else - goto NO_MUL_64RSLT; - - if (((unsigned)(constOp->AsCast()->CastOp()->AsIntCon()->gtIconVal) < (unsigned)(0x80000000))) - constOp->gtFlags ^= GTF_UNSIGNED; - else - goto NO_MUL_64RSLT; - } - - // The only combination that can overflow - if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED)) - goto NO_MUL_64RSLT; - - /* Remaining combinations can never overflow during long mul. 
*/ - - tree->gtFlags &= ~GTF_OVERFLOW; - - /* Do unsigned mul only if the casts were unsigned */ - - tree->gtFlags &= ~GTF_UNSIGNED; - tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED; - - /* Since we are committing to GTF_MUL_64RSLT, we don't want - the casts to be folded away. So morph the castees directly */ - - op1->AsOp()->gtOp1 = fgMorphTree(op1->AsOp()->gtOp1); - op2->AsOp()->gtOp1 = fgMorphTree(op2->AsOp()->gtOp1); - - // Propagate side effect flags up the tree - op1->gtFlags &= ~GTF_ALL_EFFECT; - op1->gtFlags |= (op1->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT); - op2->gtFlags &= ~GTF_ALL_EFFECT; - op2->gtFlags |= (op2->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT); - - // If the GT_MUL can be altogether folded away, we should do that. - - if ((op1->AsCast()->CastOp()->OperKind() & op2->AsCast()->CastOp()->OperKind() & GTK_CONST) && - opts.OptEnabled(CLFLG_CONSTANTFOLD)) - { - tree->AsOp()->gtOp1 = op1 = gtFoldExprConst(op1); - tree->AsOp()->gtOp2 = op2 = gtFoldExprConst(op2); - noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST); - tree = gtFoldExprConst(tree); - noway_assert(tree->OperIsConst()); - return tree; - } - - tree->gtFlags |= GTF_MUL_64RSLT; - - // If op1 and op2 are unsigned casts, we need to do an unsigned mult - tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED); - - // Insert GT_NOP nodes for the cast operands so that they do not get folded - // And propagate the new flags. We don't want to CSE the casts because - // codegen expects GTF_MUL_64RSLT muls to have a certain layout. - - if (op1->AsCast()->CastOp()->OperGet() != GT_NOP) - { - op1->AsOp()->gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->AsCast()->CastOp()); - op1->gtFlags &= ~GTF_ALL_EFFECT; - op1->gtFlags |= (op1->AsCast()->CastOp()->gtFlags & GTF_ALL_EFFECT); - } - - if (op2->AsCast()->CastOp()->OperGet() != GT_NOP) - { - op2->AsOp()->gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->AsCast()->CastOp()); - op2->gtFlags &= ~GTF_ALL_EFFECT; - op2->gtFlags |= (op2->AsCast()->CastOp()->gtFlags & GTF_ALL_EFFECT); - } - - op1->gtFlags |= GTF_DONT_CSE; - op2->gtFlags |= GTF_DONT_CSE; - - tree->gtFlags &= ~GTF_ALL_EFFECT; - tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT); - - goto DONE_MORPHING_CHILDREN; - } - else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0) - { - NO_MUL_64RSLT: - if (tree->gtOverflow()) - helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF; - else - helper = CORINFO_HELP_LMUL; - - goto USE_HELPER_FOR_ARITH; - } - else - { - /* We are seeing this node again. We have decided to use - GTF_MUL_64RSLT, so leave it alone. */ - - assert(tree->gtIsValid64RsltMul()); - } - } -#endif // !TARGET_64BIT - break; - - case GT_ARR_LENGTH: - if (op1->OperIs(GT_CNS_STR)) - { - // Optimize `ldstr + String::get_Length()` to CNS_INT - // e.g. "Hello".Length => 5 - GenTreeIntCon* iconNode = gtNewStringLiteralLength(op1->AsStrCon()); - if (iconNode != nullptr) - { - INDEBUG(iconNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return iconNode; - } - } - break; - - case GT_DIV: - // Replace "val / dcon" with "val * (1.0 / dcon)" if dcon is a power of two. 
- // Powers of two within range are always exactly represented, - // so multiplication by the reciprocal is safe in this scenario - if (fgGlobalMorph && op2->IsCnsFltOrDbl()) - { - double divisor = op2->AsDblCon()->gtDconVal; - if (((typ == TYP_DOUBLE) && FloatingPointUtils::hasPreciseReciprocal(divisor)) || - ((typ == TYP_FLOAT) && FloatingPointUtils::hasPreciseReciprocal(forceCastToFloat(divisor)))) - { - oper = GT_MUL; - tree->ChangeOper(oper); - op2->AsDblCon()->gtDconVal = 1.0 / divisor; - } - } - - // array.Length is always positive so GT_DIV can be changed to GT_UDIV - // if op2 is a positive cns - if (!optValnumCSE_phase && op1->OperIs(GT_ARR_LENGTH) && op2->IsIntegralConst() && - op2->AsIntCon()->IconValue() >= 2) // for 0 and 1 it doesn't matter if it's UDIV or DIV - { - assert(tree->OperIs(GT_DIV)); - tree->ChangeOper(GT_UDIV); - return fgMorphSmpOp(tree, mac); - } - - if (opts.OptimizationEnabled() && !optValnumCSE_phase) - { - // DIV(NEG(a), C) => DIV(a, NEG(C)) - if (op1->OperIs(GT_NEG) && !op1->gtGetOp1()->IsCnsIntOrI() && op2->IsCnsIntOrI() && - !op2->IsIconHandle()) - { - ssize_t op2Value = op2->AsIntCon()->IconValue(); - if (op2Value != 1 && op2Value != -1) // Div must throw exception for int(long).MinValue / -1. - { - tree->AsOp()->gtOp1 = op1->gtGetOp1(); - DEBUG_DESTROY_NODE(op1); - tree->AsOp()->gtOp2 = gtNewIconNode(-op2Value, op2->TypeGet()); - DEBUG_DESTROY_NODE(op2); - return fgMorphSmpOp(tree, mac); - } - } - } - -#ifndef TARGET_64BIT - if (typ == TYP_LONG) - { - helper = CORINFO_HELP_LDIV; - goto USE_HELPER_FOR_ARITH; - } - -#if USE_HELPERS_FOR_INT_DIV - if (typ == TYP_INT) - { - helper = CORINFO_HELP_DIV; - goto USE_HELPER_FOR_ARITH; - } -#endif -#endif // !TARGET_64BIT - - if (op2->gtOper == GT_CAST && op2->AsOp()->gtOp1->IsCnsIntOrI()) - { - op2 = gtFoldExprConst(op2); - } - break; - - case GT_UDIV: - -#ifndef TARGET_64BIT - if (typ == TYP_LONG) - { - helper = CORINFO_HELP_ULDIV; - goto USE_HELPER_FOR_ARITH; - } -#if USE_HELPERS_FOR_INT_DIV - if (typ == TYP_INT) - { - helper = CORINFO_HELP_UDIV; - goto USE_HELPER_FOR_ARITH; - } -#endif -#endif // TARGET_64BIT - break; - - case GT_MOD: - - if (varTypeIsFloating(typ)) - { - helper = CORINFO_HELP_DBLREM; - noway_assert(op2); - if (op1->TypeGet() == TYP_FLOAT) - { - if (op2->TypeGet() == TYP_FLOAT) - { - helper = CORINFO_HELP_FLTREM; - } - else - { - tree->AsOp()->gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); - } - } - else if (op2->TypeGet() == TYP_FLOAT) - { - tree->AsOp()->gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); - } - goto USE_HELPER_FOR_ARITH; - } - - // array.Length is always positive so GT_DIV can be changed to GT_UDIV - // if op2 is a positive cns - if (!optValnumCSE_phase && op1->OperIs(GT_ARR_LENGTH) && op2->IsIntegralConst() && - op2->AsIntCon()->IconValue() >= 2) // for 0 and 1 it doesn't matter if it's UMOD or MOD - { - assert(tree->OperIs(GT_MOD)); - tree->ChangeOper(GT_UMOD); - return fgMorphSmpOp(tree, mac); - } - - // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod. - // A similar optimization for signed mod will not work for a negative perfectly divisible - // HI-word. To make it correct, we would need to divide without the sign and then flip the - // result sign after mod. This requires 18 opcodes + flow making it not worthy to inline. 
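// Aside: a small standalone illustration of the broader hazard the comment above points
// at: remainder shortcuts that are valid for unsigned operands do not carry over to
// signed GT_MOD, because a negative dividend yields a negative (or zero) remainder.
// The power-of-two mask form is used here as the simplest concrete instance.
#include <cassert>

int main()
{
    int a = -15;
    assert(a % 8 == -7);  // signed remainder keeps the dividend's sign
    assert((a & 7) == 1); // the unsigned mask shortcut gives a different answer
    assert((-16 % 8 == 0) && ((-16 & 7) == 0)); // for negative dividends they agree only
                                                // in the evenly divisible case
    return 0;
}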
- goto ASSIGN_HELPER_FOR_MOD; - - case GT_UMOD: - -#ifdef TARGET_ARMARCH -// -// Note for TARGET_ARMARCH we don't have a remainder instruction, so we don't do this optimization -// -#else // TARGET_XARCH - /* If this is an unsigned long mod with op2 which is a cast to long from a - constant int, then don't morph to a call to the helper. This can be done - faster inline using idiv. - */ - - noway_assert(op2); - if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) && - ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) && - ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED))) - { - if (op2->gtOper == GT_CAST && op2->AsCast()->CastOp()->gtOper == GT_CNS_INT && - op2->AsCast()->CastOp()->AsIntCon()->gtIconVal >= 2 && - op2->AsCast()->CastOp()->AsIntCon()->gtIconVal <= 0x3fffffff && - (tree->gtFlags & GTF_UNSIGNED) == (op2->AsCast()->CastOp()->gtFlags & GTF_UNSIGNED)) - { - tree->AsOp()->gtOp2 = op2 = fgMorphCast(op2); - noway_assert(op2->gtOper == GT_CNS_NATIVELONG); - } - - if (op2->gtOper == GT_CNS_NATIVELONG && op2->AsIntConCommon()->LngValue() >= 2 && - op2->AsIntConCommon()->LngValue() <= 0x3fffffff) - { - tree->AsOp()->gtOp1 = op1 = fgMorphTree(op1); - noway_assert(op1->TypeGet() == TYP_LONG); - - // Update flags for op1 morph - tree->gtFlags &= ~GTF_ALL_EFFECT; - - tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant - - // If op1 is a constant, then do constant folding of the division operator - if (op1->gtOper == GT_CNS_NATIVELONG) - { - tree = gtFoldExpr(tree); - } - - // We may fail to fold - if (!tree->OperIsConst()) - { - tree->AsOp()->CheckDivideByConstOptimized(this); - } - - return tree; - } - } -#endif // TARGET_XARCH - - ASSIGN_HELPER_FOR_MOD: - - // For "val % 1", return 0 if op1 doesn't have any side effects - // and we are not in the CSE phase, we cannot discard 'tree' - // because it may contain CSE expressions that we haven't yet examined. - // - if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase) - { - if (op2->IsIntegralConst(1)) - { - GenTree* zeroNode = gtNewZeroConNode(typ); -#ifdef DEBUG - zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - DEBUG_DESTROY_NODE(tree); - return zeroNode; - } - } - -#ifndef TARGET_64BIT - if (typ == TYP_LONG) - { - helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD; - goto USE_HELPER_FOR_ARITH; - } - -#if USE_HELPERS_FOR_INT_DIV - if (typ == TYP_INT) - { - if (oper == GT_UMOD) - { - helper = CORINFO_HELP_UMOD; - goto USE_HELPER_FOR_ARITH; - } - else if (oper == GT_MOD) - { - helper = CORINFO_HELP_MOD; - goto USE_HELPER_FOR_ARITH; - } - } -#endif -#endif // !TARGET_64BIT - - if (op2->gtOper == GT_CAST && op2->AsOp()->gtOp1->IsCnsIntOrI()) - { - op2 = gtFoldExprConst(op2); - } - -#ifdef TARGET_ARM64 - // For ARM64 we don't have a remainder instruction, - // The architecture manual suggests the following transformation to - // generate code for such operator: - // - // a % b = a - (a / b) * b; - // - // TODO: there are special cases where it can be done better, for example - // when the modulo operation is unsigned and the divisor is a - // integer constant power of two. In this case, we can make the transform: - // - // a % b = a & (b - 1); - // - // Lower supports it for all cases except when `a` is constant, but - // in Morph we can't guarantee that `a` won't be transformed into a constant, - // so can't guarantee that lower will be able to do this optimization. 
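// Aside: the two identities that comment relies on, written out as ordinary C++. The
// general sub/mul/div expansion is what fgMorphModToSubMulDiv produces (ARM64 has no
// integer remainder instruction); the mask form is the cheaper variant that is only
// valid for unsigned operands with a power-of-two divisor.
#include <cassert>
#include <cstdint>

int64_t modViaSubMulDiv(int64_t a, int64_t b)
{
    return a - (a / b) * b; // a % b == a - (a / b) * b
}

uint64_t modViaMask(uint64_t a, uint64_t powerOfTwo)
{
    return a & (powerOfTwo - 1); // unsigned a, power-of-two divisor only
}

int main()
{
    assert(modViaSubMulDiv(-7, 3) == -7 % 3);
    assert(modViaSubMulDiv(42, 5) == 42 % 5);
    assert(modViaMask(42u, 8u) == 42u % 8u);
    return 0;
}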
- { - // Do "a % b = a - (a / b) * b" morph always, see TODO before this block. - bool doMorphModToSubMulDiv = true; - - if (doMorphModToSubMulDiv) - { - assert(!optValnumCSE_phase); - - tree = fgMorphModToSubMulDiv(tree->AsOp()); - op1 = tree->AsOp()->gtOp1; - op2 = tree->AsOp()->gtOp2; - } - } -#else // !TARGET_ARM64 - // If b is not a power of 2 constant then lowering replaces a % b - // with a - (a / b) * b and applies magic division optimization to - // a / b. The code may already contain an a / b expression (e.g. - // x = a / 10; y = a % 10;) and then we end up with redundant code. - // If we convert % to / here we give CSE the opportunity to eliminate - // the redundant division. If there's no redundant division then - // nothing is lost, lowering would have done this transform anyway. - - if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())) - { - ssize_t divisorValue = op2->AsIntCon()->IconValue(); - size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast(divisorValue) - : static_cast(abs(divisorValue)); - - if (!isPow2(absDivisorValue)) - { - tree = fgMorphModToSubMulDiv(tree->AsOp()); - op1 = tree->AsOp()->gtOp1; - op2 = tree->AsOp()->gtOp2; - } - } -#endif // !TARGET_ARM64 - break; - - USE_HELPER_FOR_ARITH: - { - // TODO: this comment is wrong now, do an appropriate fix. - /* We have to morph these arithmetic operations into helper calls - before morphing the arguments (preorder), else the arguments - won't get correct values of fgPtrArgCntCur. - However, try to fold the tree first in case we end up with a - simple node which won't need a helper call at all */ - - noway_assert(tree->OperIsBinary()); - - GenTree* oldTree = tree; - - tree = gtFoldExpr(tree); - - // Were we able to fold it ? - // Note that gtFoldExpr may return a non-leaf even if successful - // e.g. for something like "expr / 1" - see also bug #290853 - if (tree->OperIsLeaf() || (oldTree != tree)) - { - return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree); - } - - // Did we fold it into a comma node with throw? - if (tree->gtOper == GT_COMMA) - { - noway_assert(fgIsCommaThrow(tree)); - return fgMorphTree(tree); - } - } - return fgMorphIntoHelperCall(tree, helper, gtNewCallArgs(op1, op2)); - - case GT_RETURN: - // normalize small integer return values - if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) && - fgCastNeeded(op1, info.compRetType)) - { - // Small-typed return values are normalized by the callee - op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType); - - // Propagate GTF_COLON_COND - op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND); - - tree->AsOp()->gtOp1 = fgMorphCast(op1); - - // Propagate side effect flags - tree->gtFlags &= ~GTF_ALL_EFFECT; - tree->gtFlags |= (tree->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT); - - return tree; - } - if (!tree->TypeIs(TYP_VOID)) - { - if (op1->OperIs(GT_OBJ, GT_BLK, GT_IND)) - { - op1 = fgMorphRetInd(tree->AsUnOp()); - } - if (op1->OperIs(GT_LCL_VAR)) - { - // With a `genReturnBB` this `RETURN(src)` tree will be replaced by a `ASG(genReturnLocal, src)` - // and `ASG` will be tranformed into field by field copy without parent local referencing if - // possible. 
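// Aside: the substitution performed by the code that follows, shown on a plain C++
// wrapper type. When an independently promoted struct has exactly one (non-small-int)
// field, returning the struct and returning that field are equivalent, so the return can
// name the field local directly and copy propagation can then forward into it. Wrapper
// and g are invented for this sketch.
#include <cstdint>

struct Wrapper
{
    int64_t value; // the single promoted field backing the struct local
};

int64_t g()
{
    return 7;
}

Wrapper beforeMorph()
{
    Wrapper w;
    w.value = g();
    return w; // return of the struct local
}

int64_t afterMorph()
{
    int64_t field = g();
    return field; // return of the field local: the struct local is bypassed
}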
- GenTreeLclVar* lclVar = op1->AsLclVar(); - unsigned lclNum = lclVar->GetLclNum(); - if ((genReturnLocal == BAD_VAR_NUM) || (genReturnLocal == lclNum)) - { - LclVarDsc* varDsc = lvaGetDesc(lclVar); - if (varDsc->CanBeReplacedWithItsField(this)) - { - // We can replace the struct with its only field and allow copy propagation to replace - // return value that was written as a field. - unsigned fieldLclNum = varDsc->lvFieldLclStart; - LclVarDsc* fieldDsc = lvaGetDesc(fieldLclNum); - - if (!varTypeIsSmallInt(fieldDsc->lvType)) - { - // TODO-CQ: support that substitution for small types without creating `CAST` node. - // When a small struct is returned in a register higher bits could be left in undefined - // state. - JITDUMP("Replacing an independently promoted local var V%02u with its only field " - "V%02u for " - "the return [%06u]\n", - lclVar->GetLclNum(), fieldLclNum, dspTreeID(tree)); - lclVar->SetLclNum(fieldLclNum); - lclVar->ChangeType(fieldDsc->lvType); - } - } - } - } - } - break; - - case GT_EQ: - case GT_NE: - { - GenTree* optimizedTree = gtFoldTypeCompare(tree); - - if (optimizedTree != tree) - { - return fgMorphTree(optimizedTree); - } - } - - FALLTHROUGH; - - case GT_GT: - { - // Try and optimize nullable boxes feeding compares - GenTree* optimizedTree = gtFoldBoxNullable(tree); - - if (optimizedTree->OperGet() != tree->OperGet()) - { - return optimizedTree; - } - else - { - tree = optimizedTree; - } - - op1 = tree->AsOp()->gtOp1; - op2 = tree->gtGetOp2IfPresent(); - - break; - } - - case GT_RUNTIMELOOKUP: - return fgMorphTree(op1); - -#ifdef TARGET_ARM - case GT_INTRINSIC: - if (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) - { - switch (tree->TypeGet()) - { - case TYP_DOUBLE: - return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewCallArgs(op1)); - case TYP_FLOAT: - return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewCallArgs(op1)); - default: - unreached(); - } - } - break; -#endif - case GT_LIST: - // Special handling for the arg list. - return fgMorphArgList(tree->AsArgList(), mac); - - case GT_PUTARG_TYPE: - return fgMorphTree(tree->AsUnOp()->gtGetOp1()); - - default: - break; - } - - /*------------------------------------------------------------------------- - * Process the first operand, if any - */ - - if (op1) - { - -#if LOCAL_ASSERTION_PROP - // If we are entering the "then" part of a Qmark-Colon we must - // save the state of the current copy assignment table - // so that we can restore this state when entering the "else" part - if (isQmarkColon) - { - noway_assert(optLocalAssertionProp); - if (optAssertionCount) - { - noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea - unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); - origAssertionTab = (AssertionDsc*)ALLOCA(tabSize); - origAssertionCount = optAssertionCount; - memcpy(origAssertionTab, optAssertionTabPrivate, tabSize); - } - else - { - origAssertionCount = 0; - origAssertionTab = nullptr; - } - } -#endif // LOCAL_ASSERTION_PROP - - // We might need a new MorphAddressContext context. (These are used to convey - // parent context about how addresses being calculated will be used; see the - // specification comment for MorphAddrContext for full details.) - // Assume it's an Ind context to start. 
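// Aside: the MorphAddrContext mechanism described above is a parent-to-child channel in
// the recursive morph: the parent tells the child whether the address being built feeds
// an indirection or an explicit ADDR, and the context accumulates what is learned about
// the constant offsets. A stripped-down analogue; Node, AddrContext and walk are invented
// names, not the real JIT types.
#include <cstddef>

enum class AddrKind { Ind, Addr };

struct AddrContext
{
    AddrKind    kind;
    std::size_t totalOffset        = 0;
    bool        allConstantOffsets = true;
};

struct Node
{
    bool        isConstOffset;
    std::size_t offset;
    Node*       child;
};

void walk(Node* node, AddrContext* ctx)
{
    if (node == nullptr)
    {
        return;
    }
    if (ctx != nullptr)
    {
        if (node->isConstOffset)
        {
            ctx->totalOffset += node->offset; // sum constant offsets, as the GT_ADD case does
        }
        else
        {
            ctx->allConstantOffsets = false;  // any non-constant part poisons the sum
        }
    }
    walk(node->child, ctx); // hand the same context down the address computation
}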
- MorphAddrContext subIndMac1(MACK_Ind); - MorphAddrContext* subMac1 = mac; - if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind) - { - switch (tree->gtOper) - { - case GT_ADDR: - // A non-null mac here implies this node is part of an address computation. - // If so, we need to pass the existing mac down to the child node. - // - // Otherwise, use a new mac. - if (subMac1 == nullptr) - { - subMac1 = &subIndMac1; - subMac1->m_kind = MACK_Addr; - } - break; - case GT_COMMA: - // In a comma, the incoming context only applies to the rightmost arg of the - // comma list. The left arg (op1) gets a fresh context. - subMac1 = nullptr; - break; - case GT_OBJ: - case GT_BLK: - case GT_DYN_BLK: - case GT_IND: - // A non-null mac here implies this node is part of an address computation (the tree parent is - // GT_ADDR). - // If so, we need to pass the existing mac down to the child node. - // - // Otherwise, use a new mac. - if (subMac1 == nullptr) - { - subMac1 = &subIndMac1; - } - break; - default: - break; - } - } - - // For additions, if we're in an IND context keep track of whether - // all offsets added to the address are constant, and their sum. - if (tree->gtOper == GT_ADD && subMac1 != nullptr) - { - assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock. - GenTree* otherOp = tree->AsOp()->gtOp2; - // Is the other operator a constant? - if (otherOp->IsCnsIntOrI()) - { - ClrSafeInt totalOffset(subMac1->m_totalOffset); - totalOffset += otherOp->AsIntConCommon()->IconValue(); - if (totalOffset.IsOverflow()) - { - // We will consider an offset so large as to overflow as "not a constant" -- - // we will do a null check. - subMac1->m_allConstantOffsets = false; - } - else - { - subMac1->m_totalOffset += otherOp->AsIntConCommon()->IconValue(); - } - } - else - { - subMac1->m_allConstantOffsets = false; - } - } - - // If op1 is a GT_FIELD or indir, we need to pass down the mac if - // its parent is GT_ADDR, since the address of op1 - // is part of an ongoing address computation. Otherwise - // op1 represents the value of the field and so any address - // calculations it does are in a new context. - if (((op1->gtOper == GT_FIELD) || op1->OperIsIndir()) && (tree->gtOper != GT_ADDR)) - { - subMac1 = nullptr; - - // The impact of op1's value to any ongoing - // address computation is handled below when looking - // at op2. 
- } - - tree->AsOp()->gtOp1 = op1 = fgMorphTree(op1, subMac1); - -#if LOCAL_ASSERTION_PROP - // If we are exiting the "then" part of a Qmark-Colon we must - // save the state of the current copy assignment table - // so that we can merge this state with the "else" part exit - if (isQmarkColon) - { - noway_assert(optLocalAssertionProp); - if (optAssertionCount) - { - noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea - unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); - thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize); - thenAssertionCount = optAssertionCount; - memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize); - } - else - { - thenAssertionCount = 0; - thenAssertionTab = nullptr; - } - } -#endif // LOCAL_ASSERTION_PROP - - /* Morphing along with folding and inlining may have changed the - * side effect flags, so we have to reset them - * - * NOTE: Don't reset the exception flags on nodes that may throw */ - - assert(tree->gtOper != GT_CALL); - - if (!tree->OperRequiresCallFlag(this)) - { - tree->gtFlags &= ~GTF_CALL; - } - - /* Propagate the new flags */ - tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); - - // &aliasedVar doesn't need GTF_GLOB_REF, though alisasedVar does - // Similarly for clsVar - if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR)) - { - tree->gtFlags &= ~GTF_GLOB_REF; - } - } // if (op1) - - /*------------------------------------------------------------------------- - * Process the second operand, if any - */ - - if (op2) - { - -#if LOCAL_ASSERTION_PROP - // If we are entering the "else" part of a Qmark-Colon we must - // reset the state of the current copy assignment table - if (isQmarkColon) - { - noway_assert(optLocalAssertionProp); - optAssertionReset(0); - if (origAssertionCount) - { - size_t tabSize = origAssertionCount * sizeof(AssertionDsc); - memcpy(optAssertionTabPrivate, origAssertionTab, tabSize); - optAssertionReset(origAssertionCount); - } - } -#endif // LOCAL_ASSERTION_PROP - - // We might need a new MorphAddressContext context to use in evaluating op2. - // (These are used to convey parent context about how addresses being calculated - // will be used; see the specification comment for MorphAddrContext for full details.) - // Assume it's an Ind context to start. - switch (tree->gtOper) - { - case GT_ADD: - if (mac != nullptr && mac->m_kind == MACK_Ind) - { - GenTree* otherOp = tree->AsOp()->gtOp1; - // Is the other operator a constant? - if (otherOp->IsCnsIntOrI()) - { - mac->m_totalOffset += otherOp->AsIntConCommon()->IconValue(); - } - else - { - mac->m_allConstantOffsets = false; - } - } - break; - default: - break; - } - - // If op2 is a GT_FIELD or indir, we must be taking its value, - // so it should evaluate its address in a new context. - if ((op2->gtOper == GT_FIELD) || op2->OperIsIndir()) - { - // The impact of op2's value to any ongoing - // address computation is handled above when looking - // at op1. - mac = nullptr; - } - - tree->AsOp()->gtOp2 = op2 = fgMorphTree(op2, mac); - - /* Propagate the side effect flags from op2 */ - - tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT); - -#if LOCAL_ASSERTION_PROP - // If we are exiting the "else" part of a Qmark-Colon we must - // merge the state of the current copy assignment table with - // that of the exit of the "then" part. 
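// Aside: the merge performed below is effectively a set intersection: an assertion
// survives the QMARK only if the identical assertion holds at the exit of both the
// "then" and "else" arms. A simplified sketch with an invented Assertion type (not the
// real AssertionDsc layout).
#include <vector>

struct Assertion
{
    unsigned  lclNum; // left-hand local
    int       kind;   // assertion kind
    long long value;  // right-hand constant

    bool operator==(const Assertion& other) const
    {
        return (lclNum == other.lclNum) && (kind == other.kind) && (value == other.value);
    }
};

// Keep only the assertions that appear, unchanged, in both arms.
std::vector<Assertion> mergeArms(const std::vector<Assertion>& thenTab,
                                 const std::vector<Assertion>& elseTab)
{
    std::vector<Assertion> merged;
    for (const Assertion& a : elseTab)
    {
        for (const Assertion& t : thenTab)
        {
            if (a == t)
            {
                merged.push_back(a);
                break;
            }
        }
    }
    return merged;
}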
- if (isQmarkColon) - { - noway_assert(optLocalAssertionProp); - // If either exit table has zero entries then - // the merged table also has zero entries - if (optAssertionCount == 0 || thenAssertionCount == 0) - { - optAssertionReset(0); - } - else - { - size_t tabSize = optAssertionCount * sizeof(AssertionDsc); - if ((optAssertionCount != thenAssertionCount) || - (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0)) - { - // Yes they are different so we have to find the merged set - // Iterate over the copy asgn table removing any entries - // that do not have an exact match in the thenAssertionTab - AssertionIndex index = 1; - while (index <= optAssertionCount) - { - AssertionDsc* curAssertion = optGetAssertion(index); - - for (unsigned j = 0; j < thenAssertionCount; j++) - { - AssertionDsc* thenAssertion = &thenAssertionTab[j]; - - // Do the left sides match? - if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) && - (curAssertion->assertionKind == thenAssertion->assertionKind)) - { - // Do the right sides match? - if ((curAssertion->op2.kind == thenAssertion->op2.kind) && - (curAssertion->op2.lconVal == thenAssertion->op2.lconVal)) - { - goto KEEP; - } - else - { - goto REMOVE; - } - } - } - // - // If we fall out of the loop above then we didn't find - // any matching entry in the thenAssertionTab so it must - // have been killed on that path so we remove it here - // - REMOVE: - // The data at optAssertionTabPrivate[i] is to be removed - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef DEBUG - if (verbose) - { - printf("The QMARK-COLON "); - printTreeID(tree); - printf(" removes assertion candidate #%d\n", index); - } -#endif - optAssertionRemove(index); - continue; - KEEP: - // The data at optAssertionTabPrivate[i] is to be kept - index++; - } - } - } - } -#endif // LOCAL_ASSERTION_PROP - } // if (op2) - -DONE_MORPHING_CHILDREN: - - if (tree->OperIsIndirOrArrLength()) - { - tree->SetIndirExceptionFlags(this); - } - else - { - if (tree->OperMayThrow(this)) - { - // Mark the tree node as potentially throwing an exception - tree->gtFlags |= GTF_EXCEPT; - } - else - { - if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) && - ((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0))) - { - tree->gtFlags &= ~GTF_EXCEPT; - } - } - } - - if (tree->OperRequiresAsgFlag()) - { - tree->gtFlags |= GTF_ASG; - } - else - { - if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) && - ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) - { - tree->gtFlags &= ~GTF_ASG; - } - } - - if (tree->OperRequiresCallFlag(this)) - { - tree->gtFlags |= GTF_CALL; - } - else - { - if (((op1 == nullptr) || ((op1->gtFlags & GTF_CALL) == 0)) && - ((op2 == nullptr) || ((op2->gtFlags & GTF_CALL) == 0))) - { - tree->gtFlags &= ~GTF_CALL; - } - } - /*------------------------------------------------------------------------- - * Now do POST-ORDER processing - */ - - if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet()))) - { - // The tree is really not GC but was marked as such. Now that the - // children have been unmarked, unmark the tree too. 
- - // Remember that GT_COMMA inherits it's type only from op2 - if (tree->gtOper == GT_COMMA) - { - tree->gtType = genActualType(op2->TypeGet()); - } - else - { - tree->gtType = genActualType(op1->TypeGet()); - } - } - - GenTree* oldTree = tree; - - GenTree* qmarkOp1 = nullptr; - GenTree* qmarkOp2 = nullptr; - - if ((tree->OperGet() == GT_QMARK) && (tree->AsOp()->gtOp2->OperGet() == GT_COLON)) - { - qmarkOp1 = oldTree->AsOp()->gtOp2->AsOp()->gtOp1; - qmarkOp2 = oldTree->AsOp()->gtOp2->AsOp()->gtOp2; - } - - // Try to fold it, maybe we get lucky, - tree = gtFoldExpr(tree); - - if (oldTree != tree) - { - /* if gtFoldExpr returned op1 or op2 then we are done */ - if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2)) - { - return tree; - } - - /* If we created a comma-throw tree then we need to morph op1 */ - if (fgIsCommaThrow(tree)) - { - tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1); - fgMorphTreeDone(tree); - return tree; - } - - return tree; - } - else if (tree->OperKind() & GTK_CONST) - { - return tree; - } - - /* gtFoldExpr could have used setOper to change the oper */ - oper = tree->OperGet(); - typ = tree->TypeGet(); - - /* gtFoldExpr could have changed op1 and op2 */ - op1 = tree->AsOp()->gtOp1; - op2 = tree->gtGetOp2IfPresent(); - - // Do we have an integer compare operation? - // - if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet())) - { - // Are we comparing against zero? - // - if (op2->IsIntegralConst(0)) - { - // Request that the codegen for op1 sets the condition flags - // when it generates the code for op1. - // - // Codegen for op1 must set the condition flags if - // this method returns true. - // - op1->gtRequestSetFlags(); - } - } - /*------------------------------------------------------------------------- - * Perform the required oper-specific postorder morphing - */ - - GenTree* temp; - GenTree* cns1; - GenTree* cns2; - size_t ival1, ival2; - GenTree* lclVarTree; - GenTree* effectiveOp1; - FieldSeqNode* fieldSeq = nullptr; - - switch (oper) - { - case GT_ASG: - - if (op1->OperIs(GT_LCL_VAR) && ((op1->gtFlags & GTF_VAR_FOLDED_IND) != 0)) - { - op1->gtFlags &= ~GTF_VAR_FOLDED_IND; - tree = fgDoNormalizeOnStore(tree); - op2 = tree->gtGetOp2(); - } - - lclVarTree = fgIsIndirOfAddrOfLocal(op1); - if (lclVarTree != nullptr) - { - lclVarTree->gtFlags |= GTF_VAR_DEF; - } - - effectiveOp1 = op1->gtEffectiveVal(); - - if (effectiveOp1->OperIsConst()) - { - op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1); - tree->AsOp()->gtOp1 = op1; - } - - /* If we are storing a small type, we might be able to omit a cast */ - if ((effectiveOp1->gtOper == GT_IND) && varTypeIsSmall(effectiveOp1->TypeGet())) - { - if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow()) - { - var_types castType = op2->CastToType(); - - // If we are performing a narrowing cast and - // castType is larger or the same as op1's type - // then we can discard the cast. - - if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(effectiveOp1->TypeGet()))) - { - tree->AsOp()->gtOp2 = op2 = op2->AsCast()->CastOp(); - } - } - else if (op2->OperIsCompare() && varTypeIsByte(effectiveOp1->TypeGet())) - { - /* We don't need to zero extend the setcc instruction */ - op2->gtType = TYP_BYTE; - } - } - // If we introduced a CSE we may need to undo the optimization above - // (i.e. 
" op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type) - // When we introduce the CSE we remove the GT_IND and subsitute a GT_LCL_VAR in it place. - else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR)) - { - unsigned varNum = op1->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[varNum]; - - /* We again need to zero extend the setcc instruction */ - op2->gtType = varDsc->TypeGet(); - } - fgAssignSetVarDef(tree); - - /* We can't CSE the LHS of an assignment */ - /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */ - if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) - { - op1->gtFlags |= GTF_DONT_CSE; - } - break; - - case GT_EQ: - case GT_NE: - - /* Make sure we're allowed to do this */ - - if (optValnumCSE_phase) - { - // It is not safe to reorder/delete CSE's - break; - } - - cns2 = op2; - - /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */ - - if (cns2->gtOper == GT_CNS_INT && cns2->AsIntCon()->gtIconVal != 0) - { - op1 = tree->AsOp()->gtOp1; - - /* Since this can occur repeatedly we use a while loop */ - - while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->AsOp()->gtOp2->gtOper == GT_CNS_INT) && - (op1->gtType == TYP_INT) && (op1->gtOverflow() == false)) - { - /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */ - - ival1 = op1->AsOp()->gtOp2->AsIntCon()->gtIconVal; - ival2 = cns2->AsIntCon()->gtIconVal; - - if (op1->gtOper == GT_ADD) - { - ival2 -= ival1; - } - else - { - ival2 += ival1; - } - cns2->AsIntCon()->gtIconVal = ival2; - -#ifdef TARGET_64BIT - // we need to properly re-sign-extend or truncate as needed. - cns2->AsIntCon()->TruncateOrSignExtend32(); -#endif // TARGET_64BIT - - op1 = tree->AsOp()->gtOp1 = op1->AsOp()->gtOp1; - } - } - - // - // Here we look for the following tree - // - // EQ/NE - // / \. - // op1 CNS 0/1 - // - ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1 - - // cast to unsigned allows test for both 0 and 1 - if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->AsIntConCommon()->IconValue()) <= 1U)) - { - ival2 = (size_t)cns2->AsIntConCommon()->IconValue(); - } - else // cast to UINT64 allows test for both 0 and 1 - if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->AsIntConCommon()->LngValue()) <= 1ULL)) - { - ival2 = (size_t)cns2->AsIntConCommon()->LngValue(); - } - - if (ival2 != INT_MAX) - { - // If we don't have a comma and relop, we can't do this optimization - // - if ((op1->gtOper == GT_COMMA) && (op1->AsOp()->gtOp2->OperIsCompare())) - { - // Here we look for the following transformation - // - // EQ/NE Possible REVERSE(RELOP) - // / \ / \. - // COMMA CNS 0/1 -> COMMA relop_op2 - // / \ / \. - // x RELOP x relop_op1 - // / \. 
- // relop_op1 relop_op2 - // - // - // - GenTree* comma = op1; - GenTree* relop = comma->AsOp()->gtOp2; - - GenTree* relop_op1 = relop->AsOp()->gtOp1; - - bool reverse = ((ival2 == 0) == (oper == GT_EQ)); - - if (reverse) - { - gtReverseCond(relop); - } - - relop->AsOp()->gtOp1 = comma; - comma->AsOp()->gtOp2 = relop_op1; - - // Comma now has fewer nodes underneath it, so we need to regenerate its flags - comma->gtFlags &= ~GTF_ALL_EFFECT; - comma->gtFlags |= (comma->AsOp()->gtOp1->gtFlags) & GTF_ALL_EFFECT; - comma->gtFlags |= (comma->AsOp()->gtOp2->gtFlags) & GTF_ALL_EFFECT; - - noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0); - noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0); - relop->gtFlags |= - tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT); - - return relop; - } - - if (op1->gtOper == GT_COMMA) - { - // Here we look for the following tree - // and when the LCL_VAR is a temp we can fold the tree: - // - // EQ/NE EQ/NE - // / \ / \. - // COMMA CNS 0/1 -> RELOP CNS 0/1 - // / \ / \. - // ASG LCL_VAR - // / \. - // LCL_VAR RELOP - // / \. - // - - GenTree* asg = op1->AsOp()->gtOp1; - GenTree* lcl = op1->AsOp()->gtOp2; - - /* Make sure that the left side of the comma is the assignment of the LCL_VAR */ - if (asg->gtOper != GT_ASG) - { - goto SKIP; - } - - /* The right side of the comma must be a LCL_VAR temp */ - if (lcl->gtOper != GT_LCL_VAR) - { - goto SKIP; - } - - unsigned lclNum = lcl->AsLclVarCommon()->GetLclNum(); - noway_assert(lclNum < lvaCount); - - /* If the LCL_VAR is not a temp then bail, a temp has a single def */ - if (!lvaTable[lclNum].lvIsTemp) - { - goto SKIP; - } - - /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */ - // Fix 383856 X86/ARM ILGEN - if (lclNumIsCSE(lclNum)) - { - goto SKIP; - } - - /* We also must be assigning the result of a RELOP */ - if (asg->AsOp()->gtOp1->gtOper != GT_LCL_VAR) - { - goto SKIP; - } - - /* Both of the LCL_VAR must match */ - if (asg->AsOp()->gtOp1->AsLclVarCommon()->GetLclNum() != lclNum) - { - goto SKIP; - } - - /* If right side of asg is not a RELOP then skip */ - if (!asg->AsOp()->gtOp2->OperIsCompare()) - { - goto SKIP; - } - - /* Set op1 to the right side of asg, (i.e. the RELOP) */ - op1 = asg->AsOp()->gtOp2; - - DEBUG_DESTROY_NODE(asg->AsOp()->gtOp1); - DEBUG_DESTROY_NODE(lcl); - } - - if (op1->OperIsCompare()) - { - // Here we look for the following tree - // - // EQ/NE -> RELOP/!RELOP - // / \ / \. - // RELOP CNS 0/1 - // / \. - // - // Note that we will remove/destroy the EQ/NE node and move - // the RELOP up into it's location. - - /* Here we reverse the RELOP if necessary */ - - bool reverse = ((ival2 == 0) == (oper == GT_EQ)); - - if (reverse) - { - gtReverseCond(op1); - } - - /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */ - op1->gtType = tree->gtType; - - noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0); - op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE); - - DEBUG_DESTROY_NODE(tree); - return op1; - } - - // - // Now we check for a compare with the result of an '&' operator - // - // Here we look for the following transformation: - // - // EQ/NE EQ/NE - // / \ / \. - // AND CNS 0/1 -> AND CNS 0 - // / \ / \. - // RSZ/RSH CNS 1 x CNS (1 << y) - // / \. 
- // x CNS_INT +y - - if (op1->gtOper == GT_AND) - { - GenTree* andOp = op1; - GenTree* rshiftOp = andOp->AsOp()->gtOp1; - - if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH)) - { - goto SKIP; - } - - if (!rshiftOp->AsOp()->gtOp2->IsCnsIntOrI()) - { - goto SKIP; - } - - ssize_t shiftAmount = rshiftOp->AsOp()->gtOp2->AsIntCon()->gtIconVal; - - if (shiftAmount < 0) - { - goto SKIP; - } - - if (!andOp->AsOp()->gtOp2->IsIntegralConst(1)) - { - goto SKIP; - } - - if (andOp->gtType == TYP_INT) - { - if (shiftAmount > 31) - { - goto SKIP; - } - - UINT32 newAndOperand = ((UINT32)1) << shiftAmount; - - andOp->AsOp()->gtOp2->AsIntCon()->gtIconVal = newAndOperand; - - // Reverse the cond if necessary - if (ival2 == 1) - { - gtReverseCond(tree); - cns2->AsIntCon()->gtIconVal = 0; - oper = tree->gtOper; - } - } - else if (andOp->gtType == TYP_LONG) - { - if (shiftAmount > 63) - { - goto SKIP; - } - - UINT64 newAndOperand = ((UINT64)1) << shiftAmount; - - andOp->AsOp()->gtOp2->AsIntConCommon()->SetLngValue(newAndOperand); - - // Reverse the cond if necessary - if (ival2 == 1) - { - gtReverseCond(tree); - cns2->AsIntConCommon()->SetLngValue(0); - oper = tree->gtOper; - } - } - - andOp->AsOp()->gtOp1 = rshiftOp->AsOp()->gtOp1; - - DEBUG_DESTROY_NODE(rshiftOp->AsOp()->gtOp2); - DEBUG_DESTROY_NODE(rshiftOp); - } - } // END if (ival2 != INT_MAX) - - SKIP: - /* Now check for compares with small constant longs that can be cast to int */ - - if (!cns2->OperIsConst()) - { - goto COMPARE; - } - - if (cns2->TypeGet() != TYP_LONG) - { - goto COMPARE; - } - - /* Is the constant 31 bits or smaller? */ - - if ((cns2->AsIntConCommon()->LngValue() >> 31) != 0) - { - goto COMPARE; - } - - /* Is the first comparand mask operation of type long ? */ - - if (op1->gtOper != GT_AND) - { - /* Another interesting case: cast from int */ - - if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT && - !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate - !op1->gtOverflow()) // cannot be an overflow checking cast - { - /* Simply make this into an integer comparison */ - - tree->AsOp()->gtOp1 = op1->AsCast()->CastOp(); - tree->AsOp()->gtOp2 = gtNewIconNode((int)cns2->AsIntConCommon()->LngValue(), TYP_INT); - } - - goto COMPARE; - } - - noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND); - - /* Is the result of the mask effectively an INT ? 
*/ - - GenTree* andMask; - andMask = op1->AsOp()->gtOp2; - if (andMask->gtOper != GT_CNS_NATIVELONG) - { - goto COMPARE; - } - if ((andMask->AsIntConCommon()->LngValue() >> 32) != 0) - { - goto COMPARE; - } - - /* Now we know that we can cast AsOp()->gtOp1 of AND to int */ - - op1->AsOp()->gtOp1 = gtNewCastNode(TYP_INT, op1->AsOp()->gtOp1, false, TYP_INT); - - /* now replace the mask node (AsOp()->gtOp2 of AND node) */ - - noway_assert(andMask == op1->AsOp()->gtOp2); - - ival1 = (int)andMask->AsIntConCommon()->LngValue(); - andMask->SetOper(GT_CNS_INT); - andMask->gtType = TYP_INT; - andMask->AsIntCon()->gtIconVal = ival1; - - /* now change the type of the AND node */ - - op1->gtType = TYP_INT; - - /* finally we replace the comparand */ - - ival2 = (int)cns2->AsIntConCommon()->LngValue(); - cns2->SetOper(GT_CNS_INT); - cns2->gtType = TYP_INT; - - noway_assert(cns2 == op2); - cns2->AsIntCon()->gtIconVal = ival2; - - goto COMPARE; - - case GT_LT: - case GT_LE: - case GT_GE: - case GT_GT: - - if (op2->gtOper == GT_CNS_INT) - { - cns2 = op2; - /* Check for "expr relop 1" */ - if (cns2->IsIntegralConst(1)) - { - /* Check for "expr >= 1" */ - if (oper == GT_GE) - { - /* Change to "expr != 0" for unsigned and "expr > 0" for signed */ - oper = (tree->IsUnsigned()) ? GT_NE : GT_GT; - goto SET_OPER; - } - /* Check for "expr < 1" */ - else if (oper == GT_LT) - { - /* Change to "expr == 0" for unsigned and "expr <= 0" for signed */ - oper = (tree->IsUnsigned()) ? GT_EQ : GT_LE; - goto SET_OPER; - } - } - /* Check for "expr relop -1" */ - else if (!tree->IsUnsigned() && cns2->IsIntegralConst(-1)) - { - /* Check for "expr <= -1" */ - if (oper == GT_LE) - { - /* Change to "expr < 0" */ - oper = GT_LT; - goto SET_OPER; - } - /* Check for "expr > -1" */ - else if (oper == GT_GT) - { - /* Change to "expr >= 0" */ - oper = GT_GE; - - SET_OPER: - // IF we get here we should be changing 'oper' - assert(tree->OperGet() != oper); - - // Keep the old ValueNumber for 'tree' as the new expr - // will still compute the same value as before - tree->SetOper(oper, GenTree::PRESERVE_VN); - cns2->AsIntCon()->gtIconVal = 0; - - // vnStore is null before the ValueNumber phase has run - if (vnStore != nullptr) - { - // Update the ValueNumber for 'cns2', as we just changed it to 0 - fgValueNumberTreeConst(cns2); - } - op2 = tree->AsOp()->gtOp2 = gtFoldExpr(op2); - } - } - else if (tree->IsUnsigned() && op2->IsIntegralConst(0)) - { - if ((oper == GT_GT) || (oper == GT_LE)) - { - // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT - // recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails - // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0) - // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The later case is rare, it sometimes - // occurs as a result of branch inversion. - oper = (oper == GT_LE) ? GT_EQ : GT_NE; - tree->SetOper(oper, GenTree::PRESERVE_VN); - tree->gtFlags &= ~GTF_UNSIGNED; - } - } - } - - COMPARE: - - noway_assert(tree->OperKind() & GTK_RELOP); - break; - - case GT_MUL: - -#ifndef TARGET_64BIT - if (typ == TYP_LONG) - { - // This must be GTF_MUL_64RSLT - assert(tree->gtIsValid64RsltMul()); - return tree; - } -#endif // TARGET_64BIT - goto CM_OVF_OP; - - case GT_SUB: - - if (tree->gtOverflow()) - { - goto CM_OVF_OP; - } - - // TODO #4104: there are a lot of other places where - // this condition is not checked before transformations. 
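// [Editor's illustrative aside, not part of the patch] The relational rewrites above rest on
// plain integer identities: for unsigned x, "x > 0" is the same predicate as "x != 0" and
// "x <= 0" the same as "x == 0"; for signed x, "x >= 1" matches "x > 0" and "x < 1" matches
// "x <= 0". A minimal standalone check of those identities (not JIT code, just the arithmetic):
#include <cassert>
#include <cstdint>

int main()
{
    for (int64_t i = -3; i <= 3; ++i)
    {
        uint32_t u = static_cast<uint32_t>(i);
        int32_t  s = static_cast<int32_t>(i);
        assert((u > 0u) == (u != 0u));  // GT_GT (unsigned) vs GT_NE against 0
        assert((u <= 0u) == (u == 0u)); // GT_LE (unsigned) vs GT_EQ against 0
        assert((s >= 1) == (s > 0));    // "expr >= 1" vs "expr > 0" (signed)
        assert((s < 1) == (s <= 0));    // "expr < 1"  vs "expr <= 0" (signed)
    }
    return 0;
}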
- if (fgGlobalMorph) - { - /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */ - - noway_assert(op2); - if (op2->IsCnsIntOrI() && !op2->IsIconHandle()) - { - // Negate the constant and change the node to be "+", - // except when `op2` is a const byref. - - op2->AsIntConCommon()->SetIconValue(-op2->AsIntConCommon()->IconValue()); - op2->AsIntConRef().gtFieldSeq = FieldSeqStore::NotAField(); - oper = GT_ADD; - tree->ChangeOper(oper); - goto CM_ADD_OP; - } - - /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */ - - noway_assert(op1); - if (op1->IsCnsIntOrI()) - { - noway_assert(varTypeIsIntOrI(tree)); - - // The type of the new GT_NEG node cannot just be op2->TypeGet(). - // Otherwise we may sign-extend incorrectly in cases where the GT_NEG - // node ends up feeding directly into a cast, for example in - // GT_CAST(GT_SUB(0, s_1.ubyte)) - tree->AsOp()->gtOp2 = op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2); - fgMorphTreeDone(op2); - - oper = GT_ADD; - tree->ChangeOper(oper); - goto CM_ADD_OP; - } - - /* No match - exit */ - } - - // Skip optimization if non-NEG operand is constant. - // Both op1 and op2 are not constant because it was already checked above. - if (opts.OptimizationEnabled() && fgGlobalMorph && - (((op1->gtFlags & GTF_EXCEPT) == 0) || ((op2->gtFlags & GTF_EXCEPT) == 0))) - { - // a - -b = > a + b - // SUB(a, (NEG(b)) => ADD(a, b) - - if (!op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG)) - { - // tree: SUB - // op1: a - // op2: NEG - // op2Child: b - - GenTree* op2Child = op2->AsOp()->gtOp1; // b - oper = GT_ADD; - tree->SetOper(oper, GenTree::PRESERVE_VN); - tree->AsOp()->gtOp2 = op2Child; - - DEBUG_DESTROY_NODE(op2); - - op2 = op2Child; - } - - // -a - -b = > b - a - // SUB(NEG(a), (NEG(b)) => SUB(b, a) - - if (op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG)) - { - // tree: SUB - // op1: NEG - // op1Child: a - // op2: NEG - // op2Child: b - - GenTree* op1Child = op1->AsOp()->gtOp1; // a - GenTree* op2Child = op2->AsOp()->gtOp1; // b - tree->AsOp()->gtOp1 = op2Child; - tree->AsOp()->gtOp2 = op1Child; - - DEBUG_DESTROY_NODE(op1); - DEBUG_DESTROY_NODE(op2); - - op1 = op2Child; - op2 = op1Child; - } - } - - break; - -#ifdef TARGET_ARM64 - case GT_DIV: - if (!varTypeIsFloating(tree->gtType)) - { - // Codegen for this instruction needs to be able to throw two exceptions: - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); - } - break; - case GT_UDIV: - // Codegen for this instruction needs to be able to throw one exception: - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); - break; -#endif - - case GT_ADD: - - CM_OVF_OP: - if (tree->gtOverflow()) - { - tree->gtRequestSetFlags(); - - // Add the excptn-throwing basic block to jump to on overflow - - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); - - // We can't do any commutative morphing for overflow instructions - - break; - } - - CM_ADD_OP: - - case GT_OR: - case GT_XOR: - case GT_AND: - - /* Commute any non-REF constants to the right */ - - noway_assert(op1); - if (op1->OperIsConst() && (op1->gtType != TYP_REF)) - { - // TODO-Review: We used to assert here that - // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD)); - // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation, - // and would sometimes hit this assertion. This may indicate a missed "remorph". - // Task is to re-enable this assertion and investigate. 
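// [Editor's illustrative aside, not part of the patch] The GT_SUB rewrites above rely on
// two's-complement identities: "x - c == x + (-c)", "a - (-b) == a + b", and
// "(-a) - (-b) == b - a". A standalone check in modulo-2^32 arithmetic (uint32_t keeps the
// wraparound well-defined in portable C++), including values that would overflow if done signed:
#include <cassert>
#include <cstdint>

int main()
{
    const uint32_t samples[] = {0u, 1u, 7u, 0x80000000u, 0xFFFFFFFFu};
    for (uint32_t a : samples)
    {
        for (uint32_t b : samples)
        {
            assert(a - b == a + (0u - b));        // op1 - cns  =>  op1 + (-cns)
            assert(a - (0u - b) == a + b);        // SUB(a, NEG(b))      => ADD(a, b)
            assert((0u - a) - (0u - b) == b - a); // SUB(NEG(a), NEG(b)) => SUB(b, a)
        }
    }
    return 0;
}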
- - /* Swap the operands */ - tree->AsOp()->gtOp1 = op2; - tree->AsOp()->gtOp2 = op1; - - op1 = op2; - op2 = tree->AsOp()->gtOp2; - } - - // See if we can fold floating point operations (can regress minopts mode) - if (opts.OptimizationEnabled() && varTypeIsFloating(tree->TypeGet()) && !optValnumCSE_phase) - { - if ((oper == GT_MUL) && !op1->IsCnsFltOrDbl() && op2->IsCnsFltOrDbl()) - { - if (op2->AsDblCon()->gtDconVal == 2.0) - { - bool needsComma = !op1->OperIsLeaf() && !op1->IsLocal(); - // if op1 is not a leaf/local we have to introduce a temp via GT_COMMA. - // Unfortunately, it's not optHoistLoopCode-friendly yet so let's do it later. - if (!needsComma || (fgOrder == FGOrderLinear)) - { - // Fold "x*2.0" to "x+x" - op2 = fgMakeMultiUse(&tree->AsOp()->gtOp1); - op1 = tree->AsOp()->gtOp1; - oper = GT_ADD; - tree = gtNewOperNode(oper, tree->TypeGet(), op1, op2); - INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - } - } - else if (op2->AsDblCon()->gtDconVal == 1.0) - { - // Fold "x*1.0" to "x" - DEBUG_DESTROY_NODE(op2); - DEBUG_DESTROY_NODE(tree); - return op1; - } - } - } - - /* See if we can fold GT_ADD nodes. */ - - if (oper == GT_ADD) - { - /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */ - - if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) && - op1->AsOp()->gtOp2->gtOper == GT_CNS_INT && op2->AsOp()->gtOp2->gtOper == GT_CNS_INT && - !op1->gtOverflow() && !op2->gtOverflow()) - { - // Don't create a byref pointer that may point outside of the ref object. - // If a GC happens, the byref won't get updated. This can happen if one - // of the int components is negative. It also requires the address generation - // be in a fully-interruptible code region. - if (!varTypeIsGC(op1->AsOp()->gtOp1->TypeGet()) && !varTypeIsGC(op2->AsOp()->gtOp1->TypeGet())) - { - cns1 = op1->AsOp()->gtOp2; - cns2 = op2->AsOp()->gtOp2; - cns1->AsIntCon()->gtIconVal += cns2->AsIntCon()->gtIconVal; -#ifdef TARGET_64BIT - if (cns1->TypeGet() == TYP_INT) - { - // we need to properly re-sign-extend or truncate after adding two int constants above - cns1->AsIntCon()->TruncateOrSignExtend32(); - } -#endif // TARGET_64BIT - - tree->AsOp()->gtOp2 = cns1; - DEBUG_DESTROY_NODE(cns2); - - op1->AsOp()->gtOp2 = op2->AsOp()->gtOp1; - op1->gtFlags |= (op1->AsOp()->gtOp2->gtFlags & GTF_ALL_EFFECT); - DEBUG_DESTROY_NODE(op2); - op2 = tree->AsOp()->gtOp2; - } - } - - if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ)) - { - CLANG_FORMAT_COMMENT_ANCHOR; - - // Fold (x + 0). - - if ((op2->AsIntConCommon()->IconValue() == 0) && !gtIsActiveCSE_Candidate(tree)) - { - - // If this addition is adding an offset to a null pointer, - // avoid the work and yield the null pointer immediately. - // Dereferencing the pointer in either case will have the - // same effect. - - if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) && - ((op1->gtFlags & GTF_ALL_EFFECT) == 0)) - { - op2->gtType = tree->gtType; - DEBUG_DESTROY_NODE(op1); - DEBUG_DESTROY_NODE(tree); - return op2; - } - - // Remove the addition iff it won't change the tree type - // to TYP_REF. 
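// [Editor's illustrative aside, not part of the patch] Two of the GT_ADD/GT_MUL folds above,
// checked standalone: re-associating "(x + c1) + (y + c2)" into "(x + y) + (c1 + c2)" is exact
// in modulo-2^32 arithmetic, and for finite IEEE doubles "x * 2.0" is exactly "x + x" while
// "x * 1.0" is exactly "x", which is why the JIT can fold them without changing results
// (the "x + 0" removal handled at this point is the trivial integer counterpart):
#include <cassert>
#include <cstdint>

int main()
{
    const uint32_t vals[] = {0u, 3u, 0x7FFFFFFFu, 0xFFFFFFF0u};
    for (uint32_t x : vals)
        for (uint32_t y : vals)
            for (uint32_t c1 : vals)
                for (uint32_t c2 : vals)
                    assert((x + c1) + (y + c2) == (x + y) + (c1 + c2));

    const double dvals[] = {0.0, -0.0, 1.5, -3.25, 1e308};
    for (double d : dvals)
    {
        assert(d * 2.0 == d + d); // fold "x * 2.0" into "x + x"
        assert(d * 1.0 == d);     // fold "x * 1.0" into "x"
    }
    return 0;
}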
- - if (!gtIsActiveCSE_Candidate(op2) && - ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF))) - { - if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) && - (op2->AsIntCon()->gtFieldSeq != nullptr) && - (op2->AsIntCon()->gtFieldSeq != FieldSeqStore::NotAField())) - { - fgAddFieldSeqForZeroOffset(op1, op2->AsIntCon()->gtFieldSeq); - } - - DEBUG_DESTROY_NODE(op2); - DEBUG_DESTROY_NODE(tree); - - return op1; - } - } - } - - if (opts.OptimizationEnabled() && fgGlobalMorph && - (((op1->gtFlags & GTF_EXCEPT) == 0) || ((op2->gtFlags & GTF_EXCEPT) == 0))) - { - // - a + b = > b - a - // ADD((NEG(a), b) => SUB(b, a) - - // Skip optimization if non-NEG operand is constant. - if (op1->OperIs(GT_NEG) && !op2->OperIs(GT_NEG) && - !(op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))) - { - // tree: ADD - // op1: NEG - // op2: b - // op1Child: a - - GenTree* op1Child = op1->AsOp()->gtOp1; // a - oper = GT_SUB; - tree->SetOper(oper, GenTree::PRESERVE_VN); - tree->AsOp()->gtOp1 = op2; - tree->AsOp()->gtOp2 = op1Child; - - DEBUG_DESTROY_NODE(op1); - - op1 = op2; - op2 = op1Child; - } - - // a + -b = > a - b - // ADD(a, (NEG(b)) => SUB(a, b) - - if (!op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG)) - { - // a is non cosntant because it was already canonicalized to have - // variable on the left and constant on the right. - - // tree: ADD - // op1: a - // op2: NEG - // op2Child: b - - GenTree* op2Child = op2->AsOp()->gtOp1; // a - oper = GT_SUB; - tree->SetOper(oper, GenTree::PRESERVE_VN); - tree->AsOp()->gtOp2 = op2Child; - - DEBUG_DESTROY_NODE(op2); - - op2 = op2Child; - } - } - } - /* See if we can fold GT_MUL by const nodes */ - else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase) - { -#ifndef TARGET_64BIT - noway_assert(typ <= TYP_UINT); -#endif // TARGET_64BIT - noway_assert(!tree->gtOverflow()); - - ssize_t mult = op2->AsIntConCommon()->IconValue(); - bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->AsIntCon()->gtFieldSeq != nullptr && - op2->AsIntCon()->gtFieldSeq->IsConstantIndexFieldSeq(); - - assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr); - - if (mult == 0) - { - // We may be able to throw away op1 (unless it has side-effects) - - if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0) - { - DEBUG_DESTROY_NODE(op1); - DEBUG_DESTROY_NODE(tree); - return op2; // Just return the "0" node - } - - // We need to keep op1 for the side-effects. Hang it off - // a GT_COMMA node - - tree->ChangeOper(GT_COMMA); - return tree; - } - - size_t abs_mult = (mult >= 0) ? mult : -mult; - size_t lowestBit = genFindLowestBit(abs_mult); - bool changeToShift = false; - - // is it a power of two? (positive or negative) - if (abs_mult == lowestBit) - { - // if negative negate (min-int does not need negation) - if (mult < 0 && mult != SSIZE_T_MIN) - { - // The type of the new GT_NEG node cannot just be op1->TypeGet(). - // Otherwise we may sign-extend incorrectly in cases where the GT_NEG - // node ends up feeding directly a cast, for example in - // GT_CAST(GT_MUL(-1, s_1.ubyte)) - tree->AsOp()->gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); - fgMorphTreeDone(op1); - } - - // If "op2" is a constant array index, the other multiplicand must be a constant. - // Transfer the annotation to the other one. 
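// [Editor's illustrative aside, not part of the patch] The power-of-two test above
// ("abs_mult == lowestBit") and the shift rewrites around it come down to bit arithmetic:
// a value is a power of two exactly when it equals its own lowest set bit, multiplying by 2^n
// is a left shift by n, and a constant such as 40 splits into 5 * 2^3 so that "x * 40" becomes
// "(x * 5) << 3" (the factor-3/5/9 path a bit further down). genFindLowestBit and genLog2 are
// the JIT helpers; plain bit tricks stand in for them in this sketch:
#include <cassert>
#include <cstdint>

static uint32_t lowestBit(uint32_t v) { return v & (0u - v); }

int main()
{
    assert(lowestBit(8u) == 8u);  // 8 is a power of two: equal to its lowest set bit
    assert(lowestBit(40u) == 8u); // 40 is not: 40 = 5 * 2^3

    const uint32_t xs[] = {0u, 1u, 13u, 0xDEADBEEFu};
    for (uint32_t x : xs)
    {
        assert(x * 8u == (x << 3));         // x * 2^3        =>  x << 3
        assert(x * 40u == ((x * 5u) << 3)); // x * (5 * 2^3)  =>  (x * 5) << 3
    }
    return 0;
}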
- if (op2->OperGet() == GT_CNS_INT && op2->AsIntCon()->gtFieldSeq != nullptr && - op2->AsIntCon()->gtFieldSeq->IsConstantIndexFieldSeq()) - { - assert(op2->AsIntCon()->gtFieldSeq->m_next == nullptr); - GenTree* otherOp = op1; - if (otherOp->OperGet() == GT_NEG) - { - otherOp = otherOp->AsOp()->gtOp1; - } - assert(otherOp->OperGet() == GT_CNS_INT); - assert(otherOp->AsIntCon()->gtFieldSeq == FieldSeqStore::NotAField()); - otherOp->AsIntCon()->gtFieldSeq = op2->AsIntCon()->gtFieldSeq; - } - - if (abs_mult == 1) - { - DEBUG_DESTROY_NODE(op2); - DEBUG_DESTROY_NODE(tree); - return op1; - } - - /* Change the multiplication into a shift by log2(val) bits */ - op2->AsIntConCommon()->SetIconValue(genLog2(abs_mult)); - changeToShift = true; - } -#if LEA_AVAILABLE - else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult()) - { - int shift = genLog2(lowestBit); - ssize_t factor = abs_mult >> shift; - - if (factor == 3 || factor == 5 || factor == 9) - { - // if negative negate (min-int does not need negation) - if (mult < 0 && mult != SSIZE_T_MIN) - { - tree->AsOp()->gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); - fgMorphTreeDone(op1); - } - - GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL); - if (op2IsConstIndex) - { - factorIcon->AsIntCon()->gtFieldSeq = - GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); - } - - // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift - tree->AsOp()->gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon); - fgMorphTreeDone(op1); - - op2->AsIntConCommon()->SetIconValue(shift); - changeToShift = true; - } - } -#endif // LEA_AVAILABLE - if (changeToShift) - { - // vnStore is null before the ValueNumber phase has run - if (vnStore != nullptr) - { - // Update the ValueNumber for 'op2', as we just changed the constant - fgValueNumberTreeConst(op2); - } - oper = GT_LSH; - // Keep the old ValueNumber for 'tree' as the new expr - // will still compute the same value as before - tree->ChangeOper(oper, GenTree::PRESERVE_VN); - - goto DONE_MORPHING_CHILDREN; - } - } - else if (fgOperIsBitwiseRotationRoot(oper)) - { - tree = fgRecognizeAndMorphBitwiseRotation(tree); - - // fgRecognizeAndMorphBitwiseRotation may return a new tree - oper = tree->OperGet(); - typ = tree->TypeGet(); - op1 = tree->AsOp()->gtOp1; - op2 = tree->AsOp()->gtOp2; - } - - if (varTypeIsIntegralOrI(tree->TypeGet()) && tree->OperIs(GT_ADD, GT_MUL, GT_AND, GT_OR, GT_XOR)) - { - GenTree* foldedTree = fgMorphCommutative(tree->AsOp()); - if (foldedTree != nullptr) - { - tree = foldedTree; - op1 = tree->gtGetOp1(); - op2 = tree->gtGetOp2(); - if (!tree->OperIs(oper)) - { - return tree; - } - } - } - - break; - - case GT_NOT: - case GT_NEG: - // Remove double negation/not. - // Note: this is not a safe tranformation if "tree" is a CSE candidate. - // Consider for example the following expression: NEG(NEG(OP)), where the top-level - // NEG is a CSE candidate. Were we to morph this to just OP, CSE would fail to find - // the original NEG in the statement. 
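// [Editor's illustrative aside, not part of the patch] The GT_NOT/GT_NEG handling just below
// removes double negation (NEG(NEG(x)) => x, NOT(NOT(x)) => x) and distributes negation over a
// multiply or divide by a constant: NEG(MUL(a, C)) => MUL(a, -C) and NEG(DIV(a, C)) => DIV(a, -C),
// with C == 1 and C == -1 excluded, presumably because "a / -1" can fault for a == INT_MIN while
// the original form performs no such division. The underlying integer identities, checked standalone:
#include <cassert>
#include <cstdint>

int main()
{
    const uint32_t us[] = {0u, 5u, 0x80000000u, 0xFFFFFFF9u};
    for (uint32_t a : us)
    {
        assert(0u - (0u - a) == a);             // NEG(NEG(a)) => a (modulo 2^32)
        assert(~~a == a);                       // NOT(NOT(a)) => a
        assert(0u - (a * 3u) == a * (0u - 3u)); // NEG(MUL(a, C)) => MUL(a, -C)
    }

    const int32_t ss[] = {7, -7, 22, -22};
    for (int32_t a : ss)
    {
        assert(-(a / 3) == a / -3);             // NEG(DIV(a, C)) => DIV(a, -C), C != 1, C != -1
    }
    return 0;
}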
- if (op1->OperIs(oper) && opts.OptimizationEnabled() && !gtIsActiveCSE_Candidate(tree)) - { - GenTree* child = op1->AsOp()->gtGetOp1(); - return child; - } - - // Distribute negation over simple multiplication/division expressions - if (opts.OptimizationEnabled() && !optValnumCSE_phase && tree->OperIs(GT_NEG) && - op1->OperIs(GT_MUL, GT_DIV)) - { - GenTreeOp* mulOrDiv = op1->AsOp(); - GenTree* op1op1 = mulOrDiv->gtGetOp1(); - GenTree* op1op2 = mulOrDiv->gtGetOp2(); - - if (!op1op1->IsCnsIntOrI() && op1op2->IsCnsIntOrI() && !op1op2->IsIconHandle()) - { - // NEG(MUL(a, C)) => MUL(a, -C) - // NEG(DIV(a, C)) => DIV(a, -C), except when C = {-1, 1} - ssize_t constVal = op1op2->AsIntCon()->IconValue(); - if ((mulOrDiv->OperIs(GT_DIV) && (constVal != -1) && (constVal != 1)) || - (mulOrDiv->OperIs(GT_MUL) && !mulOrDiv->gtOverflow())) - { - GenTree* newOp1 = op1op1; // a - GenTree* newOp2 = gtNewIconNode(-constVal, op1op2->TypeGet()); // -C - mulOrDiv->gtOp1 = newOp1; - mulOrDiv->gtOp2 = newOp2; - - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op1op2); - - return mulOrDiv; - } - } - } - - /* Any constant cases should have been folded earlier */ - noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase); - break; - - case GT_CKFINITE: - - noway_assert(varTypeIsFloating(op1->TypeGet())); - - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN); - break; - - case GT_OBJ: - // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on - // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X - // is a local or clsVar, even if it has been address-exposed. - if (op1->OperGet() == GT_ADDR) - { - GenTreeUnOp* addr = op1->AsUnOp(); - GenTree* addrOp = addr->gtGetOp1(); - tree->gtFlags |= (addrOp->gtFlags & GTF_GLOB_REF); - } - break; - - case GT_IND: - { - // Can not remove a GT_IND if it is currently a CSE candidate. - if (gtIsActiveCSE_Candidate(tree)) - { - break; - } - - bool foldAndReturnTemp = false; - temp = nullptr; - ival1 = 0; - - // Don't remove a volatile GT_IND, even if the address points to a local variable. - if ((tree->gtFlags & GTF_IND_VOLATILE) == 0) - { - /* Try to Fold *(&X) into X */ - if (op1->gtOper == GT_ADDR) - { - // Can not remove a GT_ADDR if it is currently a CSE candidate. - if (gtIsActiveCSE_Candidate(op1)) - { - break; - } - - temp = op1->AsOp()->gtOp1; // X - - // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that - // they are the *same* struct type. In fact, they almost certainly aren't. If the - // address has an associated field sequence, that identifies this case; go through - // the "lcl_fld" path rather than this one. - FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below. 
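// [Editor's illustrative aside, not part of the patch] The GT_IND folding here is the familiar
// dereference-of-address-of cancellation: "*(&X)" is just "X", provided the indirection reads the
// value with X's own type (the type check that follows); a mismatched type typically goes down the
// GT_LCL_FLD path instead. In plain C++ terms the cancellation is simply:
#include <cassert>

int main()
{
    int x = 123;
    assert(*&x == x); // IND(ADDR(x)) => x when the types line up
    return 0;
}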
- if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq)) - { - foldAndReturnTemp = true; - } - else if (temp->OperIsLocal()) - { - unsigned lclNum = temp->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[lclNum]; - - // We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset - if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0)) - { - noway_assert(varTypeIsStruct(varDsc)); - - // We will try to optimize when we have a single field struct that is being struct promoted - if (varDsc->lvFieldCnt == 1) - { - unsigned lclNumFld = varDsc->lvFieldLclStart; - // just grab the promoted field - LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld]; - - // Also make sure that the tree type matches the fieldVarType and that it's lvFldOffset - // is zero - if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0)) - { - // We can just use the existing promoted field LclNum - temp->AsLclVarCommon()->SetLclNum(lclNumFld); - temp->gtType = fieldVarDsc->TypeGet(); - - foldAndReturnTemp = true; - } - } - } - // If the type of the IND (typ) is a "small int", and the type of the local has the - // same width, then we can reduce to just the local variable -- it will be - // correctly normalized. - // - // The below transformation cannot be applied if the local var needs to be normalized on load. - else if (varTypeIsSmall(typ) && (genTypeSize(varDsc) == genTypeSize(typ)) && - !lvaTable[lclNum].lvNormalizeOnLoad()) - { - const bool definitelyLoad = (tree->gtFlags & GTF_DONT_CSE) == 0; - const bool possiblyStore = !definitelyLoad; - - if (possiblyStore || (varTypeIsUnsigned(varDsc) == varTypeIsUnsigned(typ))) - { - typ = temp->TypeGet(); - tree->gtType = typ; - foldAndReturnTemp = true; - - if (possiblyStore) - { - // This node can be on the left-hand-side of an assignment node. - // Mark this node with GTF_VAR_FOLDED_IND to make sure that fgDoNormalizeOnStore() - // is called on its parent in post-order morph. - temp->gtFlags |= GTF_VAR_FOLDED_IND; - } - } - } - // For matching types we can fold - else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) && - !lvaTable[lclNum].lvNormalizeOnLoad()) - { - tree->gtType = typ = temp->TypeGet(); - foldAndReturnTemp = true; - } - else - { - // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. - // nullptr) - assert(fieldSeq == nullptr); - bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq); - assert(b || fieldSeq == nullptr); - - if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD)) - { - // Append the field sequence, change the type. - temp->AsLclFld()->SetFieldSeq( - GetFieldSeqStore()->Append(temp->AsLclFld()->GetFieldSeq(), fieldSeq)); - temp->gtType = typ; - - foldAndReturnTemp = true; - } - } - // Otherwise will will fold this into a GT_LCL_FLD below - // where we check (temp != nullptr) - } - else // !temp->OperIsLocal() - { - // We don't try to fold away the GT_IND/GT_ADDR for this case - temp = nullptr; - } - } - else if (op1->OperGet() == GT_ADD) - { -#ifdef TARGET_ARM - // Check for a misalignment floating point indirection. 
- if (varTypeIsFloating(typ)) - { - GenTree* addOp2 = op1->AsOp()->gtGetOp2(); - if (addOp2->IsCnsIntOrI()) - { - ssize_t offset = addOp2->AsIntCon()->gtIconVal; - if ((offset % emitTypeSize(TYP_FLOAT)) != 0) - { - tree->gtFlags |= GTF_IND_UNALIGNED; - } - } - } -#endif // TARGET_ARM - - /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */ - - if (op1->AsOp()->gtOp1->OperGet() == GT_ADDR && op1->AsOp()->gtOp2->OperGet() == GT_CNS_INT && - opts.OptimizationEnabled()) - { - // No overflow arithmetic with pointers - noway_assert(!op1->gtOverflow()); - - temp = op1->AsOp()->gtOp1->AsOp()->gtOp1; - if (!temp->OperIsLocal()) - { - temp = nullptr; - break; - } - - // Can not remove the GT_ADDR if it is currently a CSE candidate. - if (gtIsActiveCSE_Candidate(op1->AsOp()->gtOp1)) - { - break; - } - - ival1 = op1->AsOp()->gtOp2->AsIntCon()->gtIconVal; - fieldSeq = op1->AsOp()->gtOp2->AsIntCon()->gtFieldSeq; - - // Does the address have an associated zero-offset field sequence? - FieldSeqNode* addrFieldSeq = nullptr; - if (GetZeroOffsetFieldMap()->Lookup(op1->AsOp()->gtOp1, &addrFieldSeq)) - { - fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq); - } - - if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT) - { - noway_assert(!varTypeIsGC(temp->TypeGet())); - foldAndReturnTemp = true; - } - else - { - // The emitter can't handle large offsets - if (ival1 != (unsigned short)ival1) - { - break; - } - - // The emitter can get confused by invalid offsets - if (ival1 >= Compiler::lvaLclSize(temp->AsLclVarCommon()->GetLclNum())) - { - break; - } - } - // Now we can fold this into a GT_LCL_FLD below - // where we check (temp != nullptr) - } - } - } - - // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging: - // - We may have a load of a local where the load has a different type than the local - // - We may have a load of a local plus an offset - // - // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and - // offset if doing so is legal. The only cases in which this transformation is illegal are if the load - // begins before the local or if the load extends beyond the end of the local (i.e. if the load is - // out-of-bounds w.r.t. the local). - if ((temp != nullptr) && !foldAndReturnTemp) - { - assert(temp->OperIsLocal()); - - const unsigned lclNum = temp->AsLclVarCommon()->GetLclNum(); - LclVarDsc* const varDsc = &lvaTable[lclNum]; - - const var_types tempTyp = temp->TypeGet(); - const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK); - const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp); - - // Make sure we do not enregister this lclVar. - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); - - // If the size of the load is greater than the size of the lclVar, we cannot fold this access into - // a lclFld: the access represented by an lclFld node must begin at or after the start of the - // lclVar and must not extend beyond the end of the lclVar. - if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize)) - { - GenTreeLclFld* lclFld; - - // We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival' - // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival' - // Then we change the type of the GT_LCL_FLD to match the orginal GT_IND type. 
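// [Editor's illustrative aside, not part of the patch] The "*(&lcl + cns)" folding above turns an
// indirection off a local's address into a direct field-style access at a byte offset, but only
// when the offset is non-negative, fits the emitter's range, and the load does not run past the end
// of the local (the "(ival1 + genTypeSize(typ)) <= varSize" check). The same legality condition in
// plain C++ terms, using a hypothetical two-field local purely for illustration:
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

struct Local
{
    int32_t a;
    int32_t b;
};

// Read an int32_t at byte offset 'offs' inside 'loc'; mirrors "offset + load size <= local size".
static int32_t readAtOffset(const Local& loc, size_t offs)
{
    assert(offs + sizeof(int32_t) <= sizeof(Local)); // out-of-bounds accesses are rejected
    int32_t result;
    std::memcpy(&result, reinterpret_cast<const char*>(&loc) + offs, sizeof(result));
    return result;
}

int main()
{
    Local loc{10, 20};
    assert(readAtOffset(loc, offsetof(Local, b)) == 20);
    return 0;
}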
- // - if (temp->OperGet() == GT_LCL_FLD) - { - lclFld = temp->AsLclFld(); - lclFld->SetLclOffs(lclFld->GetLclOffs() + static_cast(ival1)); - lclFld->SetFieldSeq(GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeq)); - } - else // we have a GT_LCL_VAR - { - assert(temp->OperGet() == GT_LCL_VAR); - temp->ChangeOper(GT_LCL_FLD); // Note that this typically makes the gtFieldSeq "NotAField", - // unless there is a zero filed offset associated with 'temp'. - lclFld = temp->AsLclFld(); - lclFld->SetLclOffs(static_cast(ival1)); - - if (lclFld->GetFieldSeq() == FieldSeqStore::NotAField()) - { - if (fieldSeq != nullptr) - { - // If it does represent a field, note that. - lclFld->SetFieldSeq(fieldSeq); - } - } - else - { - // Append 'fieldSeq' to the existing one - lclFld->SetFieldSeq(GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeq)); - } - } - temp->gtType = tree->gtType; - foldAndReturnTemp = true; - } - } - - if (foldAndReturnTemp) - { - assert(temp != nullptr); - assert(temp->TypeGet() == typ); - assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR)); - - // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for - // 'temp' because a GT_ADDR always marks it for its operand. - temp->gtFlags &= ~GTF_DONT_CSE; - temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE); - - if (op1->OperGet() == GT_ADD) - { - DEBUG_DESTROY_NODE(op1->AsOp()->gtOp1); // GT_ADDR - DEBUG_DESTROY_NODE(op1->AsOp()->gtOp2); // GT_CNS_INT - } - DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR - DEBUG_DESTROY_NODE(tree); // GT_IND - - // If the result of the fold is a local var, we may need to perform further adjustments e.g. for - // normalization. - if (temp->OperIs(GT_LCL_VAR)) - { -#ifdef DEBUG - // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear - // and the node in question must have this bit set (as it has already been morphed). - temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - const bool forceRemorph = true; - temp = fgMorphLocalVar(temp, forceRemorph); -#ifdef DEBUG - // We then set this flag on `temp` because `fgMorhpLocalVar` may not set it itself, and the - // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function - // returns. - temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif // DEBUG - } - - return temp; - } - - // Only do this optimization when we are in the global optimizer. Doing this after value numbering - // could result in an invalid value number for the newly generated GT_IND node. - if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph) - { - // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)). - // TBD: this transformation is currently necessary for correctness -- it might - // be good to analyze the failures that result if we don't do this, and fix them - // in other ways. Ideally, this should be optional. - GenTree* commaNode = op1; - unsigned treeFlags = tree->gtFlags; - commaNode->gtType = typ; - commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is - // dangerous, clear the GTF_REVERSE_OPS at - // least. -#ifdef DEBUG - commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - while (commaNode->AsOp()->gtOp2->gtOper == GT_COMMA) - { - commaNode = commaNode->AsOp()->gtOp2; - commaNode->gtType = typ; - commaNode->gtFlags = - (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG & ~GTF_CALL); // Bashing the GT_COMMA flags here is - // dangerous, clear the GTF_REVERSE_OPS, GT_ASG, and GT_CALL at - // least. 
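// [Editor's illustrative aside, not part of the patch] The IND(COMMA(x, ..., z)) transform above
// mirrors what a comma expression already means: evaluate the earlier operands for their side
// effects, then act on the last value, so the indirection can be sunk onto the final operand.
// A tiny standalone illustration with the C++ comma operator:
#include <cassert>

int main()
{
    int  sideEffects = 0;
    int  value       = 42;
    int* p           = &value;

    int viaIndOfComma   = *(++sideEffects, p); // IND(COMMA(x, z))
    int viaCommaWithInd = (++sideEffects, *p); // COMMA(x, IND(z))

    assert(viaIndOfComma == 42 && viaCommaWithInd == 42 && sideEffects == 2);
    return 0;
}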
- commaNode->gtFlags |= ((commaNode->AsOp()->gtOp1->gtFlags | commaNode->AsOp()->gtOp2->gtFlags) & - (GTF_ASG | GTF_CALL)); -#ifdef DEBUG - commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - } - bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0; - ArrayInfo arrInfo; - if (wasArrIndex) - { - bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); - assert(b); - GetArrayInfoMap()->Remove(tree); - } - tree = op1; - GenTree* addr = commaNode->AsOp()->gtOp2; - op1 = gtNewIndir(typ, addr); - // This is very conservative - op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING; - op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT); - - if (wasArrIndex) - { - GetArrayInfoMap()->Set(op1, arrInfo); - } -#ifdef DEBUG - op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - commaNode->AsOp()->gtOp2 = op1; - commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); - return tree; - } - - break; - } - - case GT_ADDR: - - // Can not remove op1 if it is currently a CSE candidate. - if (gtIsActiveCSE_Candidate(op1)) - { - break; - } - - if (op1->OperGet() == GT_IND) - { - if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0) - { - // Can not remove a GT_ADDR if it is currently a CSE candidate. - if (gtIsActiveCSE_Candidate(tree)) - { - break; - } - - // Perform the transform ADDR(IND(...)) == (...). - GenTree* addr = op1->AsOp()->gtOp1; - - // If tree has a zero field sequence annotation, update the annotation - // on addr node. - FieldSeqNode* zeroFieldSeq = nullptr; - if (GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq)) - { - fgAddFieldSeqForZeroOffset(addr, zeroFieldSeq); - } - - noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); - - DEBUG_DESTROY_NODE(op1); - DEBUG_DESTROY_NODE(tree); - - return addr; - } - } - else if (op1->OperGet() == GT_OBJ) - { - // Can not remove a GT_ADDR if it is currently a CSE candidate. - if (gtIsActiveCSE_Candidate(tree)) - { - break; - } - - // Perform the transform ADDR(OBJ(...)) == (...). - GenTree* addr = op1->AsObj()->Addr(); - - noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); - - DEBUG_DESTROY_NODE(op1); - DEBUG_DESTROY_NODE(tree); - - return addr; - } - else if (op1->gtOper == GT_CAST) - { - GenTree* casting = op1->AsCast()->CastOp(); - if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR) - { - DEBUG_DESTROY_NODE(op1); - tree->AsOp()->gtOp1 = op1 = casting; - } - } - else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase) - { - // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)). - // (Be sure to mark "z" as an l-value...) - - GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); - for (GenTree* comma = op1; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) - { - commas.Push(comma); - } - GenTree* commaNode = commas.Top(); - - // The top-level addr might be annotated with a zeroOffset field. - FieldSeqNode* zeroFieldSeq = nullptr; - bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq); - tree = op1; - commaNode->AsOp()->gtOp2->gtFlags |= GTF_DONT_CSE; - - // If the node we're about to put under a GT_ADDR is an indirection, it - // doesn't need to be materialized, since we only want the addressing mode. Because - // of this, this GT_IND is not a faulting indirection and we don't have to extract it - // as a side effect. 
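// [Editor's illustrative aside, not part of the patch] The comment above (an indirection placed
// back under GT_ADDR need not be materialized) is the usual address/dereference cancellation:
// "&*p" is just "p", so only the address computation survives and no load is performed. Standalone:
#include <cassert>

int main()
{
    int  value = 7;
    int* p     = &value;
    assert(&*p == p); // ADDR(IND(p)) => p
    return 0;
}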
- GenTree* commaOp2 = commaNode->AsOp()->gtOp2; - if (commaOp2->OperIsBlk()) - { - commaOp2->SetOper(GT_IND); - } - if (commaOp2->gtOper == GT_IND) - { - commaOp2->gtFlags |= GTF_IND_NONFAULTING; - commaOp2->gtFlags &= ~GTF_EXCEPT; - commaOp2->gtFlags |= (commaOp2->AsOp()->gtOp1->gtFlags & GTF_EXCEPT); - } - - op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2); - - if (isZeroOffset) - { - // Transfer the annotation to the new GT_ADDR node. - fgAddFieldSeqForZeroOffset(op1, zeroFieldSeq); - } - commaNode->AsOp()->gtOp2 = op1; - // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform - // might give op1 a type different from byref (like, say, native int). So now go back and give - // all the comma nodes the type of op1. - // TODO: the comma flag update below is conservative and can be improved. - // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to - // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF). - - while (!commas.Empty()) - { - GenTree* comma = commas.Pop(); - comma->gtType = op1->gtType; - comma->gtFlags |= op1->gtFlags; -#ifdef DEBUG - comma->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - gtUpdateNodeSideEffects(comma); - } - - return tree; - } - break; - - case GT_COLON: - if (fgGlobalMorph) - { - /* Mark the nodes that are conditionally executed */ - fgWalkTreePre(&tree, gtMarkColonCond); - } - /* Since we're doing this postorder we clear this if it got set by a child */ - fgRemoveRestOfBlock = false; - break; - - case GT_COMMA: - - /* Special case: trees that don't produce a value */ - if (op2->OperIs(GT_ASG) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2)) - { - typ = tree->gtType = TYP_VOID; - } - - // If we are in the Valuenum CSE phase then don't morph away anything as these - // nodes may have CSE defs/uses in them. - // - if (!optValnumCSE_phase) - { - // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this - // is all we need. - - GenTree* op1SideEffects = nullptr; - // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example) - // hoisted expressions in loops. - gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE)); - if (op1SideEffects) - { - // Replace the left hand side with the side effect list. - tree->AsOp()->gtOp1 = op1SideEffects; - gtUpdateNodeSideEffects(tree); - } - else - { - op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op1); - return op2; - } - - /* If the right operand is just a void nop node, throw it away */ - if (op2->IsNothingNode() && op1->gtType == TYP_VOID) - { - op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op2); - return op1; - } - } - - break; - - case GT_JTRUE: - - /* Special case if fgRemoveRestOfBlock is set to true */ - if (fgRemoveRestOfBlock) - { - if (fgIsCommaThrow(op1, true)) - { - GenTree* throwNode = op1->AsOp()->gtOp1; - - JITDUMP("Removing [%06d] GT_JTRUE as the block now unconditionally throws an exception.\n", - dspTreeID(tree)); - DEBUG_DESTROY_NODE(tree); - - return throwNode; - } - - noway_assert(op1->OperKind() & GTK_RELOP); - noway_assert(op1->gtFlags & GTF_EXCEPT); - - // We need to keep op1 for the side-effects. 
Hang it off - // a GT_COMMA node - - JITDUMP("Keeping side-effects by bashing [%06d] GT_JTRUE into a GT_COMMA.\n", dspTreeID(tree)); - - tree->ChangeOper(GT_COMMA); - tree->AsOp()->gtOp2 = op2 = gtNewNothingNode(); - - // Additionally since we're eliminating the JTRUE - // codegen won't like it if op1 is a RELOP of longs, floats or doubles. - // So we change it into a GT_COMMA as well. - JITDUMP("Also bashing [%06d] (a relop) into a GT_COMMA.\n", dspTreeID(op1)); - op1->ChangeOper(GT_COMMA); - op1->gtFlags &= ~GTF_UNSIGNED; // Clear the unsigned flag if it was set on the relop - op1->gtType = op1->AsOp()->gtOp1->gtType; - - return tree; - } - break; - - default: - break; - } - - assert(oper == tree->gtOper); - - // If we are in the Valuenum CSE phase then don't morph away anything as these - // nodes may have CSE defs/uses in them. - // - if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList()) - { - /* Check for op1 as a GT_COMMA with a unconditional throw node */ - if (op1 && fgIsCommaThrow(op1, true)) - { - if ((op1->gtFlags & GTF_COLON_COND) == 0) - { - /* We can safely throw out the rest of the statements */ - fgRemoveRestOfBlock = true; - } - - GenTree* throwNode = op1->AsOp()->gtOp1; - - if (oper == GT_COMMA) - { - /* Both tree and op1 are GT_COMMA nodes */ - /* Change the tree's op1 to the throw node: op1->AsOp()->gtOp1 */ - tree->AsOp()->gtOp1 = throwNode; - - // Possibly reset the assignment flag - if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) - { - tree->gtFlags &= ~GTF_ASG; - } - - return tree; - } - else if (oper != GT_NOP) - { - if (genActualType(typ) == genActualType(op1->gtType)) - { - /* The types match so, return the comma throw node as the new tree */ - return op1; - } - else - { - if (typ == TYP_VOID) - { - // Return the throw node - return throwNode; - } - else - { - GenTree* commaOp2 = op1->AsOp()->gtOp2; - - // need type of oper to be same as tree - if (typ == TYP_LONG) - { - commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); - commaOp2->AsIntConCommon()->SetLngValue(0); - /* Change the types of oper and commaOp2 to TYP_LONG */ - op1->gtType = commaOp2->gtType = TYP_LONG; - } - else if (varTypeIsFloating(typ)) - { - commaOp2->ChangeOperConst(GT_CNS_DBL); - commaOp2->AsDblCon()->gtDconVal = 0.0; - /* Change the types of oper and commaOp2 to TYP_DOUBLE */ - op1->gtType = commaOp2->gtType = TYP_DOUBLE; - } - else - { - commaOp2->ChangeOperConst(GT_CNS_INT); - commaOp2->AsIntConCommon()->SetIconValue(0); - /* Change the types of oper and commaOp2 to TYP_INT */ - op1->gtType = commaOp2->gtType = TYP_INT; - } - - /* Return the GT_COMMA node as the new tree */ - return op1; - } - } - } - } - - /* Check for op2 as a GT_COMMA with a unconditional throw */ - - if (op2 && fgIsCommaThrow(op2, true)) - { - if ((op2->gtFlags & GTF_COLON_COND) == 0) - { - /* We can safely throw out the rest of the statements */ - fgRemoveRestOfBlock = true; - } - - // If op1 has no side-effects - if ((op1->gtFlags & GTF_ALL_EFFECT) == 0) - { - // If tree is an asg node - if (tree->OperIs(GT_ASG)) - { - /* Return the throw node as the new tree */ - return op2->AsOp()->gtOp1; - } - - if (tree->OperGet() == GT_ARR_BOUNDS_CHECK) - { - /* Return the throw node as the new tree */ - return op2->AsOp()->gtOp1; - } - - // If tree is a comma node - if (tree->OperGet() == GT_COMMA) - { - /* Return the throw node as the new tree */ - return op2->AsOp()->gtOp1; - } - - /* for the shift nodes the type of op2 can differ from the 
tree type */ - if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT)) - { - noway_assert(GenTree::OperIsShiftOrRotate(oper)); - - GenTree* commaOp2 = op2->AsOp()->gtOp2; - - commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); - commaOp2->AsIntConCommon()->SetLngValue(0); - - /* Change the types of oper and commaOp2 to TYP_LONG */ - op2->gtType = commaOp2->gtType = TYP_LONG; - } - - if ((genActualType(typ) == TYP_INT) && - (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet()))) - { - // An example case is comparison (say GT_GT) of two longs or floating point values. - - GenTree* commaOp2 = op2->AsOp()->gtOp2; - - commaOp2->ChangeOperConst(GT_CNS_INT); - commaOp2->AsIntCon()->gtIconVal = 0; - /* Change the types of oper and commaOp2 to TYP_INT */ - op2->gtType = commaOp2->gtType = TYP_INT; - } - - if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL)) - { - noway_assert(tree->OperGet() == GT_ADD); - - GenTree* commaOp2 = op2->AsOp()->gtOp2; - - commaOp2->ChangeOperConst(GT_CNS_INT); - commaOp2->AsIntCon()->gtIconVal = 0; - /* Change the types of oper and commaOp2 to TYP_BYREF */ - op2->gtType = commaOp2->gtType = TYP_BYREF; - } - - /* types should now match */ - noway_assert((genActualType(typ) == genActualType(op2->gtType))); - - /* Return the GT_COMMA node as the new tree */ - return op2; - } - } - } - - /*------------------------------------------------------------------------- - * Optional morphing is done if tree transformations is permitted - */ - - if ((opts.compFlags & CLFLG_TREETRANS) == 0) - { - return tree; - } - - tree = fgMorphSmpOpOptional(tree->AsOp()); - - return tree; -} - -//---------------------------------------------------------------------------------------------- -// fgMorphRetInd: Try to get rid of extra IND(ADDR()) pairs in a return tree. -// -// Arguments: -// node - The return node that uses an indirection. -// -// Return Value: -// the original op1 of the ret if there was no optimization or an optimized new op1. -// -GenTree* Compiler::fgMorphRetInd(GenTreeUnOp* ret) -{ - assert(ret->OperIs(GT_RETURN)); - assert(ret->gtGetOp1()->OperIs(GT_IND, GT_BLK, GT_OBJ)); - GenTreeIndir* ind = ret->gtGetOp1()->AsIndir(); - GenTree* addr = ind->Addr(); - - if (addr->OperIs(GT_ADDR) && addr->gtGetOp1()->OperIs(GT_LCL_VAR)) - { - // If `return` retypes LCL_VAR as a smaller struct it should not set `doNotEnregister` on that - // LclVar. - // Example: in `Vector128:AsVector2` we have RETURN SIMD8(OBJ SIMD8(ADDR byref(LCL_VAR SIMD16))). - GenTreeLclVar* lclVar = addr->gtGetOp1()->AsLclVar(); - if (!lvaIsImplicitByRefLocal(lclVar->GetLclNum())) - { - assert(!gtIsActiveCSE_Candidate(addr) && !gtIsActiveCSE_Candidate(ind)); - unsigned indSize; - if (ind->OperIs(GT_IND)) - { - indSize = genTypeSize(ind); - } - else - { - indSize = ind->AsBlk()->GetLayout()->GetSize(); - } - - LclVarDsc* varDsc = lvaGetDesc(lclVar); - - unsigned lclVarSize; - if (!lclVar->TypeIs(TYP_STRUCT)) - - { - lclVarSize = genTypeSize(varDsc->TypeGet()); - } - else - { - lclVarSize = varDsc->lvExactSize; - } - // TODO: change conditions in `canFold` to `indSize <= lclVarSize`, but currently do not support `BITCAST - // int<-SIMD16` etc. - assert((indSize <= lclVarSize) || varDsc->lvDoNotEnregister); - -#if defined(TARGET_64BIT) - bool canFold = (indSize == lclVarSize); -#else // !TARGET_64BIT - // TODO: improve 32 bit targets handling for LONG returns if necessary, nowadays we do not support `BITCAST - // long<->double` there. 
- bool canFold = (indSize == lclVarSize) && (lclVarSize <= REGSIZE_BYTES); -#endif - // TODO: support `genReturnBB != nullptr`, it requires #11413 to avoid `Incompatible types for - // gtNewTempAssign`. - if (canFold && (genReturnBB == nullptr)) - { - // Fold (TYPE1)*(&(TYPE2)x) even if types do not match, lowering will handle it. - // Getting rid of this IND(ADDR()) pair allows to keep lclVar as not address taken - // and enregister it. - DEBUG_DESTROY_NODE(ind); - DEBUG_DESTROY_NODE(addr); - ret->gtOp1 = lclVar; - return ret->gtGetOp1(); - } - else if (!varDsc->lvDoNotEnregister) - { - lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(Compiler::DNER_BlockOp)); - } - } - } - return ind; -} - -#ifdef _PREFAST_ -#pragma warning(pop) -#endif - -GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) -{ - genTreeOps oper = tree->gtOper; - GenTree* op1 = tree->gtOp1; - GenTree* op2 = tree->gtOp2; - var_types typ = tree->TypeGet(); - - if (fgGlobalMorph && GenTree::OperIsCommutative(oper)) - { - /* Swap the operands so that the more expensive one is 'op1' */ - - if (tree->gtFlags & GTF_REVERSE_OPS) - { - tree->gtOp1 = op2; - tree->gtOp2 = op1; - - op2 = op1; - op1 = tree->gtOp1; - - tree->gtFlags &= ~GTF_REVERSE_OPS; - } - - if (oper == op2->gtOper) - { - /* Reorder nested operators at the same precedence level to be - left-recursive. For example, change "(a+(b+c))" to the - equivalent expression "((a+b)+c)". - */ - - /* Things are handled differently for floating-point operators */ - - if (!varTypeIsFloating(tree->TypeGet())) - { - fgMoveOpsLeft(tree); - op1 = tree->gtOp1; - op2 = tree->gtOp2; - } - } - } - -#if REARRANGE_ADDS - - /* Change "((x+icon)+y)" to "((x+y)+icon)" - Don't reorder floating-point operations */ - - if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() && - varTypeIsIntegralOrI(typ)) - { - GenTree* ad1 = op1->AsOp()->gtOp1; - GenTree* ad2 = op1->AsOp()->gtOp2; - - if (!op2->OperIsConst() && ad2->OperIsConst()) - { - // This takes - // + (tree) - // / \. - // / \. - // / \. - // + (op1) op2 - // / \. - // / \. - // ad1 ad2 - // - // and it swaps ad2 and op2. - - // Don't create a byref pointer that may point outside of the ref object. - // If a GC happens, the byref won't get updated. This can happen if one - // of the int components is negative. It also requires the address generation - // be in a fully-interruptible code region. - if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) - { - tree->gtOp2 = ad2; - - op1->AsOp()->gtOp2 = op2; - op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT; - - op2 = tree->gtOp2; - } - } - } - -#endif - - /*------------------------------------------------------------------------- - * Perform optional oper-specific postorder morphing - */ - - switch (oper) - { - case GT_ASG: - // Make sure we're allowed to do this. - if (optValnumCSE_phase) - { - // It is not safe to reorder/delete CSE's - break; - } - - if (varTypeIsStruct(typ) && !tree->IsPhiDefn()) - { - if (tree->OperIsCopyBlkOp()) - { - return fgMorphCopyBlock(tree); - } - else - { - return fgMorphInitBlock(tree); - } - } - - if (typ == TYP_LONG) - { - break; - } - - if (op2->gtFlags & GTF_ASG) - { - break; - } - - if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT)) - { - break; - } - - /* Special case: a cast that can be thrown away */ - - // TODO-Cleanup: fgMorphSmp does a similar optimization. 
However, it removes only - // one cast and sometimes there is another one after it that gets removed by this - // code. fgMorphSmp should be improved to remove all redundant casts so this code - // can be removed. - - if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow()) - { - var_types srct; - var_types cast; - var_types dstt; - - srct = op2->AsCast()->CastOp()->TypeGet(); - cast = (var_types)op2->CastToType(); - dstt = op1->TypeGet(); - - /* Make sure these are all ints and precision is not lost */ - - if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT) - { - op2 = tree->gtOp2 = op2->AsCast()->CastOp(); - } - } - - break; - - case GT_MUL: - - /* Check for the case "(val + icon) * icon" */ - - if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD) - { - GenTree* add = op1->AsOp()->gtOp2; - - if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0)) - { - if (tree->gtOverflow() || op1->gtOverflow()) - { - break; - } - - ssize_t imul = op2->AsIntCon()->gtIconVal; - ssize_t iadd = add->AsIntCon()->gtIconVal; - - /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */ - - oper = GT_ADD; - tree->ChangeOper(oper); - - op2->AsIntCon()->gtIconVal = iadd * imul; - - op1->ChangeOper(GT_MUL); - - add->AsIntCon()->gtIconVal = imul; -#ifdef TARGET_64BIT - if (add->gtType == TYP_INT) - { - // we need to properly re-sign-extend or truncate after multiplying two int constants above - add->AsIntCon()->TruncateOrSignExtend32(); - } -#endif // TARGET_64BIT - } - } - - break; - - case GT_DIV: - - /* For "val / 1", just return "val" */ - - if (op2->IsIntegralConst(1)) - { - DEBUG_DESTROY_NODE(tree); - return op1; - } - break; - - case GT_UDIV: - case GT_UMOD: - tree->CheckDivideByConstOptimized(this); - break; - - case GT_LSH: - - /* Check for the case "(val + icon) << icon" */ - - if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow()) - { - GenTree* cns = op1->AsOp()->gtOp2; - - if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0)) - { - ssize_t ishf = op2->AsIntConCommon()->IconValue(); - ssize_t iadd = cns->AsIntConCommon()->IconValue(); - - // printf("Changing '(val+icon1)<ChangeOper(GT_ADD); - ssize_t result = iadd << ishf; - op2->AsIntConCommon()->SetIconValue(result); -#ifdef TARGET_64BIT - if (op1->gtType == TYP_INT) - { - op2->AsIntCon()->TruncateOrSignExtend32(); - } -#endif // TARGET_64BIT - - // we are reusing the shift amount node here, but the type we want is that of the shift result - op2->gtType = op1->gtType; - - if (cns->gtOper == GT_CNS_INT && cns->AsIntCon()->gtFieldSeq != nullptr && - cns->AsIntCon()->gtFieldSeq->IsConstantIndexFieldSeq()) - { - assert(cns->AsIntCon()->gtFieldSeq->m_next == nullptr); - op2->AsIntCon()->gtFieldSeq = cns->AsIntCon()->gtFieldSeq; - } - - op1->ChangeOper(GT_LSH); - - cns->AsIntConCommon()->SetIconValue(ishf); - } - } - - break; - - case GT_XOR: - - if (!optValnumCSE_phase) - { - /* "x ^ -1" is "~x" */ - - if (op2->IsIntegralConst(-1)) - { - tree->ChangeOper(GT_NOT); - tree->gtOp2 = nullptr; - DEBUG_DESTROY_NODE(op2); - } - else if (op2->IsIntegralConst(1) && op1->OperIsCompare()) - { - /* "binaryVal ^ 1" is "!binaryVal" */ - gtReverseCond(op1); - DEBUG_DESTROY_NODE(op2); - DEBUG_DESTROY_NODE(tree); - return op1; - } - } - - break; - - case GT_INIT_VAL: - // Initialization values for initBlk have special semantics - their lower - // byte is used to fill the struct. 
However, we allow 0 as a "bare" value, - // which enables them to get a VNForZero, and be propagated. - if (op1->IsIntegralConst(0)) - { - return op1; - } - break; - - default: - break; - } - return tree; -} - -//------------------------------------------------------------------------ -// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b -// (see ECMA III 3.55 and III.3.56). -// -// Arguments: -// tree - The GT_MOD/GT_UMOD tree to morph -// -// Returns: -// The morphed tree -// -// Notes: -// For ARM64 we don't have a remainder instruction so this transform is -// always done. For XARCH this transform is done if we know that magic -// division will be used, in that case this transform allows CSE to -// eliminate the redundant div from code like "x = a / 3; y = a % 3;". -// -// This method will produce the above expression in 'a' and 'b' are -// leaf nodes, otherwise, if any of them is not a leaf it will spill -// its value into a temporary variable, an example: -// (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) ) -// -GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) -{ - if (tree->OperGet() == GT_MOD) - { - tree->SetOper(GT_DIV); - } - else if (tree->OperGet() == GT_UMOD) - { - tree->SetOper(GT_UDIV); - } - else - { - noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv"); - } - - var_types type = tree->gtType; - GenTree* denominator = tree->gtOp2; - GenTree* numerator = tree->gtOp1; - - if (!numerator->OperIsLeaf()) - { - numerator = fgMakeMultiUse(&tree->gtOp1); - } - - if (!denominator->OperIsLeaf()) - { - denominator = fgMakeMultiUse(&tree->gtOp2); - } - - // The numerator and denominator may have been assigned to temps, in which case - // their defining assignments are in the current tree. Therefore, we need to - // set the execuction order accordingly on the nodes we create. - // That is, the "mul" will be evaluated in "normal" order, and the "sub" must - // be set to be evaluated in reverse order. - // - GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator)); - assert(!mul->IsReverseOp()); - GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul); - sub->gtFlags |= GTF_REVERSE_OPS; - -#ifdef DEBUG - sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - - tree->CheckDivideByConstOptimized(this); - - return sub; -} - -//------------------------------------------------------------------------------ -// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree. -// -// -// Arguments: -// oper - Operation to check -// -// Return Value: -// True if the operation can be a root of a bitwise rotation tree; false otherwise. - -bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper) -{ - return (oper == GT_OR) || (oper == GT_XOR); -} - -//------------------------------------------------------------------------------ -// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return -// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree. -// -// Arguments: -// tree - tree to check for a rotation pattern -// -// Return Value: -// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise. -// -// Assumption: -// The input is a GT_OR or a GT_XOR tree. - -GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) -{ - // - // Check for a rotation pattern, e.g., - // - // OR ROL - // / \ / \. - // LSH RSZ -> x y - // / \ / \. 
- // x AND x AND - // / \ / \. - // y 31 ADD 31 - // / \. - // NEG 32 - // | - // y - // The patterns recognized: - // (x << (y & M)) op (x >>> ((-y + N) & M)) - // (x >>> ((-y + N) & M)) op (x << (y & M)) - // - // (x << y) op (x >>> (-y + N)) - // (x >> > (-y + N)) op (x << y) - // - // (x >>> (y & M)) op (x << ((-y + N) & M)) - // (x << ((-y + N) & M)) op (x >>> (y & M)) - // - // (x >>> y) op (x << (-y + N)) - // (x << (-y + N)) op (x >>> y) - // - // (x << c1) op (x >>> c2) - // (x >>> c1) op (x << c2) - // - // where - // c1 and c2 are const - // c1 + c2 == bitsize(x) - // N == bitsize(x) - // M is const - // M & (N - 1) == N - 1 - // op is either | or ^ - - if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0)) - { - // We can't do anything if the tree has assignments, calls, or volatile - // reads. Note that we allow GTF_EXCEPT side effect since any exceptions - // thrown by the original tree will be thrown by the transformed tree as well. - return tree; - } - - genTreeOps oper = tree->OperGet(); - assert(fgOperIsBitwiseRotationRoot(oper)); - - // Check if we have an LSH on one side of the OR and an RSZ on the other side. - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); - GenTree* leftShiftTree = nullptr; - GenTree* rightShiftTree = nullptr; - if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ)) - { - leftShiftTree = op1; - rightShiftTree = op2; - } - else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH)) - { - leftShiftTree = op2; - rightShiftTree = op1; - } - else - { - return tree; - } - - // Check if the trees representing the value to shift are identical. - // We already checked that there are no side effects above. - if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1())) - { - GenTree* rotatedValue = leftShiftTree->gtGetOp1(); - var_types rotatedValueActualType = genActualType(rotatedValue->gtType); - ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8; - noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64)); - GenTree* leftShiftIndex = leftShiftTree->gtGetOp2(); - GenTree* rightShiftIndex = rightShiftTree->gtGetOp2(); - - // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits - // shouldn't be masked for the transformation to be valid. If additional - // higher bits are not masked, the transformation is still valid since the result - // of MSIL shift instructions is unspecified if the shift amount is greater or equal - // than the width of the value being shifted. - ssize_t minimalMask = rotatedValueBitSize - 1; - ssize_t leftShiftMask = -1; - ssize_t rightShiftMask = -1; - - if ((leftShiftIndex->OperGet() == GT_AND)) - { - if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI()) - { - leftShiftMask = leftShiftIndex->gtGetOp2()->AsIntCon()->gtIconVal; - leftShiftIndex = leftShiftIndex->gtGetOp1(); - } - else - { - return tree; - } - } - - if ((rightShiftIndex->OperGet() == GT_AND)) - { - if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI()) - { - rightShiftMask = rightShiftIndex->gtGetOp2()->AsIntCon()->gtIconVal; - rightShiftIndex = rightShiftIndex->gtGetOp1(); - } - else - { - return tree; - } - } - - if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask)) - { - // The shift index is overmasked, e.g., we have - // something like (x << y & 15) or - // (x >> (32 - y) & 15 with 32 bit x. - // The transformation is not valid. 
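// Illustrative sketch, not part of this change: one way to write, at the source
// level, the masked-rotate idiom this recognizer targets. With the shift counts
// masked by 31 (rotatedValueBitSize - 1 for a 32-bit value), the OR of the two
// shifts is a rotate and can be collapsed into a single rotate node (GT_ROL in
// this shape).
#include <cstdint>
static uint32_t RotateLeft32(uint32_t x, uint32_t y)
{
    // Matches the "(x << (y & M)) op (x >>> ((-y + N) & M))" pattern above with
    // N == 32 and M == 31; both shift counts stay in [0, 31], so the expression
    // is well defined for every y.
    return (x << (y & 31)) | (x >> ((32 - y) & 31));
}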
- return tree; - } - - GenTree* shiftIndexWithAdd = nullptr; - GenTree* shiftIndexWithoutAdd = nullptr; - genTreeOps rotateOp = GT_NONE; - GenTree* rotateIndex = nullptr; - - if (leftShiftIndex->OperGet() == GT_ADD) - { - shiftIndexWithAdd = leftShiftIndex; - shiftIndexWithoutAdd = rightShiftIndex; - rotateOp = GT_ROR; - } - else if (rightShiftIndex->OperGet() == GT_ADD) - { - shiftIndexWithAdd = rightShiftIndex; - shiftIndexWithoutAdd = leftShiftIndex; - rotateOp = GT_ROL; - } - - if (shiftIndexWithAdd != nullptr) - { - if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI()) - { - if (shiftIndexWithAdd->gtGetOp2()->AsIntCon()->gtIconVal == rotatedValueBitSize) - { - if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG) - { - if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd)) - { - // We found one of these patterns: - // (x << (y & M)) | (x >>> ((-y + N) & M)) - // (x << y) | (x >>> (-y + N)) - // (x >>> (y & M)) | (x << ((-y + N) & M)) - // (x >>> y) | (x << (-y + N)) - // where N == bitsize(x), M is const, and - // M & (N - 1) == N - 1 - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifndef TARGET_64BIT - if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64)) - { - // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86. - // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need - // to add helpers for GT_ROL and GT_ROR. - return tree; - } -#endif - - rotateIndex = shiftIndexWithoutAdd; - } - } - } - } - } - else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI())) - { - if (leftShiftIndex->AsIntCon()->gtIconVal + rightShiftIndex->AsIntCon()->gtIconVal == rotatedValueBitSize) - { - // We found this pattern: - // (x << c1) | (x >>> c2) - // where c1 and c2 are const and c1 + c2 == bitsize(x) - rotateOp = GT_ROL; - rotateIndex = leftShiftIndex; - } - } - - if (rotateIndex != nullptr) - { - noway_assert(GenTree::OperIsRotate(rotateOp)); - - unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT; - - // We can use the same tree only during global morph; reusing the tree in a later morph - // may invalidate value numbers. - if (fgGlobalMorph) - { - tree->AsOp()->gtOp1 = rotatedValue; - tree->AsOp()->gtOp2 = rotateIndex; - tree->ChangeOper(rotateOp); - - unsigned childFlags = 0; - for (GenTree* op : tree->Operands()) - { - childFlags |= (op->gtFlags & GTF_ALL_EFFECT); - } - - // The parent's flags should be a superset of its operands' flags - noway_assert((inputTreeEffects & childFlags) == childFlags); - } - else - { - tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex); - noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT)); - } - - return tree; - } - } - return tree; -} - -/***************************************************************************** - * - * Transform the given tree for code generation and return an equivalent tree. - */ - -GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac) -{ - assert(tree); - -#ifdef DEBUG - if (verbose) - { - if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID) - { - noway_assert(!"JitBreakMorphTree hit"); - } - } -#endif - -#ifdef DEBUG - int thisMorphNum = 0; - if (verbose && treesBeforeAfterMorph) - { - thisMorphNum = morphNum++; - printf("\nfgMorphTree (before %d):\n", thisMorphNum); - gtDispTree(tree); - } -#endif - - if (fgGlobalMorph) - { - // Apply any rewrites for implicit byref arguments before morphing the - // tree. 
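// Illustrative sketch, not part of this change: fgMorphModToSubMulDiv above
// rewrites "a % b" as "a - (a / b) * b" (see the ECMA III.3.55/III.3.56
// reference in its header). A minimal standalone check of that identity,
// assuming a non-zero divisor and excluding the INT_MIN / -1 overflow case:
#include <cassert>
#include <climits>
static void CheckModToSubMulDiv(int a, int b)
{
    assert((b != 0) && !((a == INT_MIN) && (b == -1)));
    assert(a % b == a - (a / b) * b);
}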
- - if (fgMorphImplicitByRefArgs(tree)) - { -#ifdef DEBUG - if (verbose && treesBeforeAfterMorph) - { - printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum); - gtDispTree(tree); - } -#endif - } - } - -/*------------------------------------------------------------------------- - * fgMorphTree() can potentially replace a tree with another, and the - * caller has to store the return value correctly. - * Turn this on to always make copy of "tree" here to shake out - * hidden/unupdated references. - */ - -#ifdef DEBUG - - if (compStressCompile(STRESS_GENERIC_CHECK, 0)) - { - GenTree* copy; - - if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL) - { - copy = gtNewLargeOperNode(GT_ADD, TYP_INT); - } - else - { - copy = new (this, GT_CALL) GenTreeCall(TYP_INT); - } - - copy->ReplaceWith(tree, this); - -#if defined(LATE_DISASM) - // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields - if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle()) - { - copy->AsIntCon()->gtCompileTimeHandle = tree->AsIntCon()->gtCompileTimeHandle; - } -#endif - - DEBUG_DESTROY_NODE(tree); - tree = copy; - } -#endif // DEBUG - - if (fgGlobalMorph) - { - /* Ensure that we haven't morphed this node already */ - assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!"); - -#if LOCAL_ASSERTION_PROP - /* Before morphing the tree, we try to propagate any active assertions */ - if (optLocalAssertionProp) - { - /* Do we have any active assertions? */ - - if (optAssertionCount > 0) - { - GenTree* newTree = tree; - while (newTree != nullptr) - { - tree = newTree; - /* newTree is non-Null if we propagated an assertion */ - newTree = optAssertionProp(apFull, tree, nullptr, nullptr); - } - assert(tree != nullptr); - } - } - PREFAST_ASSUME(tree != nullptr); -#endif - } - - /* Save the original un-morphed tree for fgMorphTreeDone */ - - GenTree* oldTree = tree; - - /* Figure out what kind of a node we have */ - - unsigned kind = tree->OperKind(); - - /* Is this a constant node? */ - - if (kind & GTK_CONST) - { - tree = fgMorphConst(tree); - goto DONE; - } - - /* Is this a leaf node? */ - - if (kind & GTK_LEAF) - { - tree = fgMorphLeaf(tree); - goto DONE; - } - - /* Is it a 'simple' unary/binary operator? */ - - if (kind & GTK_SMPOP) - { - tree = fgMorphSmpOp(tree, mac); - goto DONE; - } - - /* See what kind of a special operator we have here */ - - switch (tree->OperGet()) - { - case GT_FIELD: - tree = fgMorphField(tree, mac); - break; - - case GT_CALL: - if (tree->OperMayThrow(this)) - { - tree->gtFlags |= GTF_EXCEPT; - } - else - { - tree->gtFlags &= ~GTF_EXCEPT; - } - tree = fgMorphCall(tree->AsCall()); - break; - - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif // FEATURE_HW_INTRINSICS - { - fgSetRngChkTarget(tree); - - GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); - bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex); - bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen); - // If the index is a comma(throw, x), just return that. - if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex)) - { - tree = bndsChk->gtIndex; - } - - bndsChk->gtFlags &= ~GTF_CALL; - - // Propagate effects flags upwards - bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT); - bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT); - - // Otherwise, we don't change the tree. 
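// Illustrative sketch, not part of this change: the flag-propagation idiom the
// cases above (and below) keep repeating - clear the parent's effect bits, then
// OR in each operand's effect bits so the parent stays a superset of its
// children. ToyNode and kEffectMask are hypothetical stand-ins for GenTree and
// GTF_ALL_EFFECT.
#include <cstdint>
struct ToyNode
{
    uint32_t flags;
    ToyNode* op1;
    ToyNode* op2;
};
static void PropagateEffectFlags(ToyNode* parent, uint32_t kEffectMask)
{
    parent->flags &= ~kEffectMask; // drop stale effect bits first
    if (parent->op1 != nullptr)
    {
        parent->flags |= (parent->op1->flags & kEffectMask);
    }
    if (parent->op2 != nullptr)
    {
        parent->flags |= (parent->op2->flags & kEffectMask);
    }
}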
- } - break; - - case GT_ARR_ELEM: - tree->AsArrElem()->gtArrObj = fgMorphTree(tree->AsArrElem()->gtArrObj); - - unsigned dim; - for (dim = 0; dim < tree->AsArrElem()->gtArrRank; dim++) - { - tree->AsArrElem()->gtArrInds[dim] = fgMorphTree(tree->AsArrElem()->gtArrInds[dim]); - } - - tree->gtFlags &= ~GTF_CALL; - - tree->gtFlags |= tree->AsArrElem()->gtArrObj->gtFlags & GTF_ALL_EFFECT; - - for (dim = 0; dim < tree->AsArrElem()->gtArrRank; dim++) - { - tree->gtFlags |= tree->AsArrElem()->gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT; - } - - if (fgGlobalMorph) - { - fgSetRngChkTarget(tree, false); - } - break; - - case GT_ARR_OFFSET: - tree->AsArrOffs()->gtOffset = fgMorphTree(tree->AsArrOffs()->gtOffset); - tree->AsArrOffs()->gtIndex = fgMorphTree(tree->AsArrOffs()->gtIndex); - tree->AsArrOffs()->gtArrObj = fgMorphTree(tree->AsArrOffs()->gtArrObj); - - tree->gtFlags &= ~GTF_CALL; - tree->gtFlags |= tree->AsArrOffs()->gtOffset->gtFlags & GTF_ALL_EFFECT; - tree->gtFlags |= tree->AsArrOffs()->gtIndex->gtFlags & GTF_ALL_EFFECT; - tree->gtFlags |= tree->AsArrOffs()->gtArrObj->gtFlags & GTF_ALL_EFFECT; - if (fgGlobalMorph) - { - fgSetRngChkTarget(tree, false); - } - break; - - case GT_PHI: - tree->gtFlags &= ~GTF_ALL_EFFECT; - for (GenTreePhi::Use& use : tree->AsPhi()->Uses()) - { - use.SetNode(fgMorphTree(use.GetNode())); - tree->gtFlags |= use.GetNode()->gtFlags & GTF_ALL_EFFECT; - } - break; - - case GT_FIELD_LIST: - tree->gtFlags &= ~GTF_ALL_EFFECT; - for (GenTreeFieldList::Use& use : tree->AsFieldList()->Uses()) - { - use.SetNode(fgMorphTree(use.GetNode())); - tree->gtFlags |= (use.GetNode()->gtFlags & GTF_ALL_EFFECT); - } - break; - - case GT_CMPXCHG: - tree->AsCmpXchg()->gtOpLocation = fgMorphTree(tree->AsCmpXchg()->gtOpLocation); - tree->AsCmpXchg()->gtOpValue = fgMorphTree(tree->AsCmpXchg()->gtOpValue); - tree->AsCmpXchg()->gtOpComparand = fgMorphTree(tree->AsCmpXchg()->gtOpComparand); - - tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL); - - tree->gtFlags |= tree->AsCmpXchg()->gtOpLocation->gtFlags & GTF_ALL_EFFECT; - tree->gtFlags |= tree->AsCmpXchg()->gtOpValue->gtFlags & GTF_ALL_EFFECT; - tree->gtFlags |= tree->AsCmpXchg()->gtOpComparand->gtFlags & GTF_ALL_EFFECT; - break; - - case GT_STORE_DYN_BLK: - case GT_DYN_BLK: - if (tree->OperGet() == GT_STORE_DYN_BLK) - { - tree->AsDynBlk()->Data() = fgMorphTree(tree->AsDynBlk()->Data()); - } - tree->AsDynBlk()->Addr() = fgMorphTree(tree->AsDynBlk()->Addr()); - tree->AsDynBlk()->gtDynamicSize = fgMorphTree(tree->AsDynBlk()->gtDynamicSize); - - tree->gtFlags &= ~GTF_CALL; - tree->SetIndirExceptionFlags(this); - - if (tree->OperGet() == GT_STORE_DYN_BLK) - { - tree->gtFlags |= tree->AsDynBlk()->Data()->gtFlags & GTF_ALL_EFFECT; - } - tree->gtFlags |= tree->AsDynBlk()->Addr()->gtFlags & GTF_ALL_EFFECT; - tree->gtFlags |= tree->AsDynBlk()->gtDynamicSize->gtFlags & GTF_ALL_EFFECT; - break; - - case GT_INDEX_ADDR: - GenTreeIndexAddr* indexAddr; - indexAddr = tree->AsIndexAddr(); - indexAddr->Index() = fgMorphTree(indexAddr->Index()); - indexAddr->Arr() = fgMorphTree(indexAddr->Arr()); - - tree->gtFlags &= ~GTF_CALL; - - tree->gtFlags |= indexAddr->Index()->gtFlags & GTF_ALL_EFFECT; - tree->gtFlags |= indexAddr->Arr()->gtFlags & GTF_ALL_EFFECT; - break; - - default: -#ifdef DEBUG - gtDispTree(tree); -#endif - noway_assert(!"unexpected operator"); - } -DONE: - - fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum)); - - return tree; -} - -#if LOCAL_ASSERTION_PROP -//------------------------------------------------------------------------ -// 
fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum -// -// Arguments: -// lclNum - The varNum of the lclVar for which we're killing assertions. -// tree - (DEBUG only) the tree responsible for killing its assertions. -// -void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree)) -{ - /* All dependent assertions are killed here */ - - ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum)); - - if (killed) - { - AssertionIndex index = optAssertionCount; - while (killed && (index > 0)) - { - if (BitVecOps::IsMember(apTraits, killed, index - 1)) - { -#ifdef DEBUG - AssertionDsc* curAssertion = optGetAssertion(index); - noway_assert((curAssertion->op1.lcl.lclNum == lclNum) || - ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum))); - if (verbose) - { - printf("\nThe assignment "); - printTreeID(tree); - printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum); - optPrintAssertion(curAssertion); - } -#endif - // Remove this bit from the killed mask - BitVecOps::RemoveElemD(apTraits, killed, index - 1); - - optAssertionRemove(index); - } - - index--; - } - - // killed mask should now be zero - noway_assert(BitVecOps::IsEmpty(apTraits, killed)); - } -} -//------------------------------------------------------------------------ -// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum. -// -// Arguments: -// lclNum - The varNum of the lclVar for which we're killing assertions. -// tree - (DEBUG only) the tree responsible for killing its assertions. -// -// Notes: -// For structs and struct fields, it will invalidate the children and parent -// respectively. -// Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar. -// -void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree)) -{ - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (varDsc->lvPromoted) - { - noway_assert(varTypeIsStruct(varDsc)); - - // Kill the field locals. - for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) - { - fgKillDependentAssertionsSingle(i DEBUGARG(tree)); - } - - // Kill the struct local itself. - fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); - } - else if (varDsc->lvIsStructField) - { - // Kill the field local. - fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); - - // Kill the parent struct. - fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree)); - } - else - { - fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); - } -} -#endif // LOCAL_ASSERTION_PROP - -/***************************************************************************** - * - * This function is called to complete the morphing of a tree node - * It should only be called once for each node. - * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated, - * to enforce the invariant that each node is only morphed once. - * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced - * by an equivalent tree. 
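// Illustrative sketch, not part of this change: the kill loop above, reduced to
// a std::bitset. It walks from the highest assertion index down, clears each
// dependent bit, and retires the assertion. The 64-slot size and the
// removeAssertion callback are assumptions made only for this illustration.
#include <bitset>
#include <cassert>
#include <functional>
static void KillDependentAssertionsSketch(std::bitset<64>& killed, unsigned assertionCount,
                                          const std::function<void(unsigned)>& removeAssertion)
{
    assert(assertionCount <= killed.size()); // sketch-only bound
    for (unsigned index = assertionCount; killed.any() && (index > 0); index--)
    {
        if (killed.test(index - 1))
        {
            killed.reset(index - 1); // remove this bit from the killed mask
            removeAssertion(index);  // and drop the assertion itself
        }
    }
}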
- * - */ - -void Compiler::fgMorphTreeDone(GenTree* tree, - GenTree* oldTree /* == NULL */ - DEBUGARG(int morphNum)) -{ -#ifdef DEBUG - if (verbose && treesBeforeAfterMorph) - { - printf("\nfgMorphTree (after %d):\n", morphNum); - gtDispTree(tree); - printf(""); // in our logic this causes a flush - } -#endif - - if (!fgGlobalMorph) - { - return; - } - - if ((oldTree != nullptr) && (oldTree != tree)) - { - /* Ensure that we have morphed this node */ - assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!"); - -#ifdef DEBUG - TransferTestDataToNode(oldTree, tree); -#endif - } - else - { - // Ensure that we haven't morphed this node already - assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!"); - } - - if (tree->OperKind() & GTK_CONST) - { - goto DONE; - } - -#if LOCAL_ASSERTION_PROP - - if (!optLocalAssertionProp) - { - goto DONE; - } - - /* Do we have any active assertions? */ - - if (optAssertionCount > 0) - { - /* Is this an assignment to a local variable */ - GenTreeLclVarCommon* lclVarTree = nullptr; - - // The check below will miss LIR-style assignments. - // - // But we shouldn't be running local assertion prop on these, - // as local prop gets disabled when we run global prop. - assert(!tree->OperIs(GT_STORE_LCL_VAR, GT_STORE_LCL_FLD)); - - // DefinesLocal can return true for some BLK op uses, so - // check what gets assigned only when we're at an assignment. - if (tree->OperIs(GT_ASG) && tree->DefinesLocal(this, &lclVarTree)) - { - unsigned lclNum = lclVarTree->GetLclNum(); - noway_assert(lclNum < lvaCount); - fgKillDependentAssertions(lclNum DEBUGARG(tree)); - } - } - - /* If this tree makes a new assertion - make it available */ - optAssertionGen(tree); - -#endif // LOCAL_ASSERTION_PROP - -DONE:; - -#ifdef DEBUG - /* Mark this node as being morphed */ - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif -} - -/***************************************************************************** - * - * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants - * Returns true if we modified the flow graph - */ - -bool Compiler::fgFoldConditional(BasicBlock* block) -{ - bool result = false; - - // We don't want to make any code unreachable - if (opts.OptimizationDisabled()) - { - return false; - } - - if (block->bbJumpKind == BBJ_COND) - { - noway_assert(block->bbStmtList != nullptr && block->bbStmtList->GetPrevStmt() != nullptr); - - Statement* lastStmt = block->lastStmt(); - - noway_assert(lastStmt->GetNextStmt() == nullptr); - - if (lastStmt->GetRootNode()->gtOper == GT_CALL) - { - noway_assert(fgRemoveRestOfBlock); - - /* Unconditional throw - transform the basic block into a BBJ_THROW */ - fgConvertBBToThrowBB(block); - -#ifdef DEBUG - if (verbose) - { - printf("\nConditional folded at " FMT_BB "\n", block->bbNum); - printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum); - } -#endif - goto DONE_COND; - } - - noway_assert(lastStmt->GetRootNode()->gtOper == GT_JTRUE); - - /* Did we fold the conditional */ - - noway_assert(lastStmt->GetRootNode()->AsOp()->gtOp1); - GenTree* condTree; - condTree = lastStmt->GetRootNode()->AsOp()->gtOp1; - GenTree* cond; - cond = condTree->gtEffectiveVal(true); - - if (cond->OperKind() & GTK_CONST) - { - /* Yupee - we folded the conditional! 
- * Remove the conditional statement */ - - noway_assert(cond->gtOper == GT_CNS_INT); - noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0)); - - if (condTree != cond) - { - // Preserve any side effects - assert(condTree->OperIs(GT_COMMA)); - lastStmt->SetRootNode(condTree); - } - else - { - // no side effects, remove the jump entirely - fgRemoveStmt(block, lastStmt); - } - // block is a BBJ_COND that we are folding the conditional for. - // bTaken is the path that will always be taken from block. - // bNotTaken is the path that will never be taken from block. - // - BasicBlock* bTaken; - BasicBlock* bNotTaken; - - if (cond->AsIntCon()->gtIconVal != 0) - { - /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */ - block->bbJumpKind = BBJ_ALWAYS; - bTaken = block->bbJumpDest; - bNotTaken = block->bbNext; - } - else - { - /* Unmark the loop if we are removing a backwards branch */ - /* dest block must also be marked as a loop head and */ - /* We must be able to reach the backedge block */ - if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && - fgReachable(block->bbJumpDest, block)) - { - optUnmarkLoopBlocks(block->bbJumpDest, block); - } - - /* JTRUE 0 - transform the basic block into a BBJ_NONE */ - block->bbJumpKind = BBJ_NONE; - bTaken = block->bbNext; - bNotTaken = block->bbJumpDest; - } - - if (fgHaveValidEdgeWeights) - { - // We are removing an edge from block to bNotTaken - // and we have already computed the edge weights, so - // we will try to adjust some of the weights - // - flowList* edgeTaken = fgGetPredForBlock(bTaken, block); - BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block - - // We examine the taken edge (block -> bTaken) - // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight - // else if bTaken has valid profile weight and block does not we try to adjust block's weight - // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken - // - if (block->hasProfileWeight()) - { - // The edge weights for (block -> bTaken) are 100% of block's weight - - edgeTaken->setEdgeWeights(block->bbWeight, block->bbWeight, bTaken); - - if (!bTaken->hasProfileWeight()) - { - if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight)) - { - // Update the weight of bTaken - bTaken->inheritWeight(block); - bUpdated = bTaken; - } - } - } - else if (bTaken->hasProfileWeight()) - { - if (bTaken->countOfInEdges() == 1) - { - // There is only one in edge to bTaken - edgeTaken->setEdgeWeights(bTaken->bbWeight, bTaken->bbWeight, bTaken); - - // Update the weight of block - block->inheritWeight(bTaken); - bUpdated = block; - } - } - - if (bUpdated != nullptr) - { - BasicBlock::weight_t newMinWeight; - BasicBlock::weight_t newMaxWeight; - - flowList* edge; - // Now fix the weights of the edges out of 'bUpdated' - switch (bUpdated->bbJumpKind) - { - case BBJ_NONE: - edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); - newMaxWeight = bUpdated->bbWeight; - newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); - break; - - case BBJ_COND: - edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); - newMaxWeight = bUpdated->bbWeight; - newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); - FALLTHROUGH; - - case BBJ_ALWAYS: - edge = 
fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated); - newMaxWeight = bUpdated->bbWeight; - newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); - edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); - break; - - default: - // We don't handle BBJ_SWITCH - break; - } - } - } - - /* modify the flow graph */ - - /* Remove 'block' from the predecessor list of 'bNotTaken' */ - fgRemoveRefPred(bNotTaken, block); - -#ifdef DEBUG - if (verbose) - { - printf("\nConditional folded at " FMT_BB "\n", block->bbNum); - printf(FMT_BB " becomes a %s", block->bbNum, - block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE"); - if (block->bbJumpKind == BBJ_ALWAYS) - { - printf(" to " FMT_BB, block->bbJumpDest->bbNum); - } - printf("\n"); - } -#endif - - /* if the block was a loop condition we may have to modify - * the loop table */ - - for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) - { - /* Some loops may have been already removed by - * loop unrolling or conditional folding */ - - if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) - { - continue; - } - - /* We are only interested in the loop bottom */ - - if (optLoopTable[loopNum].lpBottom == block) - { - if (cond->AsIntCon()->gtIconVal == 0) - { - /* This was a bogus loop (condition always false) - * Remove the loop from the table */ - - optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; -#if FEATURE_LOOP_ALIGN - optLoopTable[loopNum].lpFirst->bbFlags &= ~BBF_LOOP_ALIGN; - JITDUMP("Removing LOOP_ALIGN flag from bogus loop in " FMT_BB "\n", - optLoopTable[loopNum].lpFirst->bbNum); -#endif - -#ifdef DEBUG - if (verbose) - { - printf("Removing loop " FMT_LP " (from " FMT_BB " to " FMT_BB ")\n\n", loopNum, - optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum); - } -#endif - } - } - } - DONE_COND: - result = true; - } - } - else if (block->bbJumpKind == BBJ_SWITCH) - { - noway_assert(block->bbStmtList != nullptr && block->bbStmtList->GetPrevStmt() != nullptr); - - Statement* lastStmt = block->lastStmt(); - - noway_assert(lastStmt->GetNextStmt() == nullptr); - - if (lastStmt->GetRootNode()->gtOper == GT_CALL) - { - noway_assert(fgRemoveRestOfBlock); - - /* Unconditional throw - transform the basic block into a BBJ_THROW */ - fgConvertBBToThrowBB(block); - -#ifdef DEBUG - if (verbose) - { - printf("\nConditional folded at " FMT_BB "\n", block->bbNum); - printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum); - } -#endif - goto DONE_SWITCH; - } - - noway_assert(lastStmt->GetRootNode()->gtOper == GT_SWITCH); - - /* Did we fold the conditional */ - - noway_assert(lastStmt->GetRootNode()->AsOp()->gtOp1); - GenTree* condTree; - condTree = lastStmt->GetRootNode()->AsOp()->gtOp1; - GenTree* cond; - cond = condTree->gtEffectiveVal(true); - - if (cond->OperKind() & GTK_CONST) - { - /* Yupee - we folded the conditional! 
- * Remove the conditional statement */ - - noway_assert(cond->gtOper == GT_CNS_INT); - - if (condTree != cond) - { - // Preserve any side effects - assert(condTree->OperIs(GT_COMMA)); - lastStmt->SetRootNode(condTree); - } - else - { - // no side effects, remove the switch entirely - fgRemoveStmt(block, lastStmt); - } - - /* modify the flow graph */ - - /* Find the actual jump target */ - unsigned switchVal; - switchVal = (unsigned)cond->AsIntCon()->gtIconVal; - unsigned jumpCnt; - jumpCnt = block->bbJumpSwt->bbsCount; - BasicBlock** jumpTab; - jumpTab = block->bbJumpSwt->bbsDstTab; - bool foundVal; - foundVal = false; - - for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) - { - BasicBlock* curJump = *jumpTab; - - assert(curJump->countOfInEdges() > 0); - - // If val matches switchVal or we are at the last entry and - // we never found the switch value then set the new jump dest - - if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1))) - { - if (curJump != block->bbNext) - { - /* transform the basic block into a BBJ_ALWAYS */ - block->bbJumpKind = BBJ_ALWAYS; - block->bbJumpDest = curJump; - } - else - { - /* transform the basic block into a BBJ_NONE */ - block->bbJumpKind = BBJ_NONE; - } - foundVal = true; - } - else - { - /* Remove 'block' from the predecessor list of 'curJump' */ - fgRemoveRefPred(curJump, block); - } - } -#ifdef DEBUG - if (verbose) - { - printf("\nConditional folded at " FMT_BB "\n", block->bbNum); - printf(FMT_BB " becomes a %s", block->bbNum, - block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE"); - if (block->bbJumpKind == BBJ_ALWAYS) - { - printf(" to " FMT_BB, block->bbJumpDest->bbNum); - } - printf("\n"); - } -#endif - DONE_SWITCH: - result = true; - } - } - return result; -} - -//------------------------------------------------------------------------ -// fgMorphBlockStmt: morph a single statement in a block. -// -// Arguments: -// block - block containing the statement -// stmt - statement to morph -// msg - string to identify caller in a dump -// -// Returns: -// true if 'stmt' was removed from the block. -// s false if 'stmt' is still in the block (even if other statements were removed). -// -// Notes: -// Can be called anytime, unlike fgMorphStmts() which should only be called once. -// -bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg)) -{ - assert(block != nullptr); - assert(stmt != nullptr); - - // Reset some ambient state - fgRemoveRestOfBlock = false; - compCurBB = block; - compCurStmt = stmt; - - GenTree* morph = fgMorphTree(stmt->GetRootNode()); - - // Bug 1106830 - During the CSE phase we can't just remove - // morph->AsOp()->gtOp2 as it could contain CSE expressions. - // This leads to a noway_assert in OptCSE.cpp when - // searching for the removed CSE ref. 
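// Illustrative sketch, not part of this change: the target selection performed
// by the switch-folding loop above. An exact match on the constant wins;
// otherwise the last jump-table entry is used as the fallback.
#include <cassert>
#include <cstddef>
static size_t PickFoldedSwitchTarget(size_t switchVal, size_t jumpCnt)
{
    assert(jumpCnt > 0);
    return (switchVal < jumpCnt) ? switchVal : (jumpCnt - 1);
}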
(using gtFindLink) - // - if (!optValnumCSE_phase) - { - // Check for morph as a GT_COMMA with an unconditional throw - if (fgIsCommaThrow(morph, true)) - { -#ifdef DEBUG - if (verbose) - { - printf("Folding a top-level fgIsCommaThrow stmt\n"); - printf("Removing op2 as unreachable:\n"); - gtDispTree(morph->AsOp()->gtOp2); - printf("\n"); - } -#endif - // Use the call as the new stmt - morph = morph->AsOp()->gtOp1; - noway_assert(morph->gtOper == GT_CALL); - } - - // we can get a throw as a statement root - if (fgIsThrow(morph)) - { -#ifdef DEBUG - if (verbose) - { - printf("We have a top-level fgIsThrow stmt\n"); - printf("Removing the rest of block as unreachable:\n"); - } -#endif - noway_assert((morph->gtFlags & GTF_COLON_COND) == 0); - fgRemoveRestOfBlock = true; - } - } - - stmt->SetRootNode(morph); - - // Can the entire tree be removed? - bool removedStmt = false; - - // Defer removing statements during CSE so we don't inadvertently remove any CSE defs. - if (!optValnumCSE_phase) - { - removedStmt = fgCheckRemoveStmt(block, stmt); - } - - // Or this is the last statement of a conditional branch that was just folded? - if (!removedStmt && (stmt->GetNextStmt() == nullptr) && !fgRemoveRestOfBlock) - { - if (fgFoldConditional(block)) - { - if (block->bbJumpKind != BBJ_THROW) - { - removedStmt = true; - } - } - } - - if (!removedStmt) - { - // Have to re-do the evaluation order since for example some later code does not expect constants as op1 - gtSetStmtInfo(stmt); - - // Have to re-link the nodes for this statement - fgSetStmtSeq(stmt); - } - -#ifdef DEBUG - if (verbose) - { - printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed")); - gtDispTree(morph); - printf("\n"); - } -#endif - - if (fgRemoveRestOfBlock) - { - // Remove the rest of the stmts in the block - for (Statement* removeStmt : StatementList(stmt->GetNextStmt())) - { - fgRemoveStmt(block, removeStmt); - } - - // The rest of block has been removed and we will always throw an exception. - // - // For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_NONE. - // We should not convert it to a ThrowBB. - if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0)) - { - // Convert block to a throw bb - fgConvertBBToThrowBB(block); - } - -#ifdef DEBUG - if (verbose) - { - printf("\n%s Block " FMT_BB " becomes a throw block.\n", msg, block->bbNum); - } -#endif - fgRemoveRestOfBlock = false; - } - - return removedStmt; -} - -/***************************************************************************** - * - * Morph the statements of the given block. - * This function should be called just once for a block. Use fgMorphBlockStmt() - * for reentrant calls. - */ - -void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw) -{ - fgRemoveRestOfBlock = false; - - *lnot = *loadw = false; - - fgCurrentlyInUseArgTemps = hashBv::Create(this); - - for (Statement* stmt : block->Statements()) - { - if (fgRemoveRestOfBlock) - { - fgRemoveStmt(block, stmt); - continue; - } -#ifdef FEATURE_SIMD - if (opts.OptimizationEnabled() && stmt->GetRootNode()->TypeGet() == TYP_FLOAT && - stmt->GetRootNode()->OperGet() == GT_ASG) - { - fgMorphCombineSIMDFieldAssignments(block, stmt); - } -#endif - - fgMorphStmt = stmt; - compCurStmt = stmt; - GenTree* oldTree = stmt->GetRootNode(); - -#ifdef DEBUG - - unsigned oldHash = verbose ? 
gtHashValue(oldTree) : DUMMY_INIT(~0); - - if (verbose) - { - printf("\nfgMorphTree " FMT_BB ", " FMT_STMT " (before)\n", block->bbNum, stmt->GetID()); - gtDispTree(oldTree); - } -#endif - - /* Morph this statement tree */ - - GenTree* morphedTree = fgMorphTree(oldTree); - - // mark any outgoing arg temps as free so we can reuse them in the next statement. - - fgCurrentlyInUseArgTemps->ZeroAll(); - - // Has fgMorphStmt been sneakily changed ? - - if ((stmt->GetRootNode() != oldTree) || (block != compCurBB)) - { - if (stmt->GetRootNode() != oldTree) - { - /* This must be tailcall. Ignore 'morphedTree' and carry on with - the tail-call node */ - - morphedTree = stmt->GetRootNode(); - } - else - { - /* This must be a tailcall that caused a GCPoll to get - injected. We haven't actually morphed the call yet - but the flag still got set, clear it here... */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - morphedTree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; -#endif - } - - noway_assert(compTailCallUsed); - noway_assert(morphedTree->gtOper == GT_CALL); - GenTreeCall* call = morphedTree->AsCall(); - // Could be - // - a fast call made as jmp in which case block will be ending with - // BBJ_RETURN (as we need epilog) and marked as containing a jmp. - // - a tailcall dispatched via JIT helper, on x86, in which case - // block will be ending with BBJ_THROW. - // - a tail call dispatched via runtime help (IL stubs), in which - // case there will not be any tailcall and the block will be ending - // with BBJ_RETURN (as normal control flow) - noway_assert((call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && - ((compCurBB->bbFlags & BBF_HAS_JMP)) != 0) || - (call->IsTailCallViaJitHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || - (!call->IsTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN))); - } - -#ifdef DEBUG - if (compStressCompile(STRESS_CLONE_EXPR, 30)) - { - // Clone all the trees to stress gtCloneExpr() - - if (verbose) - { - printf("\nfgMorphTree (stressClone from):\n"); - gtDispTree(morphedTree); - } - - morphedTree = gtCloneExpr(morphedTree); - noway_assert(morphedTree != nullptr); - - if (verbose) - { - printf("\nfgMorphTree (stressClone to):\n"); - gtDispTree(morphedTree); - } - } - - /* If the hash value changes. 
we modified the tree during morphing */ - if (verbose) - { - unsigned newHash = gtHashValue(morphedTree); - if (newHash != oldHash) - { - printf("\nfgMorphTree " FMT_BB ", " FMT_STMT " (after)\n", block->bbNum, stmt->GetID()); - gtDispTree(morphedTree); - } - } -#endif - - /* Check for morphedTree as a GT_COMMA with an unconditional throw */ - if (!gtIsActiveCSE_Candidate(morphedTree) && fgIsCommaThrow(morphedTree, true)) - { - /* Use the call as the new stmt */ - morphedTree = morphedTree->AsOp()->gtOp1; - noway_assert(morphedTree->gtOper == GT_CALL); - noway_assert((morphedTree->gtFlags & GTF_COLON_COND) == 0); - - fgRemoveRestOfBlock = true; - } - - stmt->SetRootNode(morphedTree); - - if (fgRemoveRestOfBlock) - { - continue; - } - - /* Has the statement been optimized away */ - - if (fgCheckRemoveStmt(block, stmt)) - { - continue; - } - - /* Check if this block ends with a conditional branch that can be folded */ - - if (fgFoldConditional(block)) - { - continue; - } - - if (ehBlockHasExnFlowDsc(block)) - { - continue; - } - } - - if (fgRemoveRestOfBlock) - { - if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH)) - { - Statement* first = block->firstStmt(); - noway_assert(first); - Statement* lastStmt = block->lastStmt(); - noway_assert(lastStmt && lastStmt->GetNextStmt() == nullptr); - GenTree* last = lastStmt->GetRootNode(); - - if (((block->bbJumpKind == BBJ_COND) && (last->gtOper == GT_JTRUE)) || - ((block->bbJumpKind == BBJ_SWITCH) && (last->gtOper == GT_SWITCH))) - { - GenTree* op1 = last->AsOp()->gtOp1; - - if (op1->OperKind() & GTK_RELOP) - { - /* Unmark the comparison node with GTF_RELOP_JMP_USED */ - op1->gtFlags &= ~GTF_RELOP_JMP_USED; - } - - lastStmt->SetRootNode(fgMorphTree(op1)); - } - } - - /* Mark block as a BBJ_THROW block */ - fgConvertBBToThrowBB(block); - } - -#if FEATURE_FASTTAILCALL - GenTree* recursiveTailCall = nullptr; - if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall)) - { - fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall()); - } -#endif - - // Reset this back so that it doesn't leak out impacting other blocks - fgRemoveRestOfBlock = false; -} - -/***************************************************************************** - * - * Morph the blocks of the method. - * Returns true if the basic block list is modified. - * This function should be called just once. 
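// Illustrative sketch, not part of this change: why a COMMA(throw, x) statement
// collapses to just the call above - the second operand can never be evaluated,
// so only the throw is kept and the rest of the block is unreachable. A
// source-level analogue:
#include <stdexcept>
static int CommaThrowAnalogue()
{
    // "(throw, 42)" in expression form; the 42 is dead because control always
    // leaves through the throw first.
    return (throw std::runtime_error("always throws"), 42);
}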
- */ - -void Compiler::fgMorphBlocks() -{ -#ifdef DEBUG - if (verbose) - { - printf("\n*************** In fgMorphBlocks()\n"); - } -#endif - - /* Since fgMorphTree can be called after various optimizations to re-arrange - * the nodes we need a global flag to signal if we are during the one-pass - * global morphing */ - - fgGlobalMorph = true; - -#if LOCAL_ASSERTION_PROP - // - // Local assertion prop is enabled if we are optimized - // - optLocalAssertionProp = opts.OptimizationEnabled(); - - if (optLocalAssertionProp) - { - // - // Initialize for local assertion prop - // - optAssertionInit(true); - } -#elif ASSERTION_PROP - // - // If LOCAL_ASSERTION_PROP is not set - // and we have global assertion prop - // then local assertion prop is always off - // - optLocalAssertionProp = false; - -#endif - - /*------------------------------------------------------------------------- - * Process all basic blocks in the function - */ - - BasicBlock* block = fgFirstBB; - noway_assert(block); - - do - { -#if OPT_BOOL_OPS - bool lnot = false; -#endif - - bool loadw = false; - -#ifdef DEBUG - if (verbose) - { - printf("\nMorphing " FMT_BB " of '%s'\n", block->bbNum, info.compFullName); - } -#endif - -#if LOCAL_ASSERTION_PROP - if (optLocalAssertionProp) - { - // - // Clear out any currently recorded assertion candidates - // before processing each basic block, - // also we must handle QMARK-COLON specially - // - optAssertionReset(0); - } -#endif - // Make the current basic block address available globally. - compCurBB = block; - - // Process all statement trees in the basic block. - fgMorphStmts(block, &lnot, &loadw); - - // Do we need to merge the result of this block into a single return block? - if ((block->bbJumpKind == BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)) - { - if ((genReturnBB != nullptr) && (genReturnBB != block)) - { - fgMergeBlockReturn(block); - } - } - - block = block->bbNext; - } while (block != nullptr); - - // We are done with the global morphing phase - fgGlobalMorph = false; - compCurBB = nullptr; - - // Under OSR, we no longer need to specially protect the original method entry - // - if (opts.IsOSR() && (fgEntryBB != nullptr) && (fgEntryBB->bbFlags & BBF_IMPORTED)) - { - JITDUMP("OSR: un-protecting original method entry " FMT_BB "\n", fgEntryBB->bbNum); - assert(fgEntryBB->bbRefs > 0); - fgEntryBB->bbRefs--; - // We don't need to remember this block anymore. - fgEntryBB = nullptr; - } - -#ifdef DEBUG - if (verboseTrees) - { - fgDispBasicBlocks(true); - } -#endif -} - -//------------------------------------------------------------------------ -// fgMergeBlockReturn: assign the block return value (if any) into the single return temp -// and branch to the single return block. -// -// Arguments: -// block - the block to process. -// -// Notes: -// A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN. -// For example a method returning void could have an empty block with jump kind BBJ_RETURN. -// Such blocks do materialize as part of in-lining. -// -// A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN. -// It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC. -// For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal -// is BAD_VAR_NUM. 
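// Illustrative sketch, not part of this change: the single-return shape that
// fgMergeBlockReturn produces. A BBJ_RETURN block stores its value into the
// shared return local (when the method returns one) and jumps to the common
// return block, which is the only block that still returns. The names below
// are stand-ins, not JIT code.
static int MergedReturnShape(bool cond, int a, int b)
{
    int retTemp; // stand-in for genReturnLocal
    if (cond)
    {
        retTemp = a; // former "return a;" block
        goto commonReturn;
    }
    retTemp = b;     // former "return b;" block
commonReturn:        // stand-in for genReturnBB
    return retTemp;
}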
-// -void Compiler::fgMergeBlockReturn(BasicBlock* block) -{ - assert((block->bbJumpKind == BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)); - assert((genReturnBB != nullptr) && (genReturnBB != block)); - - // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN. - - Statement* lastStmt = block->lastStmt(); - GenTree* ret = (lastStmt != nullptr) ? lastStmt->GetRootNode() : nullptr; - - if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0)) - { - // This return was generated during epilog merging, so leave it alone - } - else - { - // We'll jump to the genReturnBB. - CLANG_FORMAT_COMMENT_ANCHOR; - -#if !defined(TARGET_X86) - if (info.compFlags & CORINFO_FLG_SYNCH) - { - fgConvertSyncReturnToLeave(block); - } - else -#endif // !TARGET_X86 - { - block->bbJumpKind = BBJ_ALWAYS; - block->bbJumpDest = genReturnBB; - fgAddRefPred(genReturnBB, block); - fgReturnCount--; - } - if (genReturnLocal != BAD_VAR_NUM) - { - // replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal. - - // Method must be returning a value other than TYP_VOID. - noway_assert(compMethodHasRetVal()); - - // This block must be ending with a GT_RETURN - noway_assert(lastStmt != nullptr); - noway_assert(lastStmt->GetNextStmt() == nullptr); - noway_assert(ret != nullptr); - - // GT_RETURN must have non-null operand as the method is returning the value assigned to - // genReturnLocal - noway_assert(ret->OperGet() == GT_RETURN); - noway_assert(ret->gtGetOp1() != nullptr); - - Statement* pAfterStatement = lastStmt; - IL_OFFSETX offset = lastStmt->GetILOffsetX(); - GenTree* tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1(), &pAfterStatement, offset, block); - if (tree->OperIsCopyBlkOp()) - { - tree = fgMorphCopyBlock(tree); - } - - if (pAfterStatement == lastStmt) - { - lastStmt->SetRootNode(tree); - } - else - { - // gtNewTempAssign inserted additional statements after last - fgRemoveStmt(block, lastStmt); - Statement* newStmt = gtNewStmt(tree, offset); - fgInsertStmtAfter(block, pAfterStatement, newStmt); - lastStmt = newStmt; - } - } - else if (ret != nullptr && ret->OperGet() == GT_RETURN) - { - // This block ends with a GT_RETURN - noway_assert(lastStmt != nullptr); - noway_assert(lastStmt->GetNextStmt() == nullptr); - - // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn - // block - noway_assert(ret->TypeGet() == TYP_VOID); - noway_assert(ret->gtGetOp1() == nullptr); - - fgRemoveStmt(block, lastStmt); - } - - JITDUMP("\nUpdate " FMT_BB " to jump to common return block.\n", block->bbNum); - DISPBLOCK(block); - - if (block->hasProfileWeight()) - { - BasicBlock::weight_t const oldWeight = - genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : BB_ZERO_WEIGHT; - BasicBlock::weight_t const newWeight = oldWeight + block->bbWeight; - - JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, - block->bbNum, genReturnBB->bbNum); - - genReturnBB->setBBProfileWeight(newWeight); - DISPBLOCK(genReturnBB); - } - } -} - -/***************************************************************************** - * - * Make some decisions about the kind of code to generate. - */ - -void Compiler::fgSetOptions() -{ -#ifdef DEBUG - /* Should we force fully interruptible code ? 
*/ - if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30)) - { - noway_assert(!codeGen->isGCTypeFixed()); - SetInterruptible(true); - } -#endif - - if (opts.compDbgCode) - { - assert(!codeGen->isGCTypeFixed()); - SetInterruptible(true); // debugging is easier this way ... - } - - /* Assume we won't need an explicit stack frame if this is allowed */ - - if (compLocallocUsed) - { - codeGen->setFramePointerRequired(true); - } - -#ifdef TARGET_X86 - - if (compTailCallUsed) - codeGen->setFramePointerRequired(true); - -#endif // TARGET_X86 - - if (!opts.genFPopt) - { - codeGen->setFramePointerRequired(true); - } - - // Assert that the EH table has been initialized by now. Note that - // compHndBBtabAllocCount never decreases; it is a high-water mark - // of table allocation. In contrast, compHndBBtabCount does shrink - // if we delete a dead EH region, and if it shrinks to zero, the - // table pointer compHndBBtab is unreliable. - assert(compHndBBtabAllocCount >= info.compXcptnsCount); - -#ifdef TARGET_X86 - - // Note: this case, and the !X86 case below, should both use the - // !X86 path. This would require a few more changes for X86 to use - // compHndBBtabCount (the current number of EH clauses) instead of - // info.compXcptnsCount (the number of EH clauses in IL), such as - // in ehNeedsShadowSPslots(). This is because sometimes the IL has - // an EH clause that we delete as statically dead code before we - // get here, leaving no EH clauses left, and thus no requirement - // to use a frame pointer because of EH. But until all the code uses - // the same test, leave info.compXcptnsCount here. - if (info.compXcptnsCount > 0) - { - codeGen->setFramePointerRequiredEH(true); - } - -#else // !TARGET_X86 - - if (compHndBBtabCount > 0) - { - codeGen->setFramePointerRequiredEH(true); - } - -#endif // TARGET_X86 - -#ifdef UNIX_X86_ABI - if (info.compXcptnsCount > 0) - { - assert(!codeGen->isGCTypeFixed()); - // Enforce fully interruptible codegen for funclet unwinding - SetInterruptible(true); - } -#endif // UNIX_X86_ABI - - if (compMethodRequiresPInvokeFrame()) - { - codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame - } - - if (info.compPublishStubParam) - { - codeGen->setFramePointerRequiredGCInfo(true); - } - - if (compIsProfilerHookNeeded()) - { - codeGen->setFramePointerRequired(true); - } - - if (info.compIsVarArgs) - { - // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative. - codeGen->setFramePointerRequiredGCInfo(true); - } - - if (lvaReportParamTypeArg()) - { - codeGen->setFramePointerRequiredGCInfo(true); - } - - // printf("method will %s be fully interruptible\n", GetInterruptible() ? " " : "not"); -} - -/*****************************************************************************/ - -GenTree* Compiler::fgInitThisClass() -{ - noway_assert(!compIsForInlining()); - - CORINFO_LOOKUP_KIND kind; - info.compCompHnd->getLocationOfThisType(info.compMethodHnd, &kind); - - if (!kind.needsRuntimeLookup) - { - return fgGetSharedCCtor(info.compClassHnd); - } - else - { -#ifdef FEATURE_READYTORUN_COMPILER - // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR. - if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI)) - { - CORINFO_RESOLVED_TOKEN resolvedToken; - memset(&resolvedToken, 0, sizeof(resolvedToken)); - - // We are in a shared method body, but maybe we don't need a runtime lookup after all. 
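// Illustrative sketch, not part of this change: fgSetOptions above forces a
// frame pointer whenever any one of several independent conditions holds. A
// hypothetical distillation of a few of those checks (the real method also
// considers x86 tail calls, EH clauses, varargs and GC-reporting needs):
static bool FramePointerRequiredSketch(bool locallocUsed, bool fpOptDisabled,
                                       bool pinvokeFrame, bool profilerHook)
{
    return locallocUsed || fpOptDisabled || pinvokeFrame || profilerHook;
}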
- // This covers the case of a generic method on a non-generic type. - if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST)) - { - resolvedToken.hClass = info.compClassHnd; - return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF); - } - - // We need a runtime lookup. - GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind); - - // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static - // base of the class that owns the method being compiled". If we're in this method, it means we're not - // inlining and there's no ambiguity. - return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF, - gtNewCallArgs(ctxTree), &kind); - } -#endif - - // Collectible types requires that for shared generic code, if we use the generic context paramter - // that we report it. (This is a conservative approach, we could detect some cases particularly when the - // context parameter is this that we don't need the eager reporting logic.) - lvaGenericsContextInUse = true; - - switch (kind.runtimeLookupKind) - { - case CORINFO_LOOKUP_THISOBJ: - { - // This code takes a this pointer; but we need to pass the static method desc to get the right point in - // the hierarchy - GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF); - vtTree->gtFlags |= GTF_VAR_CONTEXT; - // Vtable pointer of this object - vtTree = gtNewMethodTableLookup(vtTree); - GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd); - - return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewCallArgs(vtTree, methodHnd)); - } - - case CORINFO_LOOKUP_CLASSPARAM: - { - GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); - vtTree->gtFlags |= GTF_VAR_CONTEXT; - return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewCallArgs(vtTree)); - } - - case CORINFO_LOOKUP_METHODPARAM: - { - GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); - methHndTree->gtFlags |= GTF_VAR_CONTEXT; - return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, - gtNewCallArgs(gtNewIconNode(0), methHndTree)); - } - - default: - noway_assert(!"Unknown LOOKUP_KIND"); - UNREACHABLE(); - } - } -} - -#ifdef DEBUG -/***************************************************************************** - * - * Tree walk callback to make sure no GT_QMARK nodes are present in the tree, - * except for the allowed ? 1 : 0; pattern. - */ -Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data) -{ - if ((*tree)->OperGet() == GT_QMARK) - { - fgCheckQmarkAllowedForm(*tree); - } - return WALK_CONTINUE; -} - -void Compiler::fgCheckQmarkAllowedForm(GenTree* tree) -{ - assert(tree->OperGet() == GT_QMARK); - assert(!"Qmarks beyond morph disallowed."); -} - -/***************************************************************************** - * - * Verify that the importer has created GT_QMARK nodes in a way we can - * process them. The following is allowed: - * - * 1. A top level qmark. Top level qmark is of the form: - * a) (bool) ? (void) : (void) OR - * b) V0N = (bool) ? (type) : (type) - * - * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child - * of either op1 of colon or op2 of colon but not a child of any other - * operator. - */ -void Compiler::fgPreExpandQmarkChecks(GenTree* expr) -{ - GenTree* topQmark = fgGetTopLevelQmark(expr); - - // If the top level Qmark is null, then scan the tree to make sure - // there are no qmarks within it. 
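// Illustrative sketch, not part of this change: source-level analogues of the
// two top-level qmark shapes that fgPreExpandQmarkChecks above allows - a
// void-typed conditional, or a conditional whose value is assigned to a single
// local.
static void AllowedQmarkFormsSketch(bool cond, int a, int b, int& sink)
{
    cond ? (void)(sink = a) : (void)(sink = b); // 1a) (bool) ? (void) : (void)
    int v0 = cond ? a : b;                      // 1b) V0N = (bool) ? (type) : (type)
    sink = v0;
}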
- if (topQmark == nullptr) - { - fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); - } - else - { - // We could probably expand the cond node also, but don't think the extra effort is necessary, - // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks. - fgWalkTreePre(&topQmark->AsOp()->gtOp1, Compiler::fgAssertNoQmark, nullptr); - - fgPreExpandQmarkChecks(topQmark->AsOp()->gtOp2->AsOp()->gtOp1); - fgPreExpandQmarkChecks(topQmark->AsOp()->gtOp2->AsOp()->gtOp2); - } -} -#endif // DEBUG - -/***************************************************************************** - * - * Get the top level GT_QMARK node in a given "expr", return NULL if such a - * node is not present. If the top level GT_QMARK node is assigned to a - * GT_LCL_VAR, then return the lcl node in ppDst. - * - */ -GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */) -{ - if (ppDst != nullptr) - { - *ppDst = nullptr; - } - - GenTree* topQmark = nullptr; - if (expr->gtOper == GT_QMARK) - { - topQmark = expr; - } - else if (expr->gtOper == GT_ASG && expr->AsOp()->gtOp2->gtOper == GT_QMARK && - expr->AsOp()->gtOp1->gtOper == GT_LCL_VAR) - { - topQmark = expr->AsOp()->gtOp2; - if (ppDst != nullptr) - { - *ppDst = expr->AsOp()->gtOp1; - } - } - return topQmark; -} - -/********************************************************************************* - * - * For a castclass helper call, - * Importer creates the following tree: - * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper()); - * - * This method splits the qmark expression created by the importer into the - * following blocks: (block, asg, cond1, cond2, helper, remainder) - * Notice that op1 is the result for both the conditions. So we coalesce these - * assignments into a single block instead of two blocks resulting a nested diamond. - * - * +---------->-----------+ - * | | | - * ^ ^ v - * | | | - * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder - * - * We expect to achieve the following codegen: - * mov rsi, rdx tmp = op1 // asgBlock - * test rsi, rsi goto skip if tmp == null ? // cond1Block - * je SKIP - * mov rcx, 0x76543210 cns = op2 // cond2Block - * cmp qword ptr [rsi], rcx goto skip if *tmp == op2 - * je SKIP - * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock - * mov rsi, rax - * SKIP: // remainderBlock - * tmp has the result. - * - */ -void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt) -{ -#ifdef DEBUG - if (verbose) - { - printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n", block->bbNum); - fgDispBasicBlocks(block, block, true); - } -#endif // DEBUG - - GenTree* expr = stmt->GetRootNode(); - - GenTree* dst = nullptr; - GenTree* qmark = fgGetTopLevelQmark(expr, &dst); - noway_assert(dst != nullptr); - - assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF); - - // Get cond, true, false exprs for the qmark. - GenTree* condExpr = qmark->gtGetOp1(); - GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); - GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); - - // Get cond, true, false exprs for the nested qmark. 
- GenTree* nestedQmark = falseExpr; - GenTree* cond2Expr; - GenTree* true2Expr; - GenTree* false2Expr; - - if (nestedQmark->gtOper == GT_QMARK) - { - cond2Expr = nestedQmark->gtGetOp1(); - true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode(); - false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode(); - - assert(cond2Expr->gtFlags & GTF_RELOP_QMARK); - cond2Expr->gtFlags &= ~GTF_RELOP_QMARK; - } - else - { - // This is a rare case that arises when we are doing minopts and encounter isinst of null - // gtFoldExpr was still is able to optimize away part of the tree (but not all). - // That means it does not match our pattern. - - // Rather than write code to handle this case, just fake up some nodes to make it match the common - // case. Synthesize a comparison that is always true, and for the result-on-true, use the - // entire subtree we expected to be the nested question op. - - cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL)); - true2Expr = nestedQmark; - false2Expr = gtNewIconNode(0, TYP_I_IMPL); - } - assert(false2Expr->OperGet() == trueExpr->OperGet()); - - // Clear flags as they are now going to be part of JTRUE. - assert(condExpr->gtFlags & GTF_RELOP_QMARK); - condExpr->gtFlags &= ~GTF_RELOP_QMARK; - - // Create the chain of blocks. See method header comment. - // The order of blocks after this is the following: - // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock - // - // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', - // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only - // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely - // remainderBlock will still be GC safe. - unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; - BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); - fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. - - BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true); - BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true); - BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true); - BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true); - - remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags; - - // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). - // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. - if ((block->bbFlags & BBF_INTERNAL) == 0) - { - helperBlock->bbFlags &= ~BBF_INTERNAL; - cond2Block->bbFlags &= ~BBF_INTERNAL; - cond1Block->bbFlags &= ~BBF_INTERNAL; - asgBlock->bbFlags &= ~BBF_INTERNAL; - helperBlock->bbFlags |= BBF_IMPORTED; - cond2Block->bbFlags |= BBF_IMPORTED; - cond1Block->bbFlags |= BBF_IMPORTED; - asgBlock->bbFlags |= BBF_IMPORTED; - } - - // Chain the flow correctly. - fgAddRefPred(asgBlock, block); - fgAddRefPred(cond1Block, asgBlock); - fgAddRefPred(cond2Block, cond1Block); - fgAddRefPred(helperBlock, cond2Block); - fgAddRefPred(remainderBlock, helperBlock); - fgAddRefPred(remainderBlock, cond1Block); - fgAddRefPred(remainderBlock, cond2Block); - - cond1Block->bbJumpDest = remainderBlock; - cond2Block->bbJumpDest = remainderBlock; - - // Set the weights; some are guesses. 
- asgBlock->inheritWeight(block); - cond1Block->inheritWeight(block); - cond2Block->inheritWeightPercentage(cond1Block, 50); - helperBlock->inheritWeightPercentage(cond2Block, 50); - - // Append cond1 as JTRUE to cond1Block - GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr); - Statement* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetILOffsetX()); - fgInsertStmtAtEnd(cond1Block, jmpStmt); - - // Append cond2 as JTRUE to cond2Block - jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr); - jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetILOffsetX()); - fgInsertStmtAtEnd(cond2Block, jmpStmt); - - // AsgBlock should get tmp = op1 assignment. - trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr); - Statement* trueStmt = fgNewStmtFromTree(trueExpr, stmt->GetILOffsetX()); - fgInsertStmtAtEnd(asgBlock, trueStmt); - - // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper. - gtReverseCond(cond2Expr); - GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr); - Statement* helperStmt = fgNewStmtFromTree(helperExpr, stmt->GetILOffsetX()); - fgInsertStmtAtEnd(helperBlock, helperStmt); - - // Finally remove the nested qmark stmt. - fgRemoveStmt(block, stmt); - - if (true2Expr->OperIs(GT_CALL) && (true2Expr->AsCall()->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN)) - { - fgConvertBBToThrowBB(helperBlock); - } - -#ifdef DEBUG - if (verbose) - { - printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n", block->bbNum); - fgDispBasicBlocks(block, remainderBlock, true); - } -#endif // DEBUG -} - -/***************************************************************************** - * - * Expand a statement with a top level qmark node. There are three cases, based - * on whether the qmark has both "true" and "false" arms, or just one of them. - * - * S0; - * C ? T : F; - * S1; - * - * Generates ===> - * - * bbj_always - * +---->------+ - * false | | - * S0 -->-- ~C -->-- T F -->-- S1 - * | | - * +--->--------+ - * bbj_cond(true) - * - * ----------------------------------------- - * - * S0; - * C ? T : NOP; - * S1; - * - * Generates ===> - * - * false - * S0 -->-- ~C -->-- T -->-- S1 - * | | - * +-->-------------+ - * bbj_cond(true) - * - * ----------------------------------------- - * - * S0; - * C ? NOP : F; - * S1; - * - * Generates ===> - * - * false - * S0 -->-- C -->-- F -->-- S1 - * | | - * +-->------------+ - * bbj_cond(true) - * - * If the qmark assigns to a variable, then create tmps for "then" - * and "else" results and assign the temp to the variable as a writeback step. - */ -void Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) -{ - GenTree* expr = stmt->GetRootNode(); - - // Retrieve the Qmark node to be expanded. - GenTree* dst = nullptr; - GenTree* qmark = fgGetTopLevelQmark(expr, &dst); - if (qmark == nullptr) - { - return; - } - - if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF) - { - fgExpandQmarkForCastInstOf(block, stmt); - return; - } - -#ifdef DEBUG - if (verbose) - { - printf("\nExpanding top-level qmark in " FMT_BB " (before)\n", block->bbNum); - fgDispBasicBlocks(block, block, true); - } -#endif // DEBUG - - // Retrieve the operands. 
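A worked example of the weight guesses set above, assuming (hypothetically) that the original block has weight 100: the assignment and first condition keep 100, the second condition gets half of that, and the helper gets half again.

constexpr unsigned InheritWeightPercentage(unsigned parentWeight, unsigned percent)
{
    return parentWeight * percent / 100;
}
static_assert(InheritWeightPercentage(100, 50) == 50, "cond2Block: half of cond1Block");
static_assert(InheritWeightPercentage(InheritWeightPercentage(100, 50), 50) == 25,
              "helperBlock: half of cond2Block");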
- GenTree* condExpr = qmark->gtGetOp1(); - GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); - GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); - - assert(condExpr->gtFlags & GTF_RELOP_QMARK); - condExpr->gtFlags &= ~GTF_RELOP_QMARK; - - assert(!varTypeIsFloating(condExpr->TypeGet())); - - bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP); - bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP); - assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark! - - // Create remainder, cond and "else" blocks. After this, the blocks are in this order: - // block ... condBlock ... elseBlock ... remainderBlock - // - // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', - // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only - // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely - // remainderBlock will still be GC safe. - unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; - BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); - fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. - - BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true); - BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true); - - // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). - // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. - if ((block->bbFlags & BBF_INTERNAL) == 0) - { - condBlock->bbFlags &= ~BBF_INTERNAL; - elseBlock->bbFlags &= ~BBF_INTERNAL; - condBlock->bbFlags |= BBF_IMPORTED; - elseBlock->bbFlags |= BBF_IMPORTED; - } - - remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags; - - condBlock->inheritWeight(block); - - fgAddRefPred(condBlock, block); - fgAddRefPred(elseBlock, condBlock); - fgAddRefPred(remainderBlock, elseBlock); - - BasicBlock* thenBlock = nullptr; - if (hasTrueExpr && hasFalseExpr) - { - // bbj_always - // +---->------+ - // false | | - // S0 -->-- ~C -->-- T F -->-- S1 - // | | - // +--->--------+ - // bbj_cond(true) - // - gtReverseCond(condExpr); - condBlock->bbJumpDest = elseBlock; - - thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true); - thenBlock->bbJumpDest = remainderBlock; - if ((block->bbFlags & BBF_INTERNAL) == 0) - { - thenBlock->bbFlags &= ~BBF_INTERNAL; - thenBlock->bbFlags |= BBF_IMPORTED; - } - - elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL); - - fgAddRefPred(thenBlock, condBlock); - fgAddRefPred(remainderBlock, thenBlock); - - thenBlock->inheritWeightPercentage(condBlock, 50); - elseBlock->inheritWeightPercentage(condBlock, 50); - } - else if (hasTrueExpr) - { - // false - // S0 -->-- ~C -->-- T -->-- S1 - // | | - // +-->-------------+ - // bbj_cond(true) - // - gtReverseCond(condExpr); - condBlock->bbJumpDest = remainderBlock; - fgAddRefPred(remainderBlock, condBlock); - // Since we have no false expr, use the one we'd already created. 
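As a reading aid, the "both arms" case handled above is equivalent to the following standalone sketch (dst, T and F are hypothetical; the condition is reversed so the conditional jump targets the else block, matching the diagram):

int ExpandBothArms(bool C, int T, int F)
{
    int dst;
    if (!C)      // condBlock: JTRUE(!C) jumps to elseBlock
    {
        dst = F; // elseBlock, falls through to the remainder
    }
    else
    {
        dst = T; // thenBlock, BBJ_ALWAYS back to the remainder
    }
    return dst;  // remainderBlock
}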
- thenBlock = elseBlock; - elseBlock = nullptr; - - thenBlock->inheritWeightPercentage(condBlock, 50); - } - else if (hasFalseExpr) - { - // false - // S0 -->-- C -->-- F -->-- S1 - // | | - // +-->------------+ - // bbj_cond(true) - // - condBlock->bbJumpDest = remainderBlock; - fgAddRefPred(remainderBlock, condBlock); - - elseBlock->inheritWeightPercentage(condBlock, 50); - } - - GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1()); - Statement* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetILOffsetX()); - fgInsertStmtAtEnd(condBlock, jmpStmt); - - // Remove the original qmark statement. - fgRemoveStmt(block, stmt); - - // Since we have top level qmarks, we either have a dst for it in which case - // we need to create tmps for true and falseExprs, else just don't bother - // assigning. - unsigned lclNum = BAD_VAR_NUM; - if (dst != nullptr) - { - assert(dst->gtOper == GT_LCL_VAR); - lclNum = dst->AsLclVar()->GetLclNum(); - } - else - { - assert(qmark->TypeGet() == TYP_VOID); - } - - if (hasTrueExpr) - { - if (dst != nullptr) - { - trueExpr = gtNewTempAssign(lclNum, trueExpr); - } - Statement* trueStmt = fgNewStmtFromTree(trueExpr, stmt->GetILOffsetX()); - fgInsertStmtAtEnd(thenBlock, trueStmt); - } - - // Assign the falseExpr into the dst or tmp, insert in elseBlock - if (hasFalseExpr) - { - if (dst != nullptr) - { - falseExpr = gtNewTempAssign(lclNum, falseExpr); - } - Statement* falseStmt = fgNewStmtFromTree(falseExpr, stmt->GetILOffsetX()); - fgInsertStmtAtEnd(elseBlock, falseStmt); - } - -#ifdef DEBUG - if (verbose) - { - printf("\nExpanding top-level qmark in " FMT_BB " (after)\n", block->bbNum); - fgDispBasicBlocks(block, remainderBlock, true); - } -#endif // DEBUG -} - -/***************************************************************************** - * - * Expand GT_QMARK nodes from the flow graph into basic blocks. - * - */ - -void Compiler::fgExpandQmarkNodes() -{ - if (compQmarkUsed) - { - for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) - { - for (Statement* stmt : block->Statements()) - { - GenTree* expr = stmt->GetRootNode(); -#ifdef DEBUG - fgPreExpandQmarkChecks(expr); -#endif - fgExpandQmarkStmt(block, stmt); - } - } -#ifdef DEBUG - fgPostExpandQmarkChecks(); -#endif - } - compQmarkRationalized = true; -} - -#ifdef DEBUG -/***************************************************************************** - * - * Make sure we don't have any more GT_QMARK nodes. - * - */ -void Compiler::fgPostExpandQmarkChecks() -{ - for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) - { - for (Statement* stmt : block->Statements()) - { - GenTree* expr = stmt->GetRootNode(); - fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); - } - } -} -#endif - -/***************************************************************************** - * - * Promoting struct locals - */ -void Compiler::fgPromoteStructs() -{ -#ifdef DEBUG - if (verbose) - { - printf("*************** In fgPromoteStructs()\n"); - } -#endif // DEBUG - - if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE)) - { - JITDUMP(" promotion opt flag not enabled\n"); - return; - } - - if (fgNoStructPromotion) - { - JITDUMP(" promotion disabled by JitNoStructPromotion\n"); - return; - } - -#if 0 - // The code in this #if has been useful in debugging struct promotion issues, by - // enabling selective enablement of the struct promotion optimization according to - // method hash. 
-#ifdef DEBUG - unsigned methHash = info.compMethodHash(); - char* lostr = getenv("structpromohashlo"); - unsigned methHashLo = 0; - if (lostr != NULL) - { - sscanf_s(lostr, "%x", &methHashLo); - } - char* histr = getenv("structpromohashhi"); - unsigned methHashHi = UINT32_MAX; - if (histr != NULL) - { - sscanf_s(histr, "%x", &methHashHi); - } - if (methHash < methHashLo || methHash > methHashHi) - { - return; - } - else - { - printf("Promoting structs for method %s, hash = 0x%x.\n", - info.compFullName, info.compMethodHash()); - printf(""); // in our logic this causes a flush - } -#endif // DEBUG -#endif // 0 - - if (info.compIsVarArgs) - { - JITDUMP(" promotion disabled because of varargs\n"); - return; - } - -#ifdef DEBUG - if (verbose) - { - printf("\nlvaTable before fgPromoteStructs\n"); - lvaTableDump(); - } -#endif // DEBUG - - // The lvaTable might grow as we grab temps. Make a local copy here. - unsigned startLvaCount = lvaCount; - - // - // Loop through the original lvaTable. Looking for struct locals to be promoted. - // - lvaStructPromotionInfo structPromotionInfo; - bool tooManyLocalsReported = false; - - // Clear the structPromotionHelper, since it is used during inlining, at which point it - // may be conservative about looking up SIMD info. - // We don't want to preserve those conservative decisions for the actual struct promotion. - structPromotionHelper->Clear(); - - for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++) - { - // Whether this var got promoted - bool promotedVar = false; - LclVarDsc* varDsc = &lvaTable[lclNum]; - - // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote - // its fields. Instead, we will attempt to enregister the entire struct. - if (varDsc->lvIsSIMDType() && (varDsc->lvIsUsedInSIMDIntrinsic() || isOpaqueSIMDLclVar(varDsc))) - { - varDsc->lvRegStruct = true; - } - // Don't promote if we have reached the tracking limit. - else if (lvaHaveManyLocals()) - { - // Print the message first time when we detected this condition - if (!tooManyLocalsReported) - { - JITDUMP("Stopped promoting struct fields, due to too many locals.\n"); - } - tooManyLocalsReported = true; - } - else if (varTypeIsStruct(varDsc)) - { - assert(structPromotionHelper != nullptr); - promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum); - } - - if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed) - { - // Even if we have not used this in a SIMD intrinsic, if it is not being promoted, - // we will treat it as a reg struct. - varDsc->lvRegStruct = true; - } - } - -#ifdef TARGET_ARM - if (structPromotionHelper->GetRequiresScratchVar()) - { - // Ensure that the scratch variable is allocated, in case we - // pass a promoted struct as an argument. - if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM) - { - lvaPromotedStructAssemblyScratchVar = - lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var.")); - lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL; - } - } -#endif // TARGET_ARM - -#ifdef DEBUG - if (verbose) - { - printf("\nlvaTable after fgPromoteStructs\n"); - lvaTableDump(); - } -#endif // DEBUG -} - -void Compiler::fgMorphStructField(GenTree* tree, GenTree* parent) -{ - noway_assert(tree->OperGet() == GT_FIELD); - - GenTreeField* field = tree->AsField(); - GenTree* objRef = field->gtFldObj; - GenTree* obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? 
objRef->AsOp()->gtOp1 : nullptr; - noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))); - - /* Is this an instance data member? */ - - if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)) - { - unsigned lclNum = obj->AsLclVarCommon()->GetLclNum(); - const LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (varTypeIsStruct(obj)) - { - if (varDsc->lvPromoted) - { - // Promoted struct - unsigned fldOffset = field->gtFldOffset; - unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); - - if (fieldLclIndex == BAD_VAR_NUM) - { - // Access a promoted struct's field with an offset that doesn't correspond to any field. - // It can happen if the struct was cast to another struct with different offsets. - return; - } - - const LclVarDsc* fieldDsc = &lvaTable[fieldLclIndex]; - var_types fieldType = fieldDsc->TypeGet(); - - assert(fieldType != TYP_STRUCT); // promoted LCL_VAR can't have a struct type. - if (tree->TypeGet() != fieldType) - { - if (tree->TypeGet() != TYP_STRUCT) - { - // This is going to be an incorrect instruction promotion. - // For example when we try to read int as long. - return; - } - - if (field->gtFldHnd != fieldDsc->lvFieldHnd) - { - CORINFO_CLASS_HANDLE fieldTreeClass = nullptr, fieldDscClass = nullptr; - - CorInfoType fieldTreeType = info.compCompHnd->getFieldType(field->gtFldHnd, &fieldTreeClass); - CorInfoType fieldDscType = info.compCompHnd->getFieldType(fieldDsc->lvFieldHnd, &fieldDscClass); - if (fieldTreeType != fieldDscType || fieldTreeClass != fieldDscClass) - { - // Access the promoted field with a different class handle, can't check that types match. - return; - } - // Access the promoted field as a field of a non-promoted struct with the same class handle. - } - else - { - // As we already checked this above, we must have a tree with a TYP_STRUCT type - // - assert(tree->TypeGet() == TYP_STRUCT); - - // The field tree accesses it as a struct, but the promoted LCL_VAR field - // says that it has another type. This happens when struct promotion unwraps - // a single field struct to get to its ultimate type. - // - // Note that currently, we cannot have a promoted LCL_VAR field with a struct type. - // - // This mismatch in types can lead to problems for some parent node type like GT_RETURN. - // So we check the parent node and only allow this optimization when we have - // a GT_ADDR or a GT_ASG. - // - // Note that for a GT_ASG we have to do some additional work, - // see below after the SetOper(GT_LCL_VAR) - // - if (!parent->OperIs(GT_ADDR, GT_ASG)) - { - // Don't transform other operations such as GT_RETURN - // - return; - } -#ifdef DEBUG - // This is an additional DEBUG-only sanity check - // - assert(structPromotionHelper != nullptr); - structPromotionHelper->CheckRetypedAsScalar(field->gtFldHnd, fieldType); -#endif // DEBUG - } - } - - tree->SetOper(GT_LCL_VAR); - tree->AsLclVarCommon()->SetLclNum(fieldLclIndex); - tree->gtType = fieldType; - tree->gtFlags &= GTF_NODE_MASK; // Note: that clears all flags except `GTF_COLON_COND`. 
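The field-to-local rewrite performed above corresponds to this source-level sketch; Point, p_x and p_y are hypothetical names, and the real transformation happens on JIT IR rather than C++ source:

struct Point { int x; int y; };

int SumFields_BeforePromotion(int a, int b)
{
    Point p;          // a single struct local V00
    p.x = a;          // GT_FIELD accesses off V00
    p.y = b;
    return p.x + p.y;
}

int SumFields_AfterPromotion(int a, int b)
{
    int p_x = a;      // promoted field local V01 (offset 0)
    int p_y = b;      // promoted field local V02 (offset 4)
    return p_x + p_y; // each field can now be tracked and enregistered independently
}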
- - if (parent->gtOper == GT_ASG) - { - // If we are changing the left side of an assignment, we need to set - // these two flags: - // - if (parent->AsOp()->gtOp1 == tree) - { - tree->gtFlags |= GTF_VAR_DEF; - tree->gtFlags |= GTF_DONT_CSE; - } - - // Promotion of struct containing struct fields where the field - // is a struct with a single pointer sized scalar type field: in - // this case struct promotion uses the type of the underlying - // scalar field as the type of struct field instead of recursively - // promoting. This can lead to a case where we have a block-asgn - // with its RHS replaced with a scalar type. Mark RHS value as - // DONT_CSE so that assertion prop will not do const propagation. - // The reason this is required is that if RHS of a block-asg is a - // constant, then it is interpreted as init-block incorrectly. - // - // TODO - This can also be avoided if we implement recursive struct - // promotion, tracked by #10019. - if (varTypeIsStruct(parent) && parent->AsOp()->gtOp2 == tree && !varTypeIsStruct(tree)) - { - tree->gtFlags |= GTF_DONT_CSE; - } - } -#ifdef DEBUG - if (verbose) - { - printf("Replacing the field in promoted struct with local var V%02u\n", fieldLclIndex); - } -#endif // DEBUG - } - } - else - { - // Normed struct - // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if - // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 - // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However, - // there is one extremely rare case where that won't be true. An enum type is a special value type - // that contains exactly one element of a primitive integer type (that, for CLS programs is named - // "value__"). The VM tells us that a local var of that enum type is the primitive type of the - // enum's single field. It turns out that it is legal for IL to access this field using ldflda or - // ldfld. For example: - // - // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum - // { - // .field public specialname rtspecialname int16 value__ - // .field public static literal valuetype mynamespace.e_t one = int16(0x0000) - // } - // .method public hidebysig static void Main() cil managed - // { - // .locals init (valuetype mynamespace.e_t V_0) - // ... - // ldloca.s V_0 - // ldflda int16 mynamespace.e_t::value__ - // ... - // } - // - // Normally, compilers will not generate the ldflda, since it is superfluous. - // - // In the example, the lclVar is short, but the JIT promotes all trees using this local to the - // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type - // mismatch like this, don't do this morphing. The local var may end up getting marked as - // address taken, and the appropriate SHORT load will be done from memory in that case. 
- - if (tree->TypeGet() == obj->TypeGet()) - { - tree->ChangeOper(GT_LCL_VAR); - tree->AsLclVarCommon()->SetLclNum(lclNum); - tree->gtFlags &= GTF_NODE_MASK; - - if ((parent->gtOper == GT_ASG) && (parent->AsOp()->gtOp1 == tree)) - { - tree->gtFlags |= GTF_VAR_DEF; - tree->gtFlags |= GTF_DONT_CSE; - } -#ifdef DEBUG - if (verbose) - { - printf("Replacing the field in normed struct with local var V%02u\n", lclNum); - } -#endif // DEBUG - } - } - } -} - -void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) -{ - noway_assert(tree->OperGet() == GT_LCL_FLD); - - unsigned lclNum = tree->AsLclFld()->GetLclNum(); - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (varTypeIsStruct(varDsc)) - { - if (varDsc->lvPromoted) - { - // Promoted struct - unsigned fldOffset = tree->AsLclFld()->GetLclOffs(); - unsigned fieldLclIndex = 0; - LclVarDsc* fldVarDsc = nullptr; - - if (fldOffset != BAD_VAR_NUM) - { - fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); - noway_assert(fieldLclIndex != BAD_VAR_NUM); - fldVarDsc = &lvaTable[fieldLclIndex]; - } - - var_types treeType = tree->TypeGet(); - var_types fieldType = fldVarDsc->TypeGet(); - if (fldOffset != BAD_VAR_NUM && - ((genTypeSize(fieldType) == genTypeSize(treeType)) || (varDsc->lvFieldCnt == 1))) - { - // There is an existing sub-field we can use. - tree->AsLclFld()->SetLclNum(fieldLclIndex); - - // The field must be an enregisterable type; otherwise it would not be a promoted field. - // The tree type may not match, e.g. for return types that have been morphed, but both - // must be enregisterable types. - assert(varTypeIsEnregisterable(treeType) && varTypeIsEnregisterable(fieldType)); - - tree->ChangeOper(GT_LCL_VAR); - assert(tree->AsLclVarCommon()->GetLclNum() == fieldLclIndex); - tree->gtType = fldVarDsc->TypeGet(); - - if ((parent->gtOper == GT_ASG) && (parent->AsOp()->gtOp1 == tree)) - { - tree->gtFlags |= GTF_VAR_DEF; - tree->gtFlags |= GTF_DONT_CSE; - } - JITDUMP("Replacing the GT_LCL_FLD in promoted struct with local var V%02u\n", fieldLclIndex); - } - else - { - // There is no existing field that has all the parts that we need - // So we must ensure that the struct lives in memory. - lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); - -#ifdef DEBUG - // We can't convert this guy to a float because he really does have his - // address taken.. - varDsc->lvKeepType = 1; -#endif // DEBUG - } - } - else if (varTypeIsSIMD(varDsc) && (genTypeSize(tree->TypeGet()) == genTypeSize(varDsc))) - { - assert(tree->AsLclFld()->GetLclOffs() == 0); - tree->gtType = varDsc->TypeGet(); - tree->ChangeOper(GT_LCL_VAR); - JITDUMP("Replacing GT_LCL_FLD of struct with local var V%02u\n", lclNum); - } - } -} - -//------------------------------------------------------------------------ -// fgResetImplicitByRefRefCount: Clear the ref count field of all implicit byrefs - -void Compiler::fgResetImplicitByRefRefCount() -{ -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) -#ifdef DEBUG - if (verbose) - { - printf("\n*************** In fgResetImplicitByRefRefCount()\n"); - } -#endif // DEBUG - - for (unsigned lclNum = 0; lclNum < info.compArgsCount; ++lclNum) - { - LclVarDsc* varDsc = lvaGetDesc(lclNum); - - if (varDsc->lvIsImplicitByRef) - { - // Clear the ref count field; fgMarkAddressTakenLocals will increment it per - // appearance of implicit-by-ref param so that call arg morphing can do an - // optimization for single-use implicit-by-ref params whose single use is as - // an outgoing call argument. 
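The size check in fgMorphLocalField above can be pictured with a standalone sketch (Pair and both helpers are hypothetical): a reinterpreting read that exactly covers one promoted field can be redirected to that field's local, while a read that spans fields forces the whole struct to stay addressable in memory.

#include <cstdint>
#include <cstring>

struct Pair
{
    int32_t a; // would become field local V01
    int32_t b; // would become field local V02
};

int32_t ReadMatchingField(const Pair& p)
{
    int32_t v;
    std::memcpy(&v, &p.a, sizeof(v)); // 4-byte read at offset 0 lines up with field 'a'
    return v;                         // can be rewritten as a plain use of V01
}

int64_t ReadAcrossFields(const Pair& p)
{
    int64_t v;
    std::memcpy(&v, &p, sizeof(v));   // 8-byte read spans both fields: no single field matches,
    return v;                         // so the struct is marked do-not-enregister and kept in memory
}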
- varDsc->setLvRefCnt(0, RCS_EARLY); - } - } - -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 -} - -//------------------------------------------------------------------------ -// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from -// struct to pointer). Also choose (based on address-exposed analysis) -// which struct promotions of implicit byrefs to keep or discard. -// For those which are kept, insert the appropriate initialization code. -// For those which are to be discarded, annotate the promoted field locals -// so that fgMorphImplicitByRefArgs will know to rewrite their appearances -// using indirections off the pointer parameters. - -void Compiler::fgRetypeImplicitByRefArgs() -{ -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) -#ifdef DEBUG - if (verbose) - { - printf("\n*************** In fgRetypeImplicitByRefArgs()\n"); - } -#endif // DEBUG - - for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) - { - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (lvaIsImplicitByRefLocal(lclNum)) - { - unsigned size; - - if (varDsc->lvSize() > REGSIZE_BYTES) - { - size = varDsc->lvSize(); - } - else - { - CORINFO_CLASS_HANDLE typeHnd = varDsc->GetStructHnd(); - size = info.compCompHnd->getClassSize(typeHnd); - } - - if (varDsc->lvPromoted) - { - // This implicit-by-ref was promoted; create a new temp to represent the - // promoted struct before rewriting this parameter as a pointer. - unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref")); - lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true); - if (info.compIsVarArgs) - { - lvaSetStructUsedAsVarArg(newLclNum); - } - - // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array. - varDsc = &lvaTable[lclNum]; - - // Copy the struct promotion annotations to the new temp. - LclVarDsc* newVarDsc = &lvaTable[newLclNum]; - newVarDsc->lvPromoted = true; - newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart; - newVarDsc->lvFieldCnt = varDsc->lvFieldCnt; - newVarDsc->lvContainsHoles = varDsc->lvContainsHoles; - newVarDsc->lvCustomLayout = varDsc->lvCustomLayout; -#ifdef DEBUG - newVarDsc->lvKeepType = true; -#endif // DEBUG - - // Propagate address-taken-ness and do-not-enregister-ness. - newVarDsc->lvAddrExposed = varDsc->lvAddrExposed; - newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister; -#ifdef DEBUG - newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr; - newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr; - newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr; - newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr; - newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall; -#endif // DEBUG - - // If the promotion is dependent, the promoted temp would just be committed - // to memory anyway, so we'll rewrite its appearances to be indirections - // through the pointer parameter, the same as we'd do for this - // parameter if it weren't promoted at all (otherwise the initialization - // of the new temp would just be a needless memcpy at method entry). - // - // Otherwise, see how many appearances there are. We keep two early ref counts: total - // number of references to the struct or some field, and how many of these are - // arguments to calls. We undo promotion unless we see enough non-call uses. 
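Background for the retyping performed above, sketched at the ABI level (Windows x64 / ARM64 style; Big is hypothetical): a struct argument too large for a register is passed as a pointer to a caller-made copy, so inside the callee the "struct" parameter really is a pointer.

struct Big
{
    long long a, b, c, d; // 32 bytes: passed by implicit reference
};

// What the source-level signature says:
//     long long UseBig(Big value);
// What the callee effectively works with once the parameter is retyped to TYP_BYREF:
long long UseBig_AsRetyped(const Big* value)
{
    return value->a + value->d; // field uses become indirections off the pointer
}

Because the copy is made by the caller, the callee can freely rewrite uses of the struct as indirections without affecting the caller's value.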
- // - const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY); - const unsigned callAppearances = (unsigned)varDsc->lvRefCntWtd(RCS_EARLY); - assert(totalAppearances >= callAppearances); - const unsigned nonCallAppearances = totalAppearances - callAppearances; - - bool undoPromotion = ((lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || - (nonCallAppearances <= varDsc->lvFieldCnt)); - -#ifdef DEBUG - // Above is a profitability heurisic; either value of - // undoPromotion should lead to correct code. So, - // under stress, make different decisions at times. - if (compStressCompile(STRESS_BYREF_PROMOTION, 25)) - { - undoPromotion = !undoPromotion; - JITDUMP("Stress -- changing byref undo promotion for V%02u to %s undo\n", lclNum, - undoPromotion ? "" : "NOT"); - } -#endif // DEBUG - - JITDUMP("%s promotion of implicit by-ref V%02u: %s total: %u non-call: %u fields: %u\n", - undoPromotion ? "Undoing" : "Keeping", lclNum, - (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ? "dependent;" : "", - totalAppearances, nonCallAppearances, varDsc->lvFieldCnt); - - if (!undoPromotion) - { - // Insert IR that initializes the temp from the parameter. - // LHS is a simple reference to the temp. - fgEnsureFirstBBisScratch(); - GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType); - // RHS is an indirection (using GT_OBJ) off the parameter. - GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF); - GenTree* rhs = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, typGetBlkLayout(size)); - GenTree* assign = gtNewAssignNode(lhs, rhs); - fgNewStmtAtBeg(fgFirstBB, assign); - } - - // Update the locals corresponding to the promoted fields. - unsigned fieldLclStart = varDsc->lvFieldLclStart; - unsigned fieldCount = varDsc->lvFieldCnt; - unsigned fieldLclStop = fieldLclStart + fieldCount; - - for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) - { - LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; - - if (undoPromotion) - { - // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs - // will know to rewrite appearances of this local. - assert(fieldVarDsc->lvParentLcl == lclNum); - } - else - { - // Set the new parent. - fieldVarDsc->lvParentLcl = newLclNum; - // Clear the ref count field; it is used to communicate the number of references - // to the implicit byref parameter when morphing calls that pass the implicit byref - // out as an outgoing argument value, but that doesn't pertain to this field local - // which is now a field of a non-arg local. - fieldVarDsc->setLvRefCnt(0, RCS_EARLY); - } - - fieldVarDsc->lvIsParam = false; - // The fields shouldn't inherit any register preferences from - // the parameter which is really a pointer to the struct. - fieldVarDsc->lvIsRegArg = false; - fieldVarDsc->lvIsMultiRegArg = false; - fieldVarDsc->SetArgReg(REG_NA); -#if FEATURE_MULTIREG_ARGS - fieldVarDsc->SetOtherArgReg(REG_NA); -#endif - } - - // Hijack lvFieldLclStart to record the new temp number. - // It will get fixed up in fgMarkDemotedImplicitByRefArgs. - varDsc->lvFieldLclStart = newLclNum; - // Go ahead and clear lvFieldCnt -- either we're promoting - // a replacement temp or we're not promoting this arg, and - // in either case the parameter is now a pointer that doesn't - // have these fields. 
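A worked example of the undo-promotion heuristic above, using hypothetical counts:

constexpr unsigned totalAppearances   = 5;     // all early refs of the struct or its fields
constexpr unsigned callAppearances    = 3;     // of those, uses as outgoing call arguments
constexpr unsigned fieldCnt           = 2;     // number of promoted field locals
constexpr bool     dependentPromotion = false; // not a dependently promoted struct

constexpr unsigned nonCallAppearances = totalAppearances - callAppearances; // = 2
constexpr bool     undoPromotion      = dependentPromotion || (nonCallAppearances <= fieldCnt);
static_assert(undoPromotion, "2 non-call uses vs. 2 fields: not enough to keep the promotion");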
- varDsc->lvFieldCnt = 0; - - // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs - // whether references to the struct should be rewritten as - // indirections off the pointer (not promoted) or references - // to the new struct local (promoted). - varDsc->lvPromoted = !undoPromotion; - } - else - { - // The "undo promotion" path above clears lvPromoted for args that struct - // promotion wanted to promote but that aren't considered profitable to - // rewrite. It hijacks lvFieldLclStart to communicate to - // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left - // on such args for fgMorphImplicitByRefArgs to consult in the interim. - // Here we have an arg that was simply never promoted, so make sure it doesn't - // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs - // and fgMarkDemotedImplicitByRefArgs. - assert(varDsc->lvFieldLclStart == 0); - } - - // Since the parameter in this position is really a pointer, its type is TYP_BYREF. - varDsc->lvType = TYP_BYREF; - - // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF - // make sure that the following flag is not set as these will force SSA to - // exclude tracking/enregistering these LclVars. (see SsaBuilder::IncludeInSsa) - // - varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it. - - // The struct parameter may have had its address taken, but the pointer parameter - // cannot -- any uses of the struct parameter's address are uses of the pointer - // parameter's value, and there's no way for the MSIL to reference the pointer - // parameter's address. So clear the address-taken bit for the parameter. - varDsc->lvAddrExposed = 0; - varDsc->lvDoNotEnregister = 0; - -#ifdef DEBUG - // This should not be converted to a double in stress mode, - // because it is really a pointer - varDsc->lvKeepType = 1; - - if (verbose) - { - printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum); - } -#endif // DEBUG - } - } - -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 -} - -//------------------------------------------------------------------------ -// fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion -// asked to promote. Appearances of these have now been rewritten -// (by fgMorphImplicitByRefArgs) using indirections from the pointer -// parameter or references to the promotion temp, as appropriate. - -void Compiler::fgMarkDemotedImplicitByRefArgs() -{ -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) - - for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) - { - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (lvaIsImplicitByRefLocal(lclNum)) - { - if (varDsc->lvPromoted) - { - // The parameter is simply a pointer now, so clear lvPromoted. It was left set - // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that - // appearances of this arg needed to be rewritten to a new promoted struct local. - varDsc->lvPromoted = false; - - // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs - // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one. - varDsc->lvFieldLclStart = 0; - } - else if (varDsc->lvFieldLclStart != 0) - { - // We created new temps to represent a promoted struct corresponding to this - // parameter, but decided not to go through with the promotion and have - // rewritten all uses as indirections off the pointer parameter. 
- // We stashed the pointer to the new struct temp in lvFieldLclStart; make - // note of that and clear the annotation. - unsigned structLclNum = varDsc->lvFieldLclStart; - varDsc->lvFieldLclStart = 0; - - // Clear the arg's ref count; this was set during address-taken analysis so that - // call morphing could identify single-use implicit byrefs; we're done with - // that, and want it to be in its default state of zero when we go to set - // real ref counts for all variables. - varDsc->setLvRefCnt(0, RCS_EARLY); - - // The temp struct is now unused; set flags appropriately so that we - // won't allocate space for it on the stack. - LclVarDsc* structVarDsc = &lvaTable[structLclNum]; - structVarDsc->setLvRefCnt(0, RCS_EARLY); - structVarDsc->lvAddrExposed = false; -#ifdef DEBUG - structVarDsc->lvUnusedStruct = true; -#endif // DEBUG - - unsigned fieldLclStart = structVarDsc->lvFieldLclStart; - unsigned fieldCount = structVarDsc->lvFieldCnt; - unsigned fieldLclStop = fieldLclStart + fieldCount; - - for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) - { - // Fix the pointer to the parent local. - LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; - assert(fieldVarDsc->lvParentLcl == lclNum); - fieldVarDsc->lvParentLcl = structLclNum; - - // The field local is now unused; set flags appropriately so that - // we won't allocate stack space for it. - fieldVarDsc->setLvRefCnt(0, RCS_EARLY); - fieldVarDsc->lvAddrExposed = false; - } - } - } - } - -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 -} - -/***************************************************************************** - * - * Morph irregular parameters - * for x64 and ARM64 this means turning them into byrefs, adding extra indirs. - */ -bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) -{ -#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) - - return false; - -#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 - - bool changed = false; - - // Implicit byref morphing needs to know if the reference to the parameter is a - // child of GT_ADDR or not, so this method looks one level down and does the - // rewrite whenever a child is a reference to an implicit byref parameter. - if (tree->gtOper == GT_ADDR) - { - if (tree->AsOp()->gtOp1->gtOper == GT_LCL_VAR) - { - GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true); - changed = (morphedTree != nullptr); - assert(!changed || (morphedTree == tree)); - } - } - else - { - for (GenTree** pTree : tree->UseEdges()) - { - GenTree** pTreeCopy = pTree; - GenTree* childTree = *pTree; - if (childTree->gtOper == GT_LCL_VAR) - { - GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false); - if (newChildTree != nullptr) - { - changed = true; - *pTreeCopy = newChildTree; - } - } - } - } - - return changed; -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 -} - -GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) -{ - assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->AsOp()->gtOp1->gtOper == GT_LCL_VAR))); - assert(isAddr == (tree->gtOper == GT_ADDR)); - - GenTree* lclVarTree = isAddr ? 
tree->AsOp()->gtOp1 : tree; - unsigned lclNum = lclVarTree->AsLclVarCommon()->GetLclNum(); - LclVarDsc* lclVarDsc = &lvaTable[lclNum]; - - CORINFO_FIELD_HANDLE fieldHnd; - unsigned fieldOffset = 0; - var_types fieldRefType = TYP_UNKNOWN; - - if (lvaIsImplicitByRefLocal(lclNum)) - { - // The SIMD transformation to coalesce contiguous references to SIMD vector fields will - // re-invoke the traversal to mark address-taken locals. - // So, we may encounter a tree that has already been transformed to TYP_BYREF. - // If we do, leave it as-is. - if (!varTypeIsStruct(lclVarTree)) - { - assert(lclVarTree->TypeGet() == TYP_BYREF); - - return nullptr; - } - else if (lclVarDsc->lvPromoted) - { - // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this - // arg. Rewrite this to refer to the new local. - assert(lclVarDsc->lvFieldLclStart != 0); - lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart); - return tree; - } - - fieldHnd = nullptr; - } - else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl)) - { - // This was a field reference to an implicit-by-reference struct parameter that was - // dependently promoted; update it to a field reference off the pointer. - // Grab the field handle from the struct field lclVar. - fieldHnd = lclVarDsc->lvFieldHnd; - fieldOffset = lclVarDsc->lvFldOffset; - assert(fieldHnd != nullptr); - // Update lclNum/lclVarDsc to refer to the parameter - lclNum = lclVarDsc->lvParentLcl; - lclVarDsc = &lvaTable[lclNum]; - fieldRefType = lclVarTree->TypeGet(); - } - else - { - // We only need to tranform the 'marked' implicit by ref parameters - return nullptr; - } - - // This is no longer a def of the lclVar, even if it WAS a def of the struct. - lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK); - - if (isAddr) - { - if (fieldHnd == nullptr) - { - // change &X into just plain X - tree->ReplaceWith(lclVarTree, this); - tree->gtType = TYP_BYREF; - } - else - { - // change &(X.f) [i.e. GT_ADDR of local for promoted arg field] - // into &(X, f) [i.e. 
GT_ADDR of GT_FIELD off ptr param] - lclVarTree->AsLclVarCommon()->SetLclNum(lclNum); - lclVarTree->gtType = TYP_BYREF; - tree->AsOp()->gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset); - } - -#ifdef DEBUG - if (verbose) - { - printf("Replacing address of implicit by ref struct parameter with byref:\n"); - } -#endif // DEBUG - } - else - { - // Change X into OBJ(X) or FIELD(X, f) - var_types structType = tree->gtType; - tree->gtType = TYP_BYREF; - - if (fieldHnd) - { - tree->AsLclVarCommon()->SetLclNum(lclNum); - tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset); - } - else - { - tree = gtNewObjNode(lclVarDsc->GetStructHnd(), tree); - - if (structType == TYP_STRUCT) - { - gtSetObjGcInfo(tree->AsObj()); - } - } - - // TODO-CQ: If the VM ever stops violating the ABI and passing heap references - // we could remove TGTANYWHERE - tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE); - -#ifdef DEBUG - if (verbose) - { - printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n"); - } -#endif // DEBUG - } - -#ifdef DEBUG - if (verbose) - { - gtDispTree(tree); - } -#endif // DEBUG - - return tree; -} - -//------------------------------------------------------------------------ -// fgAddFieldSeqForZeroOffset: -// Associate a fieldSeq (with a zero offset) with the GenTree node 'addr' -// -// Arguments: -// addr - A GenTree node -// fieldSeqZero - a fieldSeq (with a zero offset) -// -// Notes: -// Some GenTree nodes have internal fields that record the field sequence. -// If we have one of these nodes: GT_CNS_INT, GT_LCL_FLD -// we can append the field sequence using the gtFieldSeq -// If we have a GT_ADD of a GT_CNS_INT we can use the -// fieldSeq from child node. -// Otherwise we record 'fieldSeqZero' in the GenTree node using -// a Map: GetFieldSeqStore() -// When doing so we take care to preserve any existing zero field sequence -// -void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) -{ - // We expect 'addr' to be an address at this point. - assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); - - // Tunnel through any commas. - const bool commaOnly = true; - addr = addr->gtEffectiveVal(commaOnly); - - // We still expect 'addr' to be an address at this point. 
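Why a zero-offset annotation is needed at all, as a standalone sketch (Outer is hypothetical): the address of a struct and the address of its first field are the same value, so only the recorded field sequence, not the address itself, distinguishes the two.

#include <cstddef>

struct Outer
{
    int first;  // lives at offset 0
    int second;
};

static_assert(offsetof(Outer, first) == 0, "the first field shares the struct's address");

bool SameAddress(Outer* p)
{
    return static_cast<void*>(p) == static_cast<void*>(&p->first); // always true
}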
- assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); - - FieldSeqNode* fieldSeqUpdate = fieldSeqZero; - GenTree* fieldSeqNode = addr; - bool fieldSeqRecorded = false; - -#ifdef DEBUG - if (verbose) - { - printf("\nfgAddFieldSeqForZeroOffset for"); - gtDispFieldSeq(fieldSeqZero); - - printf("\naddr (Before)\n"); - gtDispNode(addr, nullptr, nullptr, false); - gtDispCommonEndLine(addr); - } -#endif // DEBUG - - switch (addr->OperGet()) - { - case GT_CNS_INT: - fieldSeqUpdate = GetFieldSeqStore()->Append(addr->AsIntCon()->gtFieldSeq, fieldSeqZero); - addr->AsIntCon()->gtFieldSeq = fieldSeqUpdate; - fieldSeqRecorded = true; - break; - - case GT_LCL_FLD: - { - GenTreeLclFld* lclFld = addr->AsLclFld(); - fieldSeqUpdate = GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeqZero); - lclFld->SetFieldSeq(fieldSeqUpdate); - fieldSeqRecorded = true; - break; - } - - case GT_ADDR: - if (addr->AsOp()->gtOp1->OperGet() == GT_LCL_FLD) - { - fieldSeqNode = addr->AsOp()->gtOp1; - - GenTreeLclFld* lclFld = addr->AsOp()->gtOp1->AsLclFld(); - fieldSeqUpdate = GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeqZero); - lclFld->SetFieldSeq(fieldSeqUpdate); - fieldSeqRecorded = true; - } - break; - - case GT_ADD: - if (addr->AsOp()->gtOp1->OperGet() == GT_CNS_INT) - { - fieldSeqNode = addr->AsOp()->gtOp1; - - fieldSeqUpdate = GetFieldSeqStore()->Append(addr->AsOp()->gtOp1->AsIntCon()->gtFieldSeq, fieldSeqZero); - addr->AsOp()->gtOp1->AsIntCon()->gtFieldSeq = fieldSeqUpdate; - fieldSeqRecorded = true; - } - else if (addr->AsOp()->gtOp2->OperGet() == GT_CNS_INT) - { - fieldSeqNode = addr->AsOp()->gtOp2; - - fieldSeqUpdate = GetFieldSeqStore()->Append(addr->AsOp()->gtOp2->AsIntCon()->gtFieldSeq, fieldSeqZero); - addr->AsOp()->gtOp2->AsIntCon()->gtFieldSeq = fieldSeqUpdate; - fieldSeqRecorded = true; - } - break; - - default: - break; - } - - if (fieldSeqRecorded == false) - { - // Record in the general zero-offset map. - - // The "addr" node might already be annotated with a zero-offset field sequence. - FieldSeqNode* existingFieldSeq = nullptr; - if (GetZeroOffsetFieldMap()->Lookup(addr, &existingFieldSeq)) - { - // Append the zero field sequences - fieldSeqUpdate = GetFieldSeqStore()->Append(existingFieldSeq, fieldSeqZero); - } - // Overwrite the field sequence annotation for op1 - GetZeroOffsetFieldMap()->Set(addr, fieldSeqUpdate, NodeToFieldSeqMap::Overwrite); - fieldSeqRecorded = true; - } - -#ifdef DEBUG - if (verbose) - { - printf(" (After)\n"); - gtDispNode(fieldSeqNode, nullptr, nullptr, false); - gtDispCommonEndLine(fieldSeqNode); - } -#endif // DEBUG -} - -#ifdef FEATURE_SIMD - -//----------------------------------------------------------------------------------- -// fgMorphCombineSIMDFieldAssignments: -// If the RHS of the input stmt is a read for simd vector X Field, then this function -// will keep reading next few stmts based on the vector size(2, 3, 4). -// If the next stmts LHS are located contiguous and RHS are also located -// contiguous, then we replace those statements with a copyblk. -// -// Argument: -// block - BasicBlock*. block which stmt belongs to -// stmt - Statement*. the stmt node we want to check -// -// return value: -// if this funciton successfully optimized the stmts, then return true. 
Otherwise -// return false; - -bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt) -{ - GenTree* tree = stmt->GetRootNode(); - assert(tree->OperGet() == GT_ASG); - - GenTree* originalLHS = tree->AsOp()->gtOp1; - GenTree* prevLHS = tree->AsOp()->gtOp1; - GenTree* prevRHS = tree->AsOp()->gtOp2; - unsigned index = 0; - var_types baseType = TYP_UNKNOWN; - unsigned simdSize = 0; - GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true); - - if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT) - { - // if the RHS is not from a SIMD vector field X, then there is no need to check further. - return false; - } - - var_types simdType = getSIMDTypeForSize(simdSize); - int assignmentsCount = simdSize / genTypeSize(baseType) - 1; - int remainingAssignments = assignmentsCount; - Statement* curStmt = stmt->GetNextStmt(); - Statement* lastStmt = stmt; - - while (curStmt != nullptr && remainingAssignments > 0) - { - GenTree* exp = curStmt->GetRootNode(); - if (exp->OperGet() != GT_ASG) - { - break; - } - GenTree* curLHS = exp->gtGetOp1(); - GenTree* curRHS = exp->gtGetOp2(); - - if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS)) - { - break; - } - - remainingAssignments--; - prevLHS = curLHS; - prevRHS = curRHS; - - lastStmt = curStmt; - curStmt = curStmt->GetNextStmt(); - } - - if (remainingAssignments > 0) - { - // if the left assignments number is bigger than zero, then this means - // that the assignments are not assgining to the contiguously memory - // locations from same vector. - return false; - } -#ifdef DEBUG - if (verbose) - { - printf("\nFound contiguous assignments from a SIMD vector to memory.\n"); - printf("From " FMT_BB ", stmt ", block->bbNum); - printStmtID(stmt); - printf(" to stmt"); - printStmtID(lastStmt); - printf("\n"); - } -#endif - - for (int i = 0; i < assignmentsCount; i++) - { - fgRemoveStmt(block, stmt->GetNextStmt()); - } - - GenTree* dstNode; - - if (originalLHS->OperIs(GT_LCL_FLD)) - { - dstNode = originalLHS; - dstNode->gtType = simdType; - dstNode->AsLclFld()->SetFieldSeq(FieldSeqStore::NotAField()); - - // This may have changed a partial local field into full local field - if (dstNode->IsPartialLclFld(this)) - { - dstNode->gtFlags |= GTF_VAR_USEASG; - } - else - { - dstNode->gtFlags &= ~GTF_VAR_USEASG; - } - } - else - { - GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); - if (simdStructNode->OperIsLocal()) - { - setLclRelatedToSIMDIntrinsic(simdStructNode); - } - GenTree* copyBlkAddr = copyBlkDst; - if (copyBlkAddr->gtOper == GT_LEA) - { - copyBlkAddr = copyBlkAddr->AsAddrMode()->Base(); - } - GenTreeLclVarCommon* localDst = nullptr; - if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr)) - { - setLclRelatedToSIMDIntrinsic(localDst); - } - - if (simdStructNode->TypeGet() == TYP_BYREF) - { - assert(simdStructNode->OperIsLocal()); - assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->GetLclNum())); - simdStructNode = gtNewIndir(simdType, simdStructNode); - } - else - { - assert(varTypeIsSIMD(simdStructNode)); - } - - dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst); - } - -#ifdef DEBUG - if (verbose) - { - printf("\n" FMT_BB " stmt ", block->bbNum); - printStmtID(stmt); - printf("(before)\n"); - gtDispStmt(stmt); - } -#endif - - assert(!simdStructNode->CanCSE()); - simdStructNode->ClearDoNotCSE(); - - tree = gtNewAssignNode(dstNode, simdStructNode); - - stmt->SetRootNode(tree); - - // Since we 
generated a new address node which didn't exist before, - // we should expose this address manually here. - // TODO-ADDR: Remove this when LocalAddressVisitor transforms all - // local field access into LCL_FLDs, at that point we would be - // combining 2 existing LCL_FLDs or 2 FIELDs that do not reference - // a local and thus cannot result in a new address exposed local. - fgMarkAddressExposedLocals(stmt); - -#ifdef DEBUG - if (verbose) - { - printf("\nReplaced " FMT_BB " stmt", block->bbNum); - printStmtID(stmt); - printf("(after)\n"); - gtDispStmt(stmt); - } -#endif - return true; -} - -#endif // FEATURE_SIMD - -//------------------------------------------------------------------------ -// fgCheckStmtAfterTailCall: check that statements after the tail call stmt -// candidate are in one of expected forms, that are desctibed below. -// -// Return Value: -// 'true' if stmts are in the expected form, else 'false'. -// -bool Compiler::fgCheckStmtAfterTailCall() -{ - - // For void calls, we would have created a GT_CALL in the stmt list. - // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)). - // For calls returning structs, we would have a void call, followed by a void return. - // For debuggable code, it would be an assignment of the call to a temp - // We want to get rid of any of this extra trees, and just leave - // the call. - Statement* callStmt = fgMorphStmt; - - Statement* nextMorphStmt = callStmt->GetNextStmt(); - - // Check that the rest stmts in the block are in one of the following pattern: - // 1) ret(void) - // 2) ret(cast*(callResultLclVar)) - // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block - if (nextMorphStmt != nullptr) - { - GenTree* callExpr = callStmt->GetRootNode(); - if (callExpr->gtOper != GT_ASG) - { - // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar), - // where lclVar was return buffer in the call for structs or simd. - Statement* retStmt = nextMorphStmt; - GenTree* retExpr = retStmt->GetRootNode(); - noway_assert(retExpr->gtOper == GT_RETURN); - - nextMorphStmt = retStmt->GetNextStmt(); - } - else - { - noway_assert(callExpr->gtGetOp1()->OperIsLocal()); - unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->GetLclNum(); - -#if FEATURE_TAILCALL_OPT_SHARED_RETURN - - // We can have a chain of assignments from the call result to - // various inline return spill temps. These are ok as long - // as the last one ultimately provides the return value or is ignored. - // - // And if we're returning a small type we may see a cast - // on the source side. - while ((nextMorphStmt != nullptr) && (nextMorphStmt->GetRootNode()->OperIs(GT_ASG))) - { - Statement* moveStmt = nextMorphStmt; - GenTree* moveExpr = nextMorphStmt->GetRootNode(); - GenTree* moveDest = moveExpr->gtGetOp1(); - noway_assert(moveDest->OperIsLocal()); - - // Tunnel through any casts on the source side. - GenTree* moveSource = moveExpr->gtGetOp2(); - while (moveSource->OperIs(GT_CAST)) - { - noway_assert(!moveSource->gtOverflow()); - moveSource = moveSource->gtGetOp1(); - } - noway_assert(moveSource->OperIsLocal()); - - // Verify we're just passing the value from one local to another - // along the chain. 
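The coalescing implemented by fgMorphCombineSIMDFieldAssignments above targets sequences like the following standalone sketch (Vec4 stands in for a 16-byte SIMD vector type; both functions are hypothetical): four contiguous scalar stores whose sources are the X/Y/Z/W fields of one vector are replaced by a single simdSize-byte block copy.

#include <cstring>

struct Vec4 { float X, Y, Z, W; };

void CopyFieldByField(float* dst, const Vec4& v)
{
    dst[0] = v.X; // first statement: RHS reads vector field X
    dst[1] = v.Y; // following statements: LHS and RHS are both contiguous
    dst[2] = v.Z;
    dst[3] = v.W;
}

void CopyCoalesced(float* dst, const Vec4& v)
{
    std::memcpy(dst, &v, sizeof(Vec4)); // the single block copy that replaces them
}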
- const unsigned srcLclNum = moveSource->AsLclVarCommon()->GetLclNum(); - noway_assert(srcLclNum == callResultLclNumber); - const unsigned dstLclNum = moveDest->AsLclVarCommon()->GetLclNum(); - callResultLclNumber = dstLclNum; - - nextMorphStmt = moveStmt->GetNextStmt(); - } - if (nextMorphStmt != nullptr) -#endif - { - Statement* retStmt = nextMorphStmt; - GenTree* retExpr = nextMorphStmt->GetRootNode(); - noway_assert(retExpr->gtOper == GT_RETURN); - - GenTree* treeWithLcl = retExpr->gtGetOp1(); - while (treeWithLcl->gtOper == GT_CAST) - { - noway_assert(!treeWithLcl->gtOverflow()); - treeWithLcl = treeWithLcl->gtGetOp1(); - } - - noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->GetLclNum()); - - nextMorphStmt = retStmt->GetNextStmt(); - } - } - } - return nextMorphStmt == nullptr; -} - -//------------------------------------------------------------------------ -// fgCanTailCallViaJitHelper: check whether we can use the faster tailcall -// JIT helper on x86. -// -// Return Value: -// 'true' if we can; or 'false' if we should use the generic tailcall mechanism. -// -bool Compiler::fgCanTailCallViaJitHelper() -{ -#ifndef TARGET_X86 - // On anything except X86 we have no faster mechanism available. - return false; -#else - // The JIT helper does not properly handle the case where localloc was used. - if (compLocallocUsed) - return false; - - return true; -#endif -} - -static const int numberOfTrackedFlags = 5; -static const unsigned trackedFlags[numberOfTrackedFlags] = {GTF_ASG, GTF_CALL, GTF_EXCEPT, GTF_GLOB_REF, - GTF_ORDER_SIDEEFF}; - -//------------------------------------------------------------------------ -// fgMorphArgList: morph argument list tree without recursion. -// -// Arguments: -// args - argument list tree to morph; -// mac - morph address context, used to morph children. -// -// Return Value: -// morphed argument list. -// -GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac) -{ - // Use a non-recursive algorithm that morphs all actual list values, - // memorizes the last node for each effect flag and resets - // them during the second iteration. - assert((trackedFlags[0] | trackedFlags[1] | trackedFlags[2] | trackedFlags[3] | trackedFlags[4]) == GTF_ALL_EFFECT); - - GenTree* memorizedLastNodes[numberOfTrackedFlags] = {nullptr}; - - for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) - { - // Morph actual list values. - GenTree*& arg = listNode->Current(); - arg = fgMorphTree(arg, mac); - - // Remember the last list node with each flag. - for (int i = 0; i < numberOfTrackedFlags; ++i) - { - if ((arg->gtFlags & trackedFlags[i]) != 0) - { - memorizedLastNodes[i] = listNode; - } - } - } - - for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) - { - // Clear all old effects from the list node. - listNode->gtFlags &= ~GTF_ALL_EFFECT; - - // Spread each flag to all list nodes (to the prefix) before the memorized last node. - for (int i = 0; i < numberOfTrackedFlags; ++i) - { - if (memorizedLastNodes[i] != nullptr) - { - listNode->gtFlags |= trackedFlags[i]; - } - if (listNode == memorizedLastNodes[i]) - { - memorizedLastNodes[i] = nullptr; - } - } - } - - return args; -} +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
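The two-pass flag propagation in fgMorphArgList above can be sketched standalone (toy list node and flag values, not GenTree flags): the first pass remembers the last node whose value carries each tracked flag, and the second pass sets that flag on every list node up to and including that one, clearing stale flags elsewhere.

#include <cstdint>
#include <vector>

using Flags = uint32_t;
constexpr Flags FLAG_A = 1u << 0; // stands in for e.g. GTF_ASG
constexpr Flags FLAG_B = 1u << 1; // stands in for e.g. GTF_CALL

struct ToyListNode
{
    Flags valueFlags = 0; // effects of the list node's value (the "arg")
    Flags nodeFlags  = 0; // effects recorded on the list node itself
};

void SpreadEffectFlags(std::vector<ToyListNode>& list)
{
    const Flags        tracked[] = {FLAG_A, FLAG_B};
    const ToyListNode* last[2]   = {nullptr, nullptr};

    for (const ToyListNode& n : list) // pass 1: remember the last node carrying each flag
    {
        for (int i = 0; i < 2; ++i)
        {
            if ((n.valueFlags & tracked[i]) != 0)
            {
                last[i] = &n;
            }
        }
    }

    for (ToyListNode& n : list) // pass 2: spread each flag over the prefix
    {
        n.nodeFlags = 0; // clear all old effects first
        for (int i = 0; i < 2; ++i)
        {
            if (last[i] != nullptr)
            {
                n.nodeFlags |= tracked[i];
            }
            if (&n == last[i])
            {
                last[i] = nullptr; // nothing after the last occurrence gets the flag
            }
        }
    }
}

This keeps the per-node effect summaries conservative for the prefix of the list while avoiding recursion over the list nodes, which is the stated goal of the non-recursive algorithm above.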
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Morph XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#include "allocacheck.h" // for alloca + +// Convert the given node into a call to the specified helper passing +// the given argument list. +// +// Tries to fold constants and also adds an edge for overflow exception +// returns the morphed tree +GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper) +{ + GenTree* result; + + /* If the operand is a constant, we'll try to fold it */ + if (oper->OperIsConst()) + { + GenTree* oldTree = tree; + + tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) + + if (tree != oldTree) + { + return fgMorphTree(tree); + } + else if (tree->OperKind() & GTK_CONST) + { + return fgMorphConst(tree); + } + + // assert that oper is unchanged and that it is still a GT_CAST node + noway_assert(tree->AsCast()->CastOp() == oper); + noway_assert(tree->gtOper == GT_CAST); + } + result = fgMorphIntoHelperCall(tree, helper, gtNewCallArgs(oper)); + assert(result == tree); + return result; +} + +/***************************************************************************** + * + * Convert the given node into a call to the specified helper passing + * the given argument list. + */ + +GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, GenTreeCall::Use* args, bool morphArgs) +{ + // The helper call ought to be semantically equivalent to the original node, so preserve its VN. + tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN); + + GenTreeCall* call = tree->AsCall(); + + call->gtCallType = CT_HELPER; + call->gtCallMethHnd = eeFindHelper(helper); + call->gtCallThisArg = nullptr; + call->gtCallArgs = args; + call->gtCallLateArgs = nullptr; + call->fgArgInfo = nullptr; + call->gtRetClsHnd = nullptr; + call->gtCallMoreFlags = 0; + call->gtInlineCandidateInfo = nullptr; + call->gtControlExpr = nullptr; + +#if DEBUG + // Helper calls are never candidates. + call->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER; + + call->callSig = nullptr; + +#endif // DEBUG + +#ifdef FEATURE_READYTORUN_COMPILER + call->gtEntryPoint.addr = nullptr; + call->gtEntryPoint.accessType = IAT_VALUE; +#endif + +#if FEATURE_MULTIREG_RET + call->ResetReturnType(); + call->ClearOtherRegs(); + call->ClearOtherRegFlags(); +#ifndef TARGET_64BIT + if (varTypeIsLong(tree)) + { + call->InitializeLongReturnType(); + } +#endif // !TARGET_64BIT +#endif // FEATURE_MULTIREG_RET + + if (tree->OperMayThrow(this)) + { + tree->gtFlags |= GTF_EXCEPT; + } + else + { + tree->gtFlags &= ~GTF_EXCEPT; + } + tree->gtFlags |= GTF_CALL; + + for (GenTreeCall::Use& use : GenTreeCall::UseList(args)) + { + tree->gtFlags |= (use.GetNode()->gtFlags & GTF_ALL_EFFECT); + } + + /* Perform the morphing */ + + if (morphArgs) + { + tree = fgMorphArgs(call); + } + + return tree; +} + +/***************************************************************************** + * + * Morph a cast node (we perform some very simple transformations here). 
+ */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +GenTree* Compiler::fgMorphCast(GenTree* tree) +{ + noway_assert(tree->gtOper == GT_CAST); + noway_assert(genTypeSize(TYP_I_IMPL) == TARGET_POINTER_SIZE); + + /* The first sub-operand is the thing being cast */ + + GenTree* oper = tree->AsCast()->CastOp(); + + if (fgGlobalMorph && (oper->gtOper == GT_ADDR)) + { + // Make sure we've checked if 'oper' is an address of an implicit-byref parameter. + // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast + // morphing code to see that type. + fgMorphImplicitByRefArgs(oper); + } + + var_types srcType = genActualType(oper->TypeGet()); + + var_types dstType = tree->CastToType(); + unsigned dstSize = genTypeSize(dstType); + + // See if the cast has to be done in two steps. R -> I + if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) + { + if (srcType == TYP_FLOAT +#if defined(TARGET_ARM64) + // Arm64: src = float, dst is overflow conversion. + // This goes through helper and hence src needs to be converted to double. + && tree->gtOverflow() +#elif defined(TARGET_AMD64) + // Amd64: src = float, dst = uint64 or overflow conversion. + // This goes through helper and hence src needs to be converted to double. + && (tree->gtOverflow() || (dstType == TYP_ULONG)) +#elif defined(TARGET_ARM) + // Arm: src = float, dst = int64/uint64 or overflow conversion. + && (tree->gtOverflow() || varTypeIsLong(dstType)) +#else + // x86: src = float, dst = uint32/int64/uint64 or overflow conversion. + && (tree->gtOverflow() || varTypeIsLong(dstType) || (dstType == TYP_UINT)) +#endif + ) + { + oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE); + } + + // do we need to do it in two steps R -> I, '-> smallType + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) + if (dstSize < genTypeSize(TYP_INT)) + { + oper = gtNewCastNodeL(TYP_INT, oper, tree->IsUnsigned(), TYP_INT); + oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + tree->gtFlags &= ~GTF_UNSIGNED; + } +#else + if (dstSize < TARGET_POINTER_SIZE) + { + oper = gtNewCastNodeL(TYP_I_IMPL, oper, false, TYP_I_IMPL); + oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + } +#endif + else + { + /* Note that if we need to use a helper call then we can not morph oper */ + if (!tree->gtOverflow()) + { +#ifdef TARGET_ARM64 // On ARM64 All non-overflow checking conversions can be optimized + goto OPTIMIZECAST; +#else + switch (dstType) + { + case TYP_INT: + goto OPTIMIZECAST; + + case TYP_UINT: +#if defined(TARGET_ARM) || defined(TARGET_AMD64) + goto OPTIMIZECAST; +#else // TARGET_X86 + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper); +#endif // TARGET_X86 + + case TYP_LONG: +#ifdef TARGET_AMD64 + // SSE2 has instructions to convert a float/double directly to a long + goto OPTIMIZECAST; +#else // !TARGET_AMD64 + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper); +#endif // !TARGET_AMD64 + + case TYP_ULONG: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); + default: + break; + } +#endif // TARGET_ARM64 + } + else + { + switch (dstType) + { + case TYP_INT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper); + case TYP_UINT: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper); + case TYP_LONG: + return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper); + case TYP_ULONG: + return 
fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper); + default: + break; + } + } + noway_assert(!"Unexpected dstType"); + } + } +#ifndef TARGET_64BIT + // The code generation phase (for x86 & ARM32) does not handle casts + // directly from [u]long to anything other than [u]int. Insert an + // intermediate cast to native int. + else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType)) + { + oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL); + oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + tree->gtFlags &= ~GTF_UNSIGNED; + } +#endif //! TARGET_64BIT + +#ifdef TARGET_ARM + else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) && + !varTypeIsLong(oper->AsCast()->CastOp())) + { + // optimization: conv.r4(conv.r8(?)) -> conv.r4(d) + // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step. + // This happens semi-frequently because there is no IL 'conv.r4.un' + oper->gtType = TYP_FLOAT; + oper->CastToType() = TYP_FLOAT; + return fgMorphTree(oper); + } + // converts long/ulong --> float/double casts into helper calls. + else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType)) + { + if (dstType == TYP_FLOAT) + { + // there is only a double helper, so we + // - change the dsttype to double + // - insert a cast from double to float + // - recurse into the resulting tree + tree->CastToType() = TYP_DOUBLE; + tree->gtType = TYP_DOUBLE; + + tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); + + return fgMorphTree(tree); + } + if (tree->gtFlags & GTF_UNSIGNED) + return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); + return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + } +#endif // TARGET_ARM + +#ifdef TARGET_AMD64 + // Do we have to do two step U4/8 -> R4/8 ? + // Codegen supports the following conversion as one-step operation + // a) Long -> R4/R8 + // b) U8 -> R8 + // + // The following conversions are performed as two-step operations using above. + // U4 -> R4/8 = U4-> Long -> R4/8 + // U8 -> R4 = U8 -> R8 -> R4 + else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) + { + srcType = genUnsignedType(srcType); + + if (srcType == TYP_ULONG) + { + if (dstType == TYP_FLOAT) + { + // Codegen can handle U8 -> R8 conversion. + // U8 -> R4 = U8 -> R8 -> R4 + // - change the dsttype to double + // - insert a cast from double to float + // - recurse into the resulting tree + tree->CastToType() = TYP_DOUBLE; + tree->gtType = TYP_DOUBLE; + tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT); + return fgMorphTree(tree); + } + } + else if (srcType == TYP_UINT) + { + oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); + oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + tree->gtFlags &= ~GTF_UNSIGNED; + } + } +#endif // TARGET_AMD64 + +#ifdef TARGET_X86 + // Do we have to do two step U4/8 -> R4/8 ? 
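
The two-step unsigned-to-floating conversions described in the comments above (and in the x86 block that follows) rest on two arithmetic identities: widening a UINT to a signed long is exact, so converting the long gives the same floating-point value as converting the UINT directly; and converting a ULONG to double first and then narrowing to float matches a direct ULONG-to-float conversion, because double carries enough extra precision that the intermediate rounding cannot change the final float. A small standalone sanity check of both identities; plain C++ casts stand in for the JIT helpers, so this only illustrates the arithmetic, not the generated code:

#include <cassert>
#include <cstdint>

int main()
{
    // U4 -> R4/R8 done as U4 -> long -> R4/R8: the widening step is exact.
    uint32_t u32Values[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
    for (uint32_t u : u32Values)
    {
        assert(static_cast<double>(u) == static_cast<double>(static_cast<int64_t>(u)));
        assert(static_cast<float>(u) == static_cast<float>(static_cast<int64_t>(u)));
    }

    // U8 -> R4 done as U8 -> R8 -> R4: the intermediate rounding to double
    // never changes the float that finally comes out.
    uint64_t u64Values[] = {0ull, 1ull, 0x00FFFFFFFFFFFFFFull, 0x7FFFFFFFFFFFFFFFull,
                            0x8000000000000000ull, 0xFFFFFFFFFFFFFFFFull};
    for (uint64_t u : u64Values)
    {
        assert(static_cast<float>(u) == static_cast<float>(static_cast<double>(u)));
    }

    return 0;
}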
+ else if (tree->IsUnsigned() && varTypeIsFloating(dstType)) + { + srcType = genUnsignedType(srcType); + + if (srcType == TYP_ULONG) + { + return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper); + } + else if (srcType == TYP_UINT) + { + oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG); + oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + tree->gtFlags &= ~GTF_UNSIGNED; + return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + } + } + else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) + { + oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + + // Since we don't have a Jit Helper that converts to a TYP_FLOAT + // we just use the one that converts to a TYP_DOUBLE + // and then add a cast to TYP_FLOAT + // + if ((dstType == TYP_FLOAT) && (oper->OperGet() == GT_CALL)) + { + // Fix the return type to be TYP_DOUBLE + // + oper->gtType = TYP_DOUBLE; + + // Add a Cast to TYP_FLOAT + // + tree = gtNewCastNode(TYP_FLOAT, oper, false, TYP_FLOAT); + INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + + return tree; + } + else + { + return oper; + } + } +#endif // TARGET_X86 + else if (varTypeIsGC(srcType) != varTypeIsGC(dstType)) + { + // We are casting away GC information. we would like to just + // change the type to int, however this gives the emitter fits because + // it believes the variable is a GC variable at the beginning of the + // instruction group, but is not turned non-gc by the code generator + // we fix this by copying the GC pointer to a non-gc pointer temp. + noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?"); + + // We generate an assignment to an int and then do the cast from an int. With this we avoid + // the gc problem and we allow casts to bytes, longs, etc... + unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC")); + oper->gtType = TYP_I_IMPL; + GenTree* asg = gtNewTempAssign(lclNum, oper); + oper->gtType = srcType; + + // do the real cast + GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType); + + // Generate the comma tree + oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast); + + return fgMorphTree(oper); + } + + // Look for narrowing casts ([u]long -> [u]int) and try to push them + // down into the operand before morphing it. + // + // It doesn't matter if this is cast is from ulong or long (i.e. if + // GTF_UNSIGNED is set) because the transformation is only applied to + // overflow-insensitive narrowing casts, which always silently truncate. + // + // Note that casts from [u]long to small integer types are handled above. + if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT))) + { + // As a special case, look for overflow-sensitive casts of an AND + // expression, and see if the second operand is a small constant. Since + // the result of an AND is bound by its smaller operand, it may be + // possible to prove that the cast won't overflow, which will in turn + // allow the cast's operand to be transformed. + if (tree->gtOverflow() && (oper->OperGet() == GT_AND)) + { + GenTree* andOp2 = oper->AsOp()->gtOp2; + + // Special case to the special case: AND with a casted int. + if ((andOp2->OperGet() == GT_CAST) && (andOp2->AsCast()->CastOp()->OperGet() == GT_CNS_INT)) + { + // gtFoldExprConst will deal with whether the cast is signed or + // unsigned, or overflow-sensitive. 
+ andOp2 = gtFoldExprConst(andOp2); + oper->AsOp()->gtOp2 = andOp2; + } + + // Look for a constant less than 2^{32} for a cast to uint, or less + // than 2^{31} for a cast to int. + int maxWidth = (dstType == TYP_UINT) ? 32 : 31; + + if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->AsIntConCommon()->LngValue() >> maxWidth) == 0)) + { + // This cast can't overflow. + tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT); + } + } + + // Only apply this transformation during global morph, + // when neither the cast node nor the oper node may throw an exception + // based on the upper 32 bits. + // + if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx()) + { + // For these operations the lower 32 bits of the result only depends + // upon the lower 32 bits of the operands. + // + bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG); + + // For long LSH cast to int, there is a discontinuity in behavior + // when the shift amount is 32 or larger. + // + // CAST(INT, LSH(1LL, 31)) == LSH(1, 31) + // LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31) + // + // CAST(INT, LSH(1LL, 32)) == 0 + // LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1 + // + // So some extra validation is needed. + // + if (oper->OperIs(GT_LSH)) + { + GenTree* shiftAmount = oper->AsOp()->gtOp2; + + // Expose constant value for shift, if possible, to maximize the number + // of cases we can handle. + shiftAmount = gtFoldExpr(shiftAmount); + oper->AsOp()->gtOp2 = shiftAmount; + +#if DEBUG + // We may remorph the shift amount tree again later, so clear any morphed flag. + shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + + if (shiftAmount->IsIntegralConst()) + { + const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue(); + + if ((shiftAmountValue >= 64) || (shiftAmountValue < 0)) + { + // Shift amount is large enough or negative so result is undefined. + // Don't try to optimize. + assert(!canPushCast); + } + else if (shiftAmountValue >= 32) + { + // We know that we have a narrowing cast ([u]long -> [u]int) + // and that we are casting to a 32-bit value, which will result in zero. + // + // Check to see if we have any side-effects that we must keep + // + if ((tree->gtFlags & GTF_ALL_EFFECT) == 0) + { + // Result of the shift is zero. + DEBUG_DESTROY_NODE(tree); + GenTree* zero = gtNewZeroConNode(TYP_INT); + return fgMorphTree(zero); + } + else // We do have a side-effect + { + // We could create a GT_COMMA node here to keep the side-effect and return a zero + // Instead we just don't try to optimize this case. + canPushCast = false; + } + } + else + { + // Shift amount is positive and small enough that we can push the cast through. + canPushCast = true; + } + } + else + { + // Shift amount is unknown. We can't optimize this case. + assert(!canPushCast); + } + } + + if (canPushCast) + { + DEBUG_DESTROY_NODE(tree); + + // Insert narrowing casts for op1 and op2. + oper->AsOp()->gtOp1 = gtNewCastNode(TYP_INT, oper->AsOp()->gtOp1, false, dstType); + if (oper->AsOp()->gtOp2 != nullptr) + { + oper->AsOp()->gtOp2 = gtNewCastNode(TYP_INT, oper->AsOp()->gtOp2, false, dstType); + } + + // Clear the GT_MUL_64RSLT if it is set. + if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT)) + { + oper->gtFlags &= ~GTF_MUL_64RSLT; + } + + // The operation now produces a 32-bit result. + oper->gtType = TYP_INT; + + // Remorph the new tree as the casts that we added may be folded away. 
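
Two of the checks above are easy to sanity-check in isolation. When the cast operand is an AND with a constant mask, the result can never exceed the mask, so the overflow check can be dropped whenever the mask fits the target width (mask >> 32 == 0 for a cast to uint, mask >> 31 == 0 for a cast to int). And the shift example from the comment shows why the cast cannot be pushed through an LSH with a count of 32 or more: truncating the 64-bit shift result is not the same as shifting the already-truncated value once the count reaches 32. A hedged standalone sketch; the helper name is invented, and the "32-bit shift by 32" half of the comparison is only described in the comment (the JIT comment above gives LSH(1, 32) == LSH(1, 0) == 1 for the masked-count case), since performing it directly in C++ would be undefined:

#include <cassert>
#include <cstdint>

// Simplified version of the mask test used above, on an unsigned mask for clarity:
// a narrowing cast of (x & mask) cannot overflow if the mask already fits the
// destination's value range.
bool narrowingCastCannotOverflow(uint64_t mask, bool castToUnsignedInt)
{
    int maxWidth = castToUnsignedInt ? 32 : 31;
    return (mask >> maxWidth) == 0;
}

int main()
{
    assert(narrowingCastCannotOverflow(0xFFull, true));         // (x & 0xFF) always fits in uint
    assert(narrowingCastCannotOverflow(0x7FFFFFFFull, false));  // fits in int
    assert(!narrowingCastCannotOverflow(0xFFFFFFFFull, false)); // may exceed int

    // CAST(INT, LSH(1LL, 31)) and LSH(CAST(INT, 1LL), 31) agree ...
    assert(static_cast<uint32_t>(1ull << 31) == (1u << 31));

    // ... but at a count of 32 the 64-bit shift produces 0x1'0000'0000, which
    // truncates to 0, while the narrowed 32-bit shift would see a masked count
    // of 0 and produce 1, per the comment above.
    assert(static_cast<uint32_t>(1ull << 32) == 0u);

    return 0;
}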
+ return fgMorphTree(oper); + } + } + } + +OPTIMIZECAST: + noway_assert(tree->gtOper == GT_CAST); + + /* Morph the operand */ + tree->AsCast()->CastOp() = oper = fgMorphTree(oper); + + /* Reset the call flag */ + tree->gtFlags &= ~GTF_CALL; + + /* Reset the assignment flag */ + tree->gtFlags &= ~GTF_ASG; + + /* unless we have an overflow cast, reset the except flag */ + if (!tree->gtOverflow()) + { + tree->gtFlags &= ~GTF_EXCEPT; + } + + /* Just in case new side effects were introduced */ + tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT); + + if (!gtIsActiveCSE_Candidate(tree) && !gtIsActiveCSE_Candidate(oper)) + { + srcType = oper->TypeGet(); + + /* See if we can discard the cast */ + if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType)) + { + if (tree->IsUnsigned() && !varTypeIsUnsigned(srcType)) + { + if (varTypeIsSmall(srcType)) + { + // Small signed values are automatically sign extended to TYP_INT. If the cast is interpreting the + // resulting TYP_INT value as unsigned then the "sign" bits end up being "value" bits and srcType + // must be TYP_UINT, not the original small signed type. Otherwise "conv.ovf.i2.un(i1(-1))" is + // wrongly treated as a widening conversion from i1 to i2 when in fact it is a narrowing conversion + // from u4 to i2. + srcType = genActualType(srcType); + } + + srcType = genUnsignedType(srcType); + } + + if (srcType == dstType) + { // Certainly if they are identical it is pointless + goto REMOVE_CAST; + } + + if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType)) + { + unsigned varNum = oper->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[varNum]; + if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore()) + { + goto REMOVE_CAST; + } + } + + bool unsignedSrc = varTypeIsUnsigned(srcType); + bool unsignedDst = varTypeIsUnsigned(dstType); + bool signsDiffer = (unsignedSrc != unsignedDst); + unsigned srcSize = genTypeSize(srcType); + + // For same sized casts with + // the same signs or non-overflow cast we discard them as well + if (srcSize == dstSize) + { + /* This should have been handled above */ + noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType)); + + if (!signsDiffer) + { + goto REMOVE_CAST; + } + + if (!tree->gtOverflow()) + { + /* For small type casts, when necessary we force + the src operand to the dstType and allow the + implied load from memory to perform the casting */ + if (varTypeIsSmall(srcType)) + { + switch (oper->gtOper) + { + case GT_IND: + case GT_CLS_VAR: + case GT_LCL_FLD: + case GT_ARR_ELEM: + oper->gtType = dstType; + // We're changing the type here so we need to update the VN; + // in other cases we discard the cast without modifying oper + // so the VN doesn't change. 
+ oper->SetVNsFromNode(tree); + goto REMOVE_CAST; + default: + break; + } + } + else + { + goto REMOVE_CAST; + } + } + } + else if (srcSize < dstSize) // widening cast + { + // Keep any long casts + if (dstSize == sizeof(int)) + { + // Only keep signed to unsigned widening cast with overflow check + if (!tree->gtOverflow() || !unsignedDst || unsignedSrc) + { + goto REMOVE_CAST; + } + } + + // Widening casts from unsigned or to signed can never overflow + + if (unsignedSrc || !unsignedDst) + { + tree->gtFlags &= ~GTF_OVERFLOW; + if (!(oper->gtFlags & GTF_EXCEPT)) + { + tree->gtFlags &= ~GTF_EXCEPT; + } + } + } + else // if (srcSize > dstSize) + { + // Try to narrow the operand of the cast and discard the cast + // Note: Do not narrow a cast that is marked as a CSE + // And do not narrow if the oper is marked as a CSE either + // + if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) && + optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false)) + { + optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true); + + /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */ + if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType())) + { + oper = oper->AsCast()->CastOp(); + } + goto REMOVE_CAST; + } + } + } + + switch (oper->gtOper) + { + /* If the operand is a constant, we'll fold it */ + case GT_CNS_INT: + case GT_CNS_LNG: + case GT_CNS_DBL: + case GT_CNS_STR: + { + GenTree* oldTree = tree; + + tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...) + + // Did we get a comma throw as a result of gtFoldExprConst? + if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA)) + { + noway_assert(fgIsCommaThrow(tree)); + tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1); + fgMorphTreeDone(tree); + return tree; + } + else if (tree->gtOper != GT_CAST) + { + return tree; + } + + noway_assert(tree->AsCast()->CastOp() == oper); // unchanged + } + break; + + case GT_CAST: + /* Check for two consecutive casts into the same dstType */ + if (!tree->gtOverflow()) + { + var_types dstType2 = oper->CastToType(); + if (dstType == dstType2) + { + goto REMOVE_CAST; + } + } + break; + + case GT_COMMA: + // Check for cast of a GT_COMMA with a throw overflow + // Bug 110829: Since this optimization will bash the types + // neither oper or commaOp2 can be CSE candidates + if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate + { + GenTree* commaOp2 = oper->AsOp()->gtOp2; + + if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate + { + // need type of oper to be same as tree + if (tree->gtType == TYP_LONG) + { + commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); + commaOp2->AsIntConCommon()->SetLngValue(0); + /* Change the types of oper and commaOp2 to TYP_LONG */ + oper->gtType = commaOp2->gtType = TYP_LONG; + } + else if (varTypeIsFloating(tree->gtType)) + { + commaOp2->ChangeOperConst(GT_CNS_DBL); + commaOp2->AsDblCon()->gtDconVal = 0.0; + // Change the types of oper and commaOp2 + oper->gtType = commaOp2->gtType = tree->gtType; + } + else + { + commaOp2->ChangeOperConst(GT_CNS_INT); + commaOp2->AsIntCon()->gtIconVal = 0; + /* Change the types of oper and commaOp2 to TYP_INT */ + oper->gtType = commaOp2->gtType = TYP_INT; + } + } + + if (vnStore != nullptr) + { + fgValueNumberTreeConst(commaOp2); + } + + /* Return the GT_COMMA node as the new tree */ + return oper; + } + break; + + default: + break; + } /* end switch 
(oper->gtOper) */ + } + + if (tree->gtOverflow()) + { + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); + } + + return tree; + +REMOVE_CAST: + /* Here we've eliminated the cast, so just return it's operand */ + assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate + + DEBUG_DESTROY_NODE(tree); + return oper; +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +#ifdef DEBUG +void fgArgTabEntry::Dump() const +{ + printf("fgArgTabEntry[arg %u", argNum); + printf(" %d.%s", GetNode()->gtTreeID, GenTree::OpName(GetNode()->OperGet())); + printf(" %s", varTypeName(argType)); + printf(" (%s)", passedByRef ? "By ref" : "By value"); + if (GetRegNum() != REG_STK) + { + printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s"); + for (unsigned i = 0; i < numRegs; i++) + { + printf(" %s", getRegName(regNums[i])); + } + } + if (GetStackByteSize() > 0) + { +#if defined(DEBUG_ARG_SLOTS) + printf(", numSlots=%u, slotNum=%u, byteSize=%u, byteOffset=%u", numSlots, slotNum, m_byteSize, m_byteOffset); +#else + printf(", byteSize=%u, byteOffset=%u", m_byteSize, m_byteOffset); + +#endif + } + printf(", byteAlignment=%u", m_byteAlignment); + if (isLateArg()) + { + printf(", lateArgInx=%u", GetLateArgInx()); + } + if (IsSplit()) + { + printf(", isSplit"); + } + if (needTmp) + { + printf(", tmpNum=V%02u", tmpNum); + } + if (needPlace) + { + printf(", needPlace"); + } + if (isTmp) + { + printf(", isTmp"); + } + if (processed) + { + printf(", processed"); + } + if (IsHfaRegArg()) + { + printf(", isHfa(%s)", varTypeName(GetHfaType())); + } + if (isBackFilled) + { + printf(", isBackFilled"); + } + if (isNonStandard) + { + printf(", isNonStandard"); + } + if (isStruct) + { + printf(", isStruct"); + } + printf("]\n"); +} +#endif + +fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs) +{ + compiler = comp; + callTree = call; + argCount = 0; // filled in arg count, starts at zero + DEBUG_ARG_SLOTS_ONLY(nextSlotNum = INIT_ARG_STACK_SLOT;) + nextStackByteOffset = INIT_ARG_STACK_SLOT * TARGET_POINTER_SIZE; + stkLevel = 0; +#if defined(UNIX_X86_ABI) + alignmentDone = false; + stkSizeBytes = 0; + padStkAlign = 0; +#endif +#if FEATURE_FIXED_OUT_ARGS + outArgSize = 0; +#endif + + argTableSize = numArgs; // the allocated table size + + hasRegArgs = false; + hasStackArgs = false; + argsComplete = false; + argsSorted = false; + needsTemps = false; + + if (argTableSize == 0) + { + argTable = nullptr; + } + else + { + argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; + } +} + +/***************************************************************************** + * + * fgArgInfo Copy Constructor + * + * This method needs to act like a copy constructor for fgArgInfo. + * The newCall needs to have its fgArgInfo initialized such that + * we have newCall that is an exact copy of the oldCall. + * We have to take care since the argument information + * in the argTable contains pointers that must point to the + * new arguments and not the old arguments. 
+ */ +fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall) +{ + fgArgInfo* oldArgInfo = oldCall->AsCall()->fgArgInfo; + + compiler = oldArgInfo->compiler; + callTree = newCall; + argCount = 0; // filled in arg count, starts at zero + DEBUG_ARG_SLOTS_ONLY(nextSlotNum = INIT_ARG_STACK_SLOT;) + nextStackByteOffset = INIT_ARG_STACK_SLOT * TARGET_POINTER_SIZE; + stkLevel = oldArgInfo->stkLevel; +#if defined(UNIX_X86_ABI) + alignmentDone = oldArgInfo->alignmentDone; + stkSizeBytes = oldArgInfo->stkSizeBytes; + padStkAlign = oldArgInfo->padStkAlign; +#endif +#if FEATURE_FIXED_OUT_ARGS + outArgSize = oldArgInfo->outArgSize; +#endif + argTableSize = oldArgInfo->argTableSize; + argsComplete = false; + argTable = nullptr; + + assert(oldArgInfo->argsComplete); + + if (argTableSize > 0) + { + argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntry*[argTableSize]; + + // Copy the old arg entries + for (unsigned i = 0; i < argTableSize; i++) + { + argTable[i] = new (compiler, CMK_fgArgInfo) fgArgTabEntry(*oldArgInfo->argTable[i]); + } + + // The copied arg entries contain pointers to old uses, they need + // to be updated to point to new uses. + if (newCall->gtCallThisArg != nullptr) + { + for (unsigned i = 0; i < argTableSize; i++) + { + if (argTable[i]->use == oldCall->gtCallThisArg) + { + argTable[i]->use = newCall->gtCallThisArg; + break; + } + } + } + + GenTreeCall::UseIterator newUse = newCall->Args().begin(); + GenTreeCall::UseIterator newUseEnd = newCall->Args().end(); + GenTreeCall::UseIterator oldUse = oldCall->Args().begin(); + GenTreeCall::UseIterator oldUseEnd = newCall->Args().end(); + + for (; newUse != newUseEnd; ++newUse, ++oldUse) + { + for (unsigned i = 0; i < argTableSize; i++) + { + if (argTable[i]->use == oldUse.GetUse()) + { + argTable[i]->use = newUse.GetUse(); + break; + } + } + } + + newUse = newCall->LateArgs().begin(); + newUseEnd = newCall->LateArgs().end(); + oldUse = oldCall->LateArgs().begin(); + oldUseEnd = newCall->LateArgs().end(); + + for (; newUse != newUseEnd; ++newUse, ++oldUse) + { + for (unsigned i = 0; i < argTableSize; i++) + { + if (argTable[i]->lateUse == oldUse.GetUse()) + { + argTable[i]->lateUse = newUse.GetUse(); + break; + } + } + } + } + + argCount = oldArgInfo->argCount; + DEBUG_ARG_SLOTS_ONLY(nextSlotNum = oldArgInfo->nextSlotNum;) + nextStackByteOffset = oldArgInfo->nextStackByteOffset; + + hasRegArgs = oldArgInfo->hasRegArgs; + hasStackArgs = oldArgInfo->hasStackArgs; + argsComplete = true; + argsSorted = true; +} + +void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry) +{ + assert(argCount < argTableSize); + argTable[argCount] = curArgTabEntry; + argCount++; +} + +fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, + bool isVararg /*=false*/) +{ + fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; + + // Any additional register numbers are set by the caller. + // This is primarily because on ARM we don't yet know if it + // will be split or if it is a double HFA, so the number of registers + // may actually be less. 
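
The use-remapping loops in the copy constructor above retarget each copied fgArgTabEntry so that its use pointer refers to the new call's argument list instead of the old one, relying on the two lists having identical shape and walking them in lock step. A stripped-down model of that remapping, with made-up stand-in types rather than the real GenTreeCall::Use:

#include <cassert>
#include <cstddef>

// Hypothetical stand-in for a call-argument use list node.
struct Use
{
    int  value;
    Use* next;
};

// Entries that point at uses of the *old* list and must be retargeted.
struct Entry
{
    Use* use;
};

// Walk the old and new lists in lock step (they have the same length) and
// retarget every entry that pointed at an old use to the corresponding new use.
void remapUses(Entry* entries, size_t count, Use* oldList, Use* newList)
{
    for (Use *o = oldList, *n = newList; o != nullptr; o = o->next, n = n->next)
    {
        for (size_t i = 0; i < count; i++)
        {
            if (entries[i].use == o)
            {
                entries[i].use = n;
                break;
            }
        }
    }
}

int main()
{
    Use oldB = {2, nullptr}, oldA = {1, &oldB};
    Use newB = {2, nullptr}, newA = {1, &newB};

    Entry entries[2] = {{&oldB}, {&oldA}};
    remapUses(entries, 2, &oldA, &newA);

    assert(entries[0].use == &newB);
    assert(entries[1].use == &newA);
    return 0;
}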
+ curArgTabEntry->setRegNum(0, regNum); + + curArgTabEntry->argNum = argNum; + curArgTabEntry->argType = node->TypeGet(); + curArgTabEntry->use = use; + curArgTabEntry->lateUse = nullptr; + curArgTabEntry->numRegs = numRegs; + +#if defined(DEBUG_ARG_SLOTS) + curArgTabEntry->slotNum = 0; + curArgTabEntry->numSlots = 0; +#endif + + curArgTabEntry->SetLateArgInx(UINT_MAX); + curArgTabEntry->tmpNum = BAD_VAR_NUM; + curArgTabEntry->SetSplit(false); + curArgTabEntry->isTmp = false; + curArgTabEntry->needTmp = false; + curArgTabEntry->needPlace = false; + curArgTabEntry->processed = false; + if (GlobalJitOptions::compFeatureHfa) + { + curArgTabEntry->SetHfaElemKind(CORINFO_HFA_ELEM_NONE); + } + curArgTabEntry->isBackFilled = false; + curArgTabEntry->isNonStandard = false; + curArgTabEntry->isStruct = isStruct; + curArgTabEntry->SetIsVararg(isVararg); + curArgTabEntry->SetByteAlignment(byteAlignment); + curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); + curArgTabEntry->SetByteOffset(0); + + hasRegArgs = true; + AddArg(curArgTabEntry); + return curArgTabEntry; +} + +#if defined(UNIX_AMD64_ABI) +fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + const bool isStruct, + const bool isFloatHfa, + const bool isVararg, + const regNumber otherRegNum, + const unsigned structIntRegs, + const unsigned structFloatRegs, + const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr) +{ + fgArgTabEntry* curArgTabEntry = + AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, isFloatHfa, isVararg); + assert(curArgTabEntry != nullptr); + + curArgTabEntry->isStruct = isStruct; // is this a struct arg + curArgTabEntry->structIntRegs = structIntRegs; + curArgTabEntry->structFloatRegs = structFloatRegs; + + INDEBUG(curArgTabEntry->checkIsStruct();) + assert(numRegs <= 2); + if (numRegs == 2) + { + curArgTabEntry->setRegNum(1, otherRegNum); + } + + if (isStruct && structDescPtr != nullptr) + { + curArgTabEntry->structDesc.CopyFrom(*structDescPtr); + } + + return curArgTabEntry; +} +#endif // defined(UNIX_AMD64_ABI) + +fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + unsigned numSlots, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, + bool isVararg /*=false*/) +{ + fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; + +#if defined(DEBUG_ARG_SLOTS) + nextSlotNum = roundUp(nextSlotNum, byteAlignment / TARGET_POINTER_SIZE); +#endif + + nextStackByteOffset = roundUp(nextStackByteOffset, byteAlignment); + DEBUG_ARG_SLOTS_ASSERT(nextStackByteOffset / TARGET_POINTER_SIZE == nextSlotNum); + + curArgTabEntry->setRegNum(0, REG_STK); + curArgTabEntry->argNum = argNum; + curArgTabEntry->argType = node->TypeGet(); + curArgTabEntry->use = use; + curArgTabEntry->lateUse = nullptr; +#if defined(DEBUG_ARG_SLOTS) + curArgTabEntry->numSlots = numSlots; + curArgTabEntry->slotNum = nextSlotNum; +#endif + + curArgTabEntry->numRegs = 0; +#if defined(UNIX_AMD64_ABI) + curArgTabEntry->structIntRegs = 0; + curArgTabEntry->structFloatRegs = 0; +#endif // defined(UNIX_AMD64_ABI) + curArgTabEntry->SetLateArgInx(UINT_MAX); + curArgTabEntry->tmpNum = BAD_VAR_NUM; + curArgTabEntry->SetSplit(false); + curArgTabEntry->isTmp = false; + curArgTabEntry->needTmp = false; + curArgTabEntry->needPlace = false; + curArgTabEntry->processed = false; + if 
(GlobalJitOptions::compFeatureHfa) + { + curArgTabEntry->SetHfaElemKind(CORINFO_HFA_ELEM_NONE); + } + curArgTabEntry->isBackFilled = false; + curArgTabEntry->isNonStandard = false; + curArgTabEntry->isStruct = isStruct; + curArgTabEntry->SetIsVararg(isVararg); + + curArgTabEntry->SetByteAlignment(byteAlignment); + curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); + curArgTabEntry->SetByteOffset(nextStackByteOffset); + + hasStackArgs = true; + AddArg(curArgTabEntry); + DEBUG_ARG_SLOTS_ONLY(nextSlotNum += numSlots;) + nextStackByteOffset += curArgTabEntry->GetByteSize(); + + return curArgTabEntry; +} + +void fgArgInfo::RemorphReset() +{ + DEBUG_ARG_SLOTS_ONLY(nextSlotNum = INIT_ARG_STACK_SLOT;) + nextStackByteOffset = INIT_ARG_STACK_SLOT * TARGET_POINTER_SIZE; +} + +//------------------------------------------------------------------------ +// UpdateRegArg: Update the given fgArgTabEntry while morphing. +// +// Arguments: +// curArgTabEntry - the fgArgTabEntry to update. +// node - the tree node that defines the argument +// reMorphing - a boolean value indicate whether we are remorphing the call +// +// Assumptions: +// This must have already been determined to be at least partially passed in registers. +// +void fgArgInfo::UpdateRegArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) +{ + bool isLateArg = curArgTabEntry->isLateArg(); + // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. + assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || + (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); + + assert(curArgTabEntry->numRegs != 0); + assert(curArgTabEntry->use->GetNode() == node); +} + +//------------------------------------------------------------------------ +// UpdateStkArg: Update the given fgArgTabEntry while morphing. +// +// Arguments: +// curArgTabEntry - the fgArgTabEntry to update. +// node - the tree node that defines the argument +// reMorphing - a boolean value indicate whether we are remorphing the call +// +// Assumptions: +// This must have already been determined to be passed on the stack. +// +void fgArgInfo::UpdateStkArg(fgArgTabEntry* curArgTabEntry, GenTree* node, bool reMorphing) +{ + bool isLateArg = curArgTabEntry->isLateArg(); + // If this is a late arg, we'd better be updating it with a correctly marked node, and vice-versa. 
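
AddStkArg above and UpdateStkArg below both advance nextStackByteOffset through roundUp(offset, alignment) before placing an argument, so every stack argument starts at the next multiple of its required alignment. For a power-of-two alignment that is the usual (x + a - 1) & ~(a - 1) computation; a small standalone illustration (roundUpPow2 here is a local re-implementation for the example, not the JIT's own utility):

#include <cassert>

// Round x up to the next multiple of a power-of-two alignment.
unsigned roundUpPow2(unsigned x, unsigned alignment)
{
    return (x + alignment - 1) & ~(alignment - 1);
}

int main()
{
    unsigned offset = 0;

    // 4-byte int at alignment 4: stays at 0, next free offset becomes 4.
    offset = roundUpPow2(offset, 4);
    assert(offset == 0);
    offset += 4;

    // 8-byte double at alignment 8: offset 4 is bumped to 8 before placing it.
    offset = roundUpPow2(offset, 8);
    assert(offset == 8);
    offset += 8;

    assert(offset == 16); // outgoing stack area consumed so far
    return 0;
}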
+ assert((isLateArg && ((node->gtFlags & GTF_LATE_ARG) != 0)) || + (!isLateArg && ((node->gtFlags & GTF_LATE_ARG) == 0))); + + noway_assert(curArgTabEntry->use != callTree->gtCallThisArg); + assert((curArgTabEntry->GetRegNum() == REG_STK) || curArgTabEntry->IsSplit()); + assert(curArgTabEntry->use->GetNode() == node); +#if defined(DEBUG_ARG_SLOTS) + nextSlotNum = roundUp(nextSlotNum, curArgTabEntry->GetByteAlignment() / TARGET_POINTER_SIZE); + assert(curArgTabEntry->slotNum == nextSlotNum); + nextSlotNum += curArgTabEntry->numSlots; +#endif + + nextStackByteOffset = roundUp(nextStackByteOffset, curArgTabEntry->GetByteAlignment()); + assert(curArgTabEntry->GetByteOffset() == nextStackByteOffset); + nextStackByteOffset += curArgTabEntry->GetStackByteSize(); +} + +void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots) +{ + fgArgTabEntry* curArgTabEntry = nullptr; + assert(argNum < argCount); + for (unsigned inx = 0; inx < argCount; inx++) + { + curArgTabEntry = argTable[inx]; + if (curArgTabEntry->argNum == argNum) + { + break; + } + } + + assert(numRegs > 0); + assert(numSlots > 0); + + if (argsComplete) + { + assert(curArgTabEntry->IsSplit() == true); + assert(curArgTabEntry->numRegs == numRegs); + DEBUG_ARG_SLOTS_ONLY(assert(curArgTabEntry->numSlots == numSlots);) + assert(hasStackArgs == true); + } + else + { + curArgTabEntry->SetSplit(true); + curArgTabEntry->numRegs = numRegs; + DEBUG_ARG_SLOTS_ONLY(curArgTabEntry->numSlots = numSlots;) + curArgTabEntry->SetByteOffset(0); + hasStackArgs = true; + } + DEBUG_ARG_SLOTS_ONLY(nextSlotNum += numSlots;) + // TODO-Cleanup: structs are aligned to 8 bytes on arm64 apple, so it would work, but pass the precise size. + nextStackByteOffset += numSlots * TARGET_POINTER_SIZE; +} + +//------------------------------------------------------------------------ +// EvalToTmp: Replace the node in the given fgArgTabEntry with a temp +// +// Arguments: +// curArgTabEntry - the fgArgTabEntry for the argument +// tmpNum - the varNum for the temp +// newNode - the assignment of the argument value to the temp +// +// Notes: +// Although the name of this method is EvalToTmp, it doesn't actually create +// the temp or the copy. +// +void fgArgInfo::EvalToTmp(fgArgTabEntry* curArgTabEntry, unsigned tmpNum, GenTree* newNode) +{ + assert(curArgTabEntry->use != callTree->gtCallThisArg); + assert(curArgTabEntry->use->GetNode() == newNode); + + assert(curArgTabEntry->GetNode() == newNode); + curArgTabEntry->tmpNum = tmpNum; + curArgTabEntry->isTmp = true; +} + +void fgArgInfo::ArgsComplete() +{ + bool hasStructRegArg = false; + + for (unsigned curInx = 0; curInx < argCount; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + assert(curArgTabEntry != nullptr); + GenTree* argx = curArgTabEntry->GetNode(); + + if (curArgTabEntry->GetRegNum() == REG_STK) + { + assert(hasStackArgs == true); +#if !FEATURE_FIXED_OUT_ARGS + // On x86 we use push instructions to pass arguments: + // The non-register arguments are evaluated and pushed in order + // and they are never evaluated into temps + // + continue; +#endif + } +#if FEATURE_ARG_SPLIT + else if (curArgTabEntry->IsSplit()) + { + hasStructRegArg = true; + assert(hasStackArgs == true); + } +#endif // FEATURE_ARG_SPLIT + else // we have a register argument, next we look for a struct type. 
+ { + if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct)) + { + hasStructRegArg = true; + } + } + + /* If the argument tree contains an assignment (GTF_ASG) then the argument and + and every earlier argument (except constants) must be evaluated into temps + since there may be other arguments that follow and they may use the value being assigned. + + EXAMPLE: ArgTab is "a, a=5, a" + -> when we see the second arg "a=5" + we know the first two arguments "a, a=5" have to be evaluated into temps + + For the case of an assignment, we only know that there exist some assignment someplace + in the tree. We don't know what is being assigned so we are very conservative here + and assume that any local variable could have been assigned. + */ + + if (argx->gtFlags & GTF_ASG) + { + // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to + // a tmp, then we need a temp in the late arg list. + if ((argCount > 1) || argx->OperIsCopyBlkOp() +#ifdef FEATURE_FIXED_OUT_ARGS + || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property + // that we only have late non-register args when that feature is on. +#endif // FEATURE_FIXED_OUT_ARGS + ) + { + curArgTabEntry->needTmp = true; + needsTemps = true; + } + + // For all previous arguments, unless they are a simple constant + // we require that they be evaluated into temps + for (unsigned prevInx = 0; prevInx < curInx; prevInx++) + { + fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; + assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); + + if (!prevArgTabEntry->GetNode()->IsInvariant()) + { + prevArgTabEntry->needTmp = true; + needsTemps = true; + } + } + } + + bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0); +#if FEATURE_FIXED_OUT_ARGS + // Like calls, if this argument has a tree that will do an inline throw, + // a call to a jit helper, then we need to treat it like a call (but only + // if there are/were any stack args). + // This means unnesting, sorting, etc. Technically this is overly + // conservative, but I want to avoid as much special-case debug-only code + // as possible, so leveraging the GTF_CALL flag is the easiest. + // + if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && compiler->opts.compDbgCode && + (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT)) + { + for (unsigned otherInx = 0; otherInx < argCount; otherInx++) + { + if (otherInx == curInx) + { + continue; + } + + if (argTable[otherInx]->GetRegNum() == REG_STK) + { + treatLikeCall = true; + break; + } + } + } +#endif // FEATURE_FIXED_OUT_ARGS + + /* If it contains a call (GTF_CALL) then itself and everything before the call + with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT + has to be kept in the right order since we will move the call to the first position) + + For calls we don't have to be quite as conservative as we are with an assignment + since the call won't be modifying any non-address taken LclVars. 
+ */ + + if (treatLikeCall) + { + if (argCount > 1) // If this is not the only argument + { + curArgTabEntry->needTmp = true; + needsTemps = true; + } + else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL)) + { + // Spill all arguments that are floating point calls + curArgTabEntry->needTmp = true; + needsTemps = true; + } + + // All previous arguments may need to be evaluated into temps + for (unsigned prevInx = 0; prevInx < curInx; prevInx++) + { + fgArgTabEntry* prevArgTabEntry = argTable[prevInx]; + assert(prevArgTabEntry->argNum < curArgTabEntry->argNum); + + // For all previous arguments, if they have any GTF_ALL_EFFECT + // we require that they be evaluated into a temp + if ((prevArgTabEntry->GetNode()->gtFlags & GTF_ALL_EFFECT) != 0) + { + prevArgTabEntry->needTmp = true; + needsTemps = true; + } +#if FEATURE_FIXED_OUT_ARGS + // Or, if they are stored into the FIXED_OUT_ARG area + // we require that they be moved to the gtCallLateArgs + // and replaced with a placeholder node + else if (prevArgTabEntry->GetRegNum() == REG_STK) + { + prevArgTabEntry->needPlace = true; + } +#if FEATURE_ARG_SPLIT + else if (prevArgTabEntry->IsSplit()) + { + prevArgTabEntry->needPlace = true; + } +#endif // TARGET_ARM +#endif + } + } + +#if FEATURE_MULTIREG_ARGS + // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST + // with multiple indirections, so here we consider spilling it into a tmp LclVar. + // + CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef TARGET_ARM + bool isMultiRegArg = + (curArgTabEntry->numRegs > 0) && (curArgTabEntry->numRegs + curArgTabEntry->GetStackSlotsNumber() > 1); +#else + bool isMultiRegArg = (curArgTabEntry->numRegs > 1); +#endif + + if ((varTypeIsStruct(argx->TypeGet())) && (curArgTabEntry->needTmp == false)) + { + if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)) + { + // Spill multireg struct arguments that have Assignments or Calls embedded in them + curArgTabEntry->needTmp = true; + needsTemps = true; + } + else + { + // We call gtPrepareCost to measure the cost of evaluating this tree + compiler->gtPrepareCost(argx); + + if (isMultiRegArg && (argx->GetCostEx() > (6 * IND_COST_EX))) + { + // Spill multireg struct arguments that are expensive to evaluate twice + curArgTabEntry->needTmp = true; + needsTemps = true; + } +#if defined(FEATURE_SIMD) && defined(TARGET_ARM64) + else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet())) + { + // SIMD types do not need the optimization below due to their sizes + if (argx->OperIsSimdOrHWintrinsic() || + (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) && + argx->AsObj()->gtOp1->AsOp()->gtOp1->OperIsSimdOrHWintrinsic())) + { + curArgTabEntry->needTmp = true; + needsTemps = true; + } + } +#endif +#ifndef TARGET_ARM + // TODO-Arm: This optimization is not implemented for ARM32 + // so we skip this for ARM32 until it is ported to use RyuJIT backend + // + else if (argx->OperGet() == GT_OBJ) + { + GenTreeObj* argObj = argx->AsObj(); + unsigned structSize = argObj->GetLayout()->GetSize(); + switch (structSize) + { + case 3: + case 5: + case 6: + case 7: + // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes + // + if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar? + { + // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes + // For now we use a a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp. 
+ // + curArgTabEntry->needTmp = true; + needsTemps = true; + } + break; + case 11: + case 13: + case 14: + case 15: + // Spill any GT_OBJ multireg structs that are difficult to extract + // + // When we have a GT_OBJ of a struct with the above sizes we would need + // to use 3 or 4 load instructions to load the exact size of this struct. + // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence + // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp. + // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing + // the argument. + // + curArgTabEntry->needTmp = true; + needsTemps = true; + break; + + default: + break; + } + } +#endif // !TARGET_ARM + } + } +#endif // FEATURE_MULTIREG_ARGS + } + + // We only care because we can't spill structs and qmarks involve a lot of spilling, but + // if we don't have qmarks, then it doesn't matter. + // So check for Qmark's globally once here, instead of inside the loop. + // + const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed); + +#if FEATURE_FIXED_OUT_ARGS + + // For Arm/x64 we only care because we can't reorder a register + // argument that uses GT_LCLHEAP. This is an optimization to + // save a check inside the below loop. + // + const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed); + +#else + + const bool hasStackArgsWeCareAbout = hasStackArgs; + +#endif // FEATURE_FIXED_OUT_ARGS + + // If we have any stack args we have to force the evaluation + // of any arguments passed in registers that might throw an exception + // + // Technically we only a required to handle the following two cases: + // a GT_IND with GTF_IND_RNGCHK (only on x86) or + // a GT_LCLHEAP node that allocates stuff on the stack + // + if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout) + { + for (unsigned curInx = 0; curInx < argCount; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + assert(curArgTabEntry != nullptr); + GenTree* argx = curArgTabEntry->GetNode(); + + // Examine the register args that are currently not marked needTmp + // + if (!curArgTabEntry->needTmp && (curArgTabEntry->GetRegNum() != REG_STK)) + { + if (hasStackArgsWeCareAbout) + { +#if !FEATURE_FIXED_OUT_ARGS + // On x86 we previously recorded a stack depth of zero when + // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag + // Thus we can not reorder the argument after any stack based argument + // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to + // check for it explicitly.) 
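
The "a, a=5, a" example earlier in ArgsComplete is the motivation for needTmp: once some argument contains an assignment, every earlier argument that might read the assigned location has to be evaluated into a temp first, otherwise deferring its evaluation (for instance into the late-argument list) would observe the updated value. A tiny standalone illustration of the hazard and of the spill-to-temp fix:

#include <cassert>

int main()
{
    // Arguments "a, a=5, a" evaluated strictly left to right:
    int a    = 1;
    int arg0 = a;       // 1
    int arg1 = (a = 5); // 5, and a is updated
    int arg2 = a;       // 5
    assert(arg0 == 1 && arg1 == 5 && arg2 == 5);

    // The fix ArgsComplete arranges: spill the earlier argument to a temp
    // before the assignment runs, then the deferred use still sees 1.
    a = 1;
    int tmp0       = a;       // early evaluation into a temp
    int argWithAsg = (a = 5); // the argument containing the assignment
    int lateArg0   = tmp0;    // deferred use of the first argument
    assert(lateArg0 == 1 && argWithAsg == 5);

    return 0;
}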
+ // + if (argx->gtFlags & GTF_EXCEPT) + { + curArgTabEntry->needTmp = true; + needsTemps = true; + continue; + } +#else + // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP + // + if (argx->gtFlags & GTF_EXCEPT) + { + assert(compiler->compLocallocUsed); + + // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree + // + if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT) + { + curArgTabEntry->needTmp = true; + needsTemps = true; + continue; + } + } +#endif + } + if (hasStructRegArgWeCareAbout) + { + // Returns true if a GT_QMARK node is encountered in the argx tree + // + if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT) + { + curArgTabEntry->needTmp = true; + needsTemps = true; + continue; + } + } + } + } + } + + argsComplete = true; +} + +void fgArgInfo::SortArgs() +{ + assert(argsComplete == true); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nSorting the arguments:\n"); + } +#endif + + /* Shuffle the arguments around before we build the gtCallLateArgs list. + The idea is to move all "simple" arguments like constants and local vars + to the end of the table, and move the complex arguments towards the beginning + of the table. This will help prevent registers from being spilled by + allowing us to evaluate the more complex arguments before the simpler arguments. + The argTable ends up looking like: + +------------------------------------+ <--- argTable[argCount - 1] + | constants | + +------------------------------------+ + | local var / local field | + +------------------------------------+ + | remaining arguments sorted by cost | + +------------------------------------+ + | temps (argTable[].needTmp = true) | + +------------------------------------+ + | args with calls (GTF_CALL) | + +------------------------------------+ <--- argTable[0] + */ + + /* Set the beginning and end for the new argument table */ + unsigned curInx; + int regCount = 0; + unsigned begTab = 0; + unsigned endTab = argCount - 1; + unsigned argsRemaining = argCount; + + // First take care of arguments that are constants. + // [We use a backward iterator pattern] + // + curInx = argCount; + do + { + curInx--; + + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + if (curArgTabEntry->GetRegNum() != REG_STK) + { + regCount++; + } + + assert(curArgTabEntry->lateUse == nullptr); + + // Skip any already processed args + // + if (!curArgTabEntry->processed) + { + GenTree* argx = curArgTabEntry->GetNode(); + + // put constants at the end of the table + // + if (argx->gtOper == GT_CNS_INT) + { + noway_assert(curInx <= endTab); + + curArgTabEntry->processed = true; + + // place curArgTabEntry at the endTab position by performing a swap + // + if (curInx != endTab) + { + argTable[curInx] = argTable[endTab]; + argTable[endTab] = curArgTabEntry; + } + + endTab--; + argsRemaining--; + } + } + } while (curInx > 0); + + if (argsRemaining > 0) + { + // Next take care of arguments that are calls. 
+ // [We use a forward iterator pattern] + // + for (curInx = begTab; curInx <= endTab; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + // Skip any already processed args + // + if (!curArgTabEntry->processed) + { + GenTree* argx = curArgTabEntry->GetNode(); + + // put calls at the beginning of the table + // + if (argx->gtFlags & GTF_CALL) + { + curArgTabEntry->processed = true; + + // place curArgTabEntry at the begTab position by performing a swap + // + if (curInx != begTab) + { + argTable[curInx] = argTable[begTab]; + argTable[begTab] = curArgTabEntry; + } + + begTab++; + argsRemaining--; + } + } + } + } + + if (argsRemaining > 0) + { + // Next take care arguments that are temps. + // These temps come before the arguments that are + // ordinary local vars or local fields + // since this will give them a better chance to become + // enregistered into their actual argument register. + // [We use a forward iterator pattern] + // + for (curInx = begTab; curInx <= endTab; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + // Skip any already processed args + // + if (!curArgTabEntry->processed) + { + if (curArgTabEntry->needTmp) + { + curArgTabEntry->processed = true; + + // place curArgTabEntry at the begTab position by performing a swap + // + if (curInx != begTab) + { + argTable[curInx] = argTable[begTab]; + argTable[begTab] = curArgTabEntry; + } + + begTab++; + argsRemaining--; + } + } + } + } + + if (argsRemaining > 0) + { + // Next take care of local var and local field arguments. + // These are moved towards the end of the argument evaluation. + // [We use a backward iterator pattern] + // + curInx = endTab + 1; + do + { + curInx--; + + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + // Skip any already processed args + // + if (!curArgTabEntry->processed) + { + GenTree* argx = curArgTabEntry->GetNode(); + + if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD)) + { + noway_assert(curInx <= endTab); + + curArgTabEntry->processed = true; + + // place curArgTabEntry at the endTab position by performing a swap + // + if (curInx != endTab) + { + argTable[curInx] = argTable[endTab]; + argTable[endTab] = curArgTabEntry; + } + + endTab--; + argsRemaining--; + } + } + } while (curInx > begTab); + } + + // Finally, take care of all the remaining arguments. + // Note that we fill in one arg at a time using a while loop. 
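
Taken together, the passes in SortArgs produce the ordering sketched in the diagram above: arguments containing calls first, then arguments needing temps, then the remaining arguments by decreasing cost (filled in by the while loop that follows), then local vars and fields, and constants last. A compact standalone model of that ordering using a comparison sort over simplified descriptors; the descriptor type and the use of std::stable_sort are assumptions for the example only, since the real code swaps table entries in place, one pass per bucket:

#include <algorithm>
#include <cassert>
#include <vector>

// Simplified stand-in for the properties SortArgs looks at.
struct ArgDesc
{
    bool isConstant;
    bool isLocal;  // local var or local field
    bool needsTmp;
    bool hasCall;
    int  cost;     // only consulted for the "remaining" bucket
};

// Bucket number: lower buckets are evaluated first.
static int bucketOf(const ArgDesc& a)
{
    if (a.hasCall)    return 0;
    if (a.needsTmp)   return 1;
    if (a.isConstant) return 4;
    if (a.isLocal)    return 3;
    return 2; // remaining arguments, ordered by cost below
}

int main()
{
    std::vector<ArgDesc> args = {
        {true, false, false, false, 1},   // constant
        {false, false, false, true, 50},  // contains a call
        {false, true, false, false, 2},   // local var
        {false, false, false, false, 10}, // plain expression, cheap
        {false, false, false, false, 40}, // plain expression, expensive
        {false, false, true, false, 30},  // needs a temp
    };

    std::stable_sort(args.begin(), args.end(), [](const ArgDesc& x, const ArgDesc& y) {
        int bx = bucketOf(x), by = bucketOf(y);
        if (bx != by)
            return bx < by;
        // Within the "remaining" bucket, more expensive arguments come first.
        return (bx == 2) && (x.cost > y.cost);
    });

    assert(args[0].hasCall);
    assert(args[1].needsTmp);
    assert(args[2].cost == 40 && args[3].cost == 10);
    assert(args[4].isLocal);
    assert(args[5].isConstant);
    return 0;
}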
+ bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop + while (argsRemaining > 0) + { + /* Find the most expensive arg remaining and evaluate it next */ + + fgArgTabEntry* expensiveArgTabEntry = nullptr; + unsigned expensiveArg = UINT_MAX; + unsigned expensiveArgCost = 0; + + // [We use a forward iterator pattern] + // + for (curInx = begTab; curInx <= endTab; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + // Skip any already processed args + // + if (!curArgTabEntry->processed) + { + GenTree* argx = curArgTabEntry->GetNode(); + + // We should have already handled these kinds of args + assert(argx->gtOper != GT_LCL_VAR); + assert(argx->gtOper != GT_LCL_FLD); + assert(argx->gtOper != GT_CNS_INT); + + // This arg should either have no persistent side effects or be the last one in our table + // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); + + if (argsRemaining == 1) + { + // This is the last arg to place + expensiveArg = curInx; + expensiveArgTabEntry = curArgTabEntry; + assert(begTab == endTab); + break; + } + else + { + if (!costsPrepared) + { + /* We call gtPrepareCost to measure the cost of evaluating this tree */ + compiler->gtPrepareCost(argx); + } + + if (argx->GetCostEx() > expensiveArgCost) + { + // Remember this arg as the most expensive one that we have yet seen + expensiveArgCost = argx->GetCostEx(); + expensiveArg = curInx; + expensiveArgTabEntry = curArgTabEntry; + } + } + } + } + + noway_assert(expensiveArg != UINT_MAX); + + // put the most expensive arg towards the beginning of the table + + expensiveArgTabEntry->processed = true; + + // place expensiveArgTabEntry at the begTab position by performing a swap + // + if (expensiveArg != begTab) + { + argTable[expensiveArg] = argTable[begTab]; + argTable[begTab] = expensiveArgTabEntry; + } + + begTab++; + argsRemaining--; + + costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop + } + + // The table should now be completely filled and thus begTab should now be adjacent to endTab + // and regArgsRemaining should be zero + assert(begTab == (endTab + 1)); + assert(argsRemaining == 0); + +#if !FEATURE_FIXED_OUT_ARGS + // Finally build the regArgList + // + callTree->AsCall()->regArgList = NULL; + callTree->AsCall()->regArgListCount = regCount; + + unsigned regInx = 0; + for (curInx = 0; curInx < argCount; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + if (curArgTabEntry->GetRegNum() != REG_STK) + { + // Encode the argument register in the register mask + // + callTree->AsCall()->regArgList[regInx] = curArgTabEntry->GetRegNum(); + regInx++; + } + } +#endif // !FEATURE_FIXED_OUT_ARGS + + argsSorted = true; +} + +#ifdef DEBUG +void fgArgInfo::Dump(Compiler* compiler) const +{ + for (unsigned curInx = 0; curInx < ArgCount(); curInx++) + { + fgArgTabEntry* curArgEntry = ArgTable()[curInx]; + curArgEntry->Dump(); + } +} +#endif + +//------------------------------------------------------------------------------ +// fgMakeTmpArgNode : This function creates a tmp var only if needed. +// We need this to be done in order to enforce ordering +// of the evaluation of arguments. +// +// Arguments: +// curArgTabEntry +// +// Return Value: +// the newly created temp var tree. 
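
fgMakeTmpArgNode below either retypes a small struct temp as a primitive of matching size, so it can be loaded straight into the argument register, or takes the temp's address and wraps it in an OBJ node. The first option amounts to reading the struct's bytes as an integer of the same width; a minimal standalone illustration of that reinterpretation, using memcpy because only the bit pattern matters here (this is not the real GT_LCL_FLD mechanism, and the Pair type is invented for the example):

#include <cassert>
#include <cstdint>
#include <cstring>

// An 8-byte struct of the kind some ABIs pass in a single integer register.
struct Pair
{
    int32_t x;
    int32_t y;
};

int main()
{
    static_assert(sizeof(Pair) == sizeof(uint64_t), "expected an 8-byte struct");

    Pair p = {1, 2};

    // "Retype" the struct as a same-sized primitive by reading its bytes.
    uint64_t asPrimitive;
    std::memcpy(&asPrimitive, &p, sizeof(asPrimitive));

    // Round-tripping the bits reproduces the original fields.
    Pair q;
    std::memcpy(&q, &asPrimitive, sizeof(q));
    assert(q.x == 1 && q.y == 2);

    return 0;
}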
+ +GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) +{ + unsigned tmpVarNum = curArgTabEntry->tmpNum; + LclVarDsc* varDsc = &lvaTable[tmpVarNum]; + assert(varDsc->lvIsTemp); + var_types type = varDsc->TypeGet(); + + // Create a copy of the temp to go into the late argument list + GenTree* arg = gtNewLclvNode(tmpVarNum, type); + GenTree* addrNode = nullptr; + + if (varTypeIsStruct(type)) + { + +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) + + // Can this type be passed as a primitive type? + // If so, the following call will return the corresponding primitive type. + // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type. + + bool passedAsPrimitive = false; + if (curArgTabEntry->TryPassAsPrimitive()) + { + CORINFO_CLASS_HANDLE clsHnd = varDsc->GetStructHnd(); + var_types structBaseType = + getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd, curArgTabEntry->IsVararg()); + + if (structBaseType != TYP_UNKNOWN) + { + passedAsPrimitive = true; +#if defined(UNIX_AMD64_ABI) + // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry, + // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take + // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again. + // + if (genIsValidFloatReg(curArgTabEntry->GetRegNum())) + { + if (structBaseType == TYP_INT) + { + structBaseType = TYP_FLOAT; + } + else + { + assert(structBaseType == TYP_LONG); + structBaseType = TYP_DOUBLE; + } + } +#endif + type = structBaseType; + } + } + + // If it is passed in registers, don't get the address of the var. Make it a + // field instead. It will be loaded in registers with putarg_reg tree in lower. + if (passedAsPrimitive) + { + arg->ChangeOper(GT_LCL_FLD); + arg->gtType = type; + } + else + { + var_types addrType = TYP_BYREF; + arg = gtNewOperNode(GT_ADDR, addrType, arg); + addrNode = arg; + +#if FEATURE_MULTIREG_ARGS +#ifdef TARGET_ARM64 + assert(varTypeIsStruct(type)); + if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) + { + // We will create a GT_OBJ for the argument below. + // This will be passed by value in two registers. + assert(addrNode != nullptr); + + // Create an Obj of the temp to use it as a call argument. + arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); + } +#else + // Always create an Obj of the temp to use it as a call argument. + arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); +#endif // !TARGET_ARM64 +#endif // FEATURE_MULTIREG_ARGS + } + +#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) + + // other targets, we pass the struct by value + assert(varTypeIsStruct(type)); + + addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg); + + // Get a new Obj node temp to use it as a call argument. + // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. + arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); + +#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) + + } // (varTypeIsStruct(type)) + + if (addrNode != nullptr) + { + assert(addrNode->gtOper == GT_ADDR); + + // This will prevent this LclVar from being optimized away + lvaSetVarAddrExposed(tmpVarNum); + + // the child of a GT_ADDR is required to have this flag set + addrNode->AsOp()->gtOp1->gtFlags |= GTF_DONT_CSE; + } + + return arg; +} + +//------------------------------------------------------------------------------ +// EvalArgsToTemps : Create temp assignments and populate the LateArgs list. 
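
EvalArgsToTemps below walks the sorted table and, for every entry marked needTmp, leaves an assignment to a fresh temp in the early argument list and a use of that temp in the late argument list; arguments that need no temp are either left in place or replaced with a placeholder. A highly simplified standalone model of that early/late split, with invented Arg and string-based "statement" stand-ins rather than the real GenTree lists:

#include <cassert>
#include <string>
#include <vector>

// Invented stand-in: an argument expression plus the decision made for it.
struct Arg
{
    std::string expr;
    bool        needTmp;
};

int main()
{
    std::vector<Arg> sortedArgs = {{"f()", true}, {"x + 1", false}, {"42", false}};

    std::vector<std::string> earlyStmts; // evaluated in order, before the call
    std::vector<std::string> lateArgs;   // what the call actually consumes

    int tmpNum = 0;
    for (const Arg& arg : sortedArgs)
    {
        if (arg.needTmp)
        {
            // Evaluate the side-effecting argument up front into a temp ...
            std::string tmp = "t" + std::to_string(tmpNum++);
            earlyStmts.push_back(tmp + " = " + arg.expr);
            // ... and pass the temp itself as the late argument.
            lateArgs.push_back(tmp);
        }
        else
        {
            // No temp needed: the expression itself is passed late.
            lateArgs.push_back(arg.expr);
        }
    }

    assert(earlyStmts.size() == 1 && earlyStmts[0] == "t0 = f()");
    assert(lateArgs.size() == 3 && lateArgs[0] == "t0");
    return 0;
}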
+ +void fgArgInfo::EvalArgsToTemps() +{ + assert(argsSorted); + + unsigned regArgInx = 0; + // Now go through the argument table and perform the necessary evaluation into temps + GenTreeCall::Use* tmpRegArgNext = nullptr; + for (unsigned curInx = 0; curInx < argCount; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + assert(curArgTabEntry->lateUse == nullptr); + + GenTree* argx = curArgTabEntry->GetNode(); + GenTree* setupArg = nullptr; + GenTree* defArg; + +#if !FEATURE_FIXED_OUT_ARGS + // Only ever set for FEATURE_FIXED_OUT_ARGS + assert(curArgTabEntry->needPlace == false); + + // On x86 and other archs that use push instructions to pass arguments: + // Only the register arguments need to be replaced with placeholder nodes. + // Stacked arguments are evaluated and pushed (or stored into the stack) in order. + // + if (curArgTabEntry->GetRegNum() == REG_STK) + continue; +#endif + + if (curArgTabEntry->needTmp) + { + if (curArgTabEntry->isTmp == true) + { + // Create a copy of the temp to go into the late argument list + defArg = compiler->fgMakeTmpArgNode(curArgTabEntry); + + // mark the original node as a late argument + argx->gtFlags |= GTF_LATE_ARG; + } + else + { + // Create a temp assignment for the argument + // Put the temp in the gtCallLateArgs list + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (compiler->verbose) + { + printf("Argument with 'side effect'...\n"); + compiler->gtDispTree(argx); + } +#endif + +#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) + noway_assert(argx->gtType != TYP_STRUCT); +#endif + + unsigned tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect")); + if (argx->gtOper == GT_MKREFANY) + { + // For GT_MKREFANY, typically the actual struct copying does + // not have any side-effects and can be delayed. So instead + // of using a temp for the whole struct, we can just use a temp + // for operand that that has a side-effect + GenTree* operand; + if ((argx->AsOp()->gtOp2->gtFlags & GTF_ALL_EFFECT) == 0) + { + operand = argx->AsOp()->gtOp1; + + // In the early argument evaluation, place an assignment to the temp + // from the source operand of the mkrefany + setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); + + // Replace the operand for the mkrefany with the new temp. + argx->AsOp()->gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); + } + else if ((argx->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT) == 0) + { + operand = argx->AsOp()->gtOp2; + + // In the early argument evaluation, place an assignment to the temp + // from the source operand of the mkrefany + setupArg = compiler->gtNewTempAssign(tmpVarNum, operand); + + // Replace the operand for the mkrefany with the new temp. + argx->AsOp()->gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet()); + } + } + + if (setupArg != nullptr) + { + // Now keep the mkrefany for the late argument list + defArg = argx; + + // Clear the side-effect flags because now both op1 and op2 have no side-effects + defArg->gtFlags &= ~GTF_ALL_EFFECT; + } + else + { + setupArg = compiler->gtNewTempAssign(tmpVarNum, argx); + + LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum; + var_types lclVarType = genActualType(argx->gtType); + var_types scalarType = TYP_UNKNOWN; + + if (setupArg->OperIsCopyBlkOp()) + { + setupArg = compiler->fgMorphCopyBlock(setupArg); +#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) + if (lclVarType == TYP_STRUCT) + { + // This scalar LclVar widening step is only performed for ARM architectures. 
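+ // (The enclosing #if also includes UNIX_AMD64_ABI, where a small struct may likewise be
+ // widened to a single scalar type; see the comment on scalarType below.)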
+ // + CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); + unsigned structSize = varDsc->lvExactSize; + + scalarType = + compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->IsVararg()); + } +#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) + } + + // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => + // 8) + if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) + { + // Create a GT_LCL_FLD using the wider type to go to the late argument list + defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0); + } + else + { + // Create a copy of the temp to go to the late argument list + defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType); + } + + curArgTabEntry->isTmp = true; + curArgTabEntry->tmpNum = tmpVarNum; + +#ifdef TARGET_ARM + // Previously we might have thought the local was promoted, and thus the 'COPYBLK' + // might have left holes in the used registers (see + // fgAddSkippedRegsInPromotedStructArg). + // Too bad we're not that smart for these intermediate temps... + if (isValidIntArgReg(curArgTabEntry->GetRegNum()) && (curArgTabEntry->numRegs > 1)) + { + regNumber argReg = curArgTabEntry->GetRegNum(); + regMaskTP allUsedRegs = genRegMask(curArgTabEntry->GetRegNum()); + for (unsigned i = 1; i < curArgTabEntry->numRegs; i++) + { + argReg = genRegArgNext(argReg); + allUsedRegs |= genRegMask(argReg); + } + } +#endif // TARGET_ARM + } + + /* mark the assignment as a late argument */ + setupArg->gtFlags |= GTF_LATE_ARG; + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n Evaluate to a temp:\n"); + compiler->gtDispTree(setupArg); + } +#endif + } + } + else // curArgTabEntry->needTmp == false + { + // On x86 - + // Only register args are replaced with placeholder nodes + // and the stack based arguments are evaluated and pushed in order. + // + // On Arm/x64 - When needTmp is false and needPlace is false, + // the non-register arguments are evaluated and stored in order. + // When needPlace is true we have a nested call that comes after + // this argument so we have to replace it in the gtCallArgs list + // (the initial argument evaluation list) with a placeholder. + // + if ((curArgTabEntry->GetRegNum() == REG_STK) && (curArgTabEntry->needPlace == false)) + { + continue; + } + + /* No temp needed - move the whole node to the gtCallLateArgs list */ + + /* The argument is deferred and put in the late argument list */ + + defArg = argx; + + // Create a placeholder node to put in its place in gtCallLateArgs. + + // For a struct type we also need to record the class handle of the arg. + CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; + +#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) + + // All structs are either passed (and retyped) as integral types, OR they + // are passed by reference. 
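+ // (A struct of size 1, 2, 4 or 8 bytes is re-typed and passed as a primitive of that size;
+ // any other size is passed by reference to a copy, so no TYP_STRUCT argument should reach
+ // this point.)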
+ noway_assert(argx->gtType != TYP_STRUCT); + +#else // !defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI) + + if (defArg->TypeGet() == TYP_STRUCT) + { + clsHnd = compiler->gtGetStructHandleIfPresent(defArg); + noway_assert(clsHnd != NO_CLASS_HANDLE); + } + +#endif // !(defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) + + setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd); + + /* mark the placeholder node as a late argument */ + setupArg->gtFlags |= GTF_LATE_ARG; + +#ifdef DEBUG + if (compiler->verbose) + { + if (curArgTabEntry->GetRegNum() == REG_STK) + { + printf("Deferred stack argument :\n"); + } + else + { + printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->GetRegNum())); + } + + compiler->gtDispTree(argx); + printf("Replaced with placeholder node:\n"); + compiler->gtDispTree(setupArg); + } +#endif + } + + if (setupArg != nullptr) + { + noway_assert(curArgTabEntry->use->GetNode() == argx); + curArgTabEntry->use->SetNode(setupArg); + } + + /* deferred arg goes into the late argument list */ + + if (tmpRegArgNext == nullptr) + { + tmpRegArgNext = compiler->gtNewCallArgs(defArg); + callTree->AsCall()->gtCallLateArgs = tmpRegArgNext; + } + else + { + noway_assert(tmpRegArgNext->GetNode() != nullptr); + tmpRegArgNext->SetNext(compiler->gtNewCallArgs(defArg)); + + tmpRegArgNext = tmpRegArgNext->GetNext(); + } + + curArgTabEntry->lateUse = tmpRegArgNext; + curArgTabEntry->SetLateArgInx(regArgInx++); + } + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nShuffled argument table: "); + for (unsigned curInx = 0; curInx < argCount; curInx++) + { + fgArgTabEntry* curArgTabEntry = argTable[curInx]; + + if (curArgTabEntry->GetRegNum() != REG_STK) + { + printf("%s ", getRegName(curArgTabEntry->GetRegNum())); + } + } + printf("\n"); + } +#endif +} + +// Return a conservative estimate of the stack size in bytes. +// It will be used only on the intercepted-for-host code path to copy the arguments. +int Compiler::fgEstimateCallStackSize(GenTreeCall* call) +{ + int numArgs = 0; + for (GenTreeCall::Use& use : call->Args()) + { + numArgs++; + } + + int numStkArgs; + if (numArgs > MAX_REG_ARG) + { + numStkArgs = numArgs - MAX_REG_ARG; + } + else + { + numStkArgs = 0; + } + + return numStkArgs * REGSIZE_BYTES; +} + +//------------------------------------------------------------------------------ +// fgMakeMultiUse : If the node is a local, clone it, otherwise insert a comma form temp +// +// Arguments: +// ppTree - a pointer to the child node we will be replacing with the comma expression that +// evaluates ppTree to a temp and returns the result +// +// Return Value: +// A fresh GT_LCL_VAR node referencing the temp which has not been used + +GenTree* Compiler::fgMakeMultiUse(GenTree** pOp) +{ + GenTree* tree = *pOp; + if (tree->IsLocal()) + { + return gtClone(tree); + } + else + { + return fgInsertCommaFormTemp(pOp); + } +} + +//------------------------------------------------------------------------------ +// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree, +// and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl) +// +// Arguments: +// ppTree - a pointer to the child node we will be replacing with the comma expression that +// evaluates ppTree to a temp and returns the result +// +// structType - value type handle if the temp created is of TYP_STRUCT. 
+// +// Return Value: +// A fresh GT_LCL_VAR node referencing the temp which has not been used +// + +GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/) +{ + GenTree* subTree = *ppTree; + + unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable")); + + if (varTypeIsStruct(subTree)) + { + assert(structType != nullptr); + lvaSetStruct(lclNum, structType, false); + } + + // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree. + // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for + // setting type of lcl vars created. + GenTree* asg = gtNewTempAssign(lclNum, subTree); + + GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, subTree->TypeGet(), lclNum); + + GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load); + + *ppTree = comma; + + return new (this, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, subTree->TypeGet(), lclNum); +} + +//------------------------------------------------------------------------ +// fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg +// +// Arguments: +// callNode - the call for which we are generating the fgArgInfo +// +// Return Value: +// None +// +// Notes: +// This method is idempotent in that it checks whether the fgArgInfo has already been +// constructed, and just returns. +// This method only computes the arg table and arg entries for the call (the fgArgInfo), +// and makes no modification of the args themselves. +// +// The IR for the call args can change for calls with non-standard arguments: some non-standard +// arguments add new call argument IR nodes. +// +void Compiler::fgInitArgInfo(GenTreeCall* call) +{ + GenTreeCall::Use* args; + GenTree* argx; + + unsigned argIndex = 0; + unsigned intArgRegNum = 0; + unsigned fltArgRegNum = 0; + DEBUG_ARG_SLOTS_ONLY(unsigned argSlots = 0;) + + bool callHasRetBuffArg = call->HasRetBufArg(); + bool callIsVararg = call->IsVarargs(); + +#ifdef TARGET_ARM + regMaskTP argSkippedRegMask = RBM_NONE; + regMaskTP fltArgSkippedRegMask = RBM_NONE; +#endif // TARGET_ARM + +#if defined(TARGET_X86) + unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated +#else + const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number +#endif + + if (call->fgArgInfo != nullptr) + { + // We've already initialized and set the fgArgInfo. + return; + } + JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); + + // At this point, we should never have gtCallLateArgs, as this needs to be done before those are determined. + assert(call->gtCallLateArgs == nullptr); + +#ifdef TARGET_UNIX + if (callIsVararg) + { + // Currently native varargs is not implemented on non windows targets. + // + // Note that some targets like Arm64 Unix should not need much work as + // the ABI is the same. While other targets may only need small changes + // such as amd64 Unix, which just expects RAX to pass numFPArguments. + NYI("Morphing Vararg call not yet implemented on non Windows targets."); + } +#endif // TARGET_UNIX + + // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed + // following the normal calling convention or in the normal argument registers. 
We either mark existing + // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the + // non-standard arguments into the argument list, below. + class NonStandardArgs + { + struct NonStandardArg + { + regNumber reg; // The register to be assigned to this non-standard argument. + GenTree* node; // The tree node representing this non-standard argument. + // Note that this must be updated if the tree node changes due to morphing! + }; + + ArrayStack args; + + public: + NonStandardArgs(CompAllocator alloc) : args(alloc, 3) // We will have at most 3 non-standard arguments + { + } + + //----------------------------------------------------------------------------- + // Add: add a non-standard argument to the table of non-standard arguments + // + // Arguments: + // node - a GenTree node that has a non-standard argument. + // reg - the register to assign to this node. + // + // Return Value: + // None. + // + void Add(GenTree* node, regNumber reg) + { + NonStandardArg nsa = {reg, node}; + args.Push(nsa); + } + + //----------------------------------------------------------------------------- + // Find: Look for a GenTree* in the set of non-standard args. + // + // Arguments: + // node - a GenTree node to look for + // + // Return Value: + // The index of the non-standard argument (a non-negative, unique, stable number). + // If the node is not a non-standard argument, return -1. + // + int Find(GenTree* node) + { + for (int i = 0; i < args.Height(); i++) + { + if (node == args.Top(i).node) + { + return i; + } + } + return -1; + } + + //----------------------------------------------------------------------------- + // FindReg: Look for a GenTree node in the non-standard arguments set. If found, + // set the register to use for the node. + // + // Arguments: + // node - a GenTree node to look for + // pReg - an OUT argument. *pReg is set to the non-standard register to use if + // 'node' is found in the non-standard argument set. + // + // Return Value: + // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the + // register to use. + // 'false' otherwise (in this case, *pReg is unmodified). + // + bool FindReg(GenTree* node, regNumber* pReg) + { + for (int i = 0; i < args.Height(); i++) + { + NonStandardArg& nsa = args.TopRef(i); + if (node == nsa.node) + { + *pReg = nsa.reg; + return true; + } + } + return false; + } + + //----------------------------------------------------------------------------- + // Replace: Replace the non-standard argument node at a given index. This is done when + // the original node was replaced via morphing, but we need to continue to assign a + // particular non-standard arg to it. + // + // Arguments: + // index - the index of the non-standard arg. It must exist. + // node - the new GenTree node. + // + // Return Value: + // None. + // + void Replace(int index, GenTree* node) + { + args.TopRef(index).node = node; + } + + } nonStandardArgs(getAllocator(CMK_ArrayStack)); + + // Count of args. On first morph, this is counted before we've filled in the arg table. + // On remorph, we grab it from the arg table. + unsigned numArgs = 0; + + // First we need to count the args + if (call->gtCallThisArg != nullptr) + { + numArgs++; + } + for (GenTreeCall::Use& use : call->Args()) + { + numArgs++; + } + + // Insert or mark non-standard args. 
These are either outside the normal calling convention, or + // arguments registers that don't follow the normal progression of argument registers in the calling + // convention (such as for the ARM64 fixed return buffer argument x8). + // + // *********** NOTE ************* + // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments + // in the implementation of fast tail call. + // *********** END NOTE ********* + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(TARGET_X86) || defined(TARGET_ARM) + // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers has a custom calling convention. + // Set the argument registers correctly here. + if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME)) + { + GenTreeCall::Use* args = call->gtCallArgs; + GenTree* arg1 = args->GetNode(); + assert(arg1 != nullptr); + nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME); + } +#endif // defined(TARGET_X86) || defined(TARGET_ARM) +#if defined(TARGET_ARM) + // A non-standard calling convention using wrapper delegate invoke is used on ARM, only, for wrapper + // delegates. It is used for VSD delegate calls where the VSD custom calling convention ABI requires passing + // R4, a callee-saved register, with a special value. Since R4 is a callee-saved register, its value needs + // to be preserved. Thus, the VM uses a wrapper delegate IL stub, which preserves R4 and also sets up R4 + // correctly for the VSD call. The VM is simply reusing an existing mechanism (wrapper delegate IL stub) + // to achieve its goal for delegate VSD call. See COMDelegate::NeedsWrapperDelegate() in the VM for details. + else if (call->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV) + { + GenTree* arg = call->gtCallThisArg->GetNode(); + if (arg->OperIsLocal()) + { + arg = gtClone(arg, true); + } + else + { + GenTree* tmp = fgInsertCommaFormTemp(&arg); + call->gtCallThisArg->SetNode(arg); + call->gtFlags |= GTF_ASG; + arg = tmp; + } + noway_assert(arg != nullptr); + + GenTree* newArg = new (this, GT_ADDR) + GenTreeAddrMode(TYP_BYREF, arg, nullptr, 0, eeGetEEInfo()->offsetOfWrapperDelegateIndirectCell); + + // Append newArg as the last arg + GenTreeCall::Use** insertionPoint = &call->gtCallArgs; + for (; *insertionPoint != nullptr; insertionPoint = &((*insertionPoint)->NextRef())) + { + } + *insertionPoint = gtNewCallArgs(newArg); + + numArgs++; + nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg()); + } +#endif // defined(TARGET_ARM) +#if defined(TARGET_X86) + // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the + // hi part to be in EDX. This sets the argument registers up correctly. + else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || + call->IsHelperCall(this, CORINFO_HELP_LRSZ)) + { + GenTreeCall::Use* args = call->gtCallArgs; + GenTree* arg1 = args->GetNode(); + assert(arg1 != nullptr); + nonStandardArgs.Add(arg1, REG_LNGARG_LO); + + args = args->GetNext(); + GenTree* arg2 = args->GetNode(); + assert(arg2 != nullptr); + nonStandardArgs.Add(arg2, REG_LNGARG_HI); + } +#else // !TARGET_X86 + // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed. + // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling + // convention for x86/SSE. + + // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it. 
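+ // (On ARM64 this fixed return buffer register is x8, which sits outside the normal x0-x7
+ // argument register sequence.)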
+ // + // We don't use the fixed return buffer argument if we have the special unmanaged instance call convention. + // That convention doesn't use the fixed return buffer register. + // + CLANG_FORMAT_COMMENT_ANCHOR; + + if (call->HasFixedRetBufArg()) + { + args = call->gtCallArgs; + assert(args != nullptr); + + argx = call->gtCallArgs->GetNode(); + + // We don't increment numArgs here, since we already counted this argument above. + + nonStandardArgs.Add(argx, theFixedRetBuffReg()); + } + + // We are allowed to have a Fixed Return Buffer argument combined + // with any of the remaining non-standard arguments + // + CLANG_FORMAT_COMMENT_ANCHOR; + + if (call->IsVirtualStub()) + { + if (!call->IsTailCallViaJitHelper()) + { + GenTree* stubAddrArg = fgGetStubAddrArg(call); + // And push the stub address onto the list of arguments + call->gtCallArgs = gtPrependNewCallArg(stubAddrArg, call->gtCallArgs); + + numArgs++; + nonStandardArgs.Add(stubAddrArg, stubAddrArg->GetRegNum()); + } + else + { + // If it is a VSD call getting dispatched via tail call helper, + // fgMorphTailCallViaJitHelper() would materialize stub addr as an additional + // parameter added to the original arg list and hence no need to + // add as a non-standard arg. + } + } + else +#endif // !TARGET_X86 + if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr)) + { + assert(!call->IsUnmanaged()); + + GenTree* arg = call->gtCallCookie; + noway_assert(arg != nullptr); + call->gtCallCookie = nullptr; + +#if defined(TARGET_X86) + // x86 passes the cookie on the stack as the final argument to the call. + GenTreeCall::Use** insertionPoint = &call->gtCallArgs; + for (; *insertionPoint != nullptr; insertionPoint = &((*insertionPoint)->NextRef())) + { + } + *insertionPoint = gtNewCallArgs(arg); +#else // !defined(TARGET_X86) + // All other architectures pass the cookie in a register. + call->gtCallArgs = gtPrependNewCallArg(arg, call->gtCallArgs); +#endif // defined(TARGET_X86) + + nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM); + numArgs++; + + // put destination into R10/EAX + arg = gtClone(call->gtCallAddr, true); + call->gtCallArgs = gtPrependNewCallArg(arg, call->gtCallArgs); + numArgs++; + + nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM); + + // finally change this call to a helper call + call->gtCallType = CT_HELPER; + call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI); + } +#if defined(FEATURE_READYTORUN_COMPILER) && defined(TARGET_ARMARCH) + // For arm, we dispatch code same as VSD using virtualStubParamInfo->GetReg() + // for indirection cell address, which ZapIndirectHelperThunk expects. + if (call->IsR2RRelativeIndir()) + { + assert(call->gtEntryPoint.addr != nullptr); + + size_t addrValue = (size_t)call->gtEntryPoint.addr; + GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR); +#ifdef DEBUG + indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; +#endif + indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM); +#ifdef TARGET_ARM + // Issue #xxxx : Don't attempt to CSE this constant on ARM32 + // + // This constant has specific register requirements, and LSRA doesn't currently correctly + // handle them when the value is in a CSE'd local. + indirectCellAddress->SetDoNotCSE(); +#endif // TARGET_ARM + + // Push the stub address onto the list of arguments. 
+ call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); + + numArgs++; + nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum()); + } + +#endif // FEATURE_READYTORUN_COMPILER && TARGET_ARMARCH + + // Allocate the fgArgInfo for the call node; + // + call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs); + + // Add the 'this' argument value, if present. + if (call->gtCallThisArg != nullptr) + { + argx = call->gtCallThisArg->GetNode(); + assert(argIndex == 0); + assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT); + assert(varTypeIsGC(argx) || (argx->gtType == TYP_I_IMPL)); + + const regNumber regNum = genMapIntRegArgNumToRegNum(intArgRegNum); + const unsigned numRegs = 1; + const unsigned byteSize = TARGET_POINTER_SIZE; + const unsigned byteAlignment = TARGET_POINTER_SIZE; + const bool isStruct = false; + const bool isFloatHfa = false; + + // This is a register argument - put it in the table. + call->fgArgInfo->AddRegArg(argIndex, argx, call->gtCallThisArg, regNum, numRegs, byteSize, byteAlignment, + isStruct, isFloatHfa, + callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(0) + UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); + + intArgRegNum++; +#ifdef WINDOWS_AMD64_ABI + // Whenever we pass an integer register argument + // we skip the corresponding floating point register argument + fltArgRegNum++; +#endif // WINDOWS_AMD64_ABI + argIndex++; + DEBUG_ARG_SLOTS_ONLY(argSlots++;) + } + +#ifdef TARGET_X86 + // Compute the maximum number of arguments that can be passed in registers. + // For X86 we handle the varargs and unmanaged calling conventions + + if (call->gtFlags & GTF_CALL_POP_ARGS) + { + noway_assert(intArgRegNum < MAX_REG_ARG); + // No more register arguments for varargs (CALL_POP_ARGS) + maxRegArgs = intArgRegNum; + + // Add in the ret buff arg + if (callHasRetBuffArg) + maxRegArgs++; + } + + if (call->IsUnmanaged()) + { + noway_assert(intArgRegNum == 0); + + if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL) + { + noway_assert(call->gtCallArgs->GetNode()->TypeGet() == TYP_I_IMPL || + call->gtCallArgs->GetNode()->TypeGet() == TYP_BYREF || + call->gtCallArgs->GetNode()->gtOper == + GT_NOP); // the arg was already morphed to a register (fgMorph called twice) + maxRegArgs = 1; + } + else + { + maxRegArgs = 0; + } +#ifdef UNIX_X86_ABI + // Add in the ret buff arg + if (callHasRetBuffArg) + maxRegArgs++; +#endif + } +#endif // TARGET_X86 + + /* Morph the user arguments */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(TARGET_ARM) + + // The ARM ABI has a concept of back-filling of floating-point argument registers, according + // to the "Procedure Call Standard for the ARM Architecture" document, especially + // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can + // appear in a lower-numbered register than floating point argument N. That is, argument + // register allocation is not strictly increasing. To support this, we need to keep track of unused + // floating-point argument registers that we can back-fill. We only support 4-byte float and + // 8-byte double types, and one to four element HFAs composed of these types. With this, we will + // only back-fill single registers, since there is no way with these types to create + // an alignment hole greater than one register. However, there can be up to 3 back-fill slots + // available (with 16 FP argument registers). 
Consider this code: + // + // struct HFA { float x, y, z; }; // a three element HFA + // void bar(float a1, // passed in f0 + // double a2, // passed in f2/f3; skip f1 for alignment + // HFA a3, // passed in f4/f5/f6 + // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot + // HFA a5, // passed in f10/f11/f12 + // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill + // // slots + // float a7, // passed in f1 (back-filled) + // float a8, // passed in f7 (back-filled) + // float a9, // passed in f13 (back-filled) + // float a10) // passed on the stack in [OutArg+0] + // + // Note that if we ever support FP types with larger alignment requirements, then there could + // be more than single register back-fills. + // + // Once we assign a floating-pointer register to the stack, they all must be on the stack. + // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling + // continues only so long as no VFP CPRC has been allocated to a slot on the stack." + // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack + // and prevent any additional floating-point arguments from going in registers. + + bool anyFloatStackArgs = false; + +#endif // TARGET_ARM + +#ifdef UNIX_AMD64_ABI + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; +#endif // UNIX_AMD64_ABI + +#if defined(DEBUG) + // Check that we have valid information about call's argument types. + // For example: + // load byte; call(int) -> CALL(PUTARG_TYPE byte(IND byte)); + // load int; call(byte) -> CALL(PUTARG_TYPE int (IND int)); + // etc. + if (call->callSig != nullptr) + { + CORINFO_SIG_INFO* sig = call->callSig; + const unsigned sigArgsCount = sig->numArgs; + + GenTreeCall::Use* nodeArgs = call->gtCallArgs; + // It could include many arguments not included in `sig->numArgs`, for example, `this`, runtime lookup, cookie + // etc. + unsigned nodeArgsCount = call->NumChildren(); + + if (call->gtCallThisArg != nullptr) + { + // Handle the most common argument not in the `sig->numArgs`. + // so the following check works on more methods. + nodeArgsCount--; + } + + assert(nodeArgsCount >= sigArgsCount); + if ((nodeArgsCount == sigArgsCount) && + ((Target::g_tgtArgOrder == Target::ARG_ORDER_R2L) || (nodeArgsCount == 1))) + { + CORINFO_ARG_LIST_HANDLE sigArg = sig->args; + for (unsigned i = 0; i < sig->numArgs; ++i) + { + CORINFO_CLASS_HANDLE argClass; + const CorInfoType corType = strip(info.compCompHnd->getArgType(sig, sigArg, &argClass)); + const var_types sigType = JITtype2varType(corType); + + assert(nodeArgs != nullptr); + const GenTree* nodeArg = nodeArgs->GetNode(); + assert(nodeArg != nullptr); + const var_types nodeType = nodeArg->TypeGet(); + + assert((nodeType == sigType) || varTypeIsStruct(sigType) || + genTypeSize(nodeType) == genTypeSize(sigType)); + + sigArg = info.compCompHnd->getArgNext(sigArg); + nodeArgs = nodeArgs->GetNext(); + } + assert(nodeArgs == nullptr); + } + } +#endif // DEBUG + + for (args = call->gtCallArgs; args != nullptr; args = args->GetNext(), argIndex++) + { + argx = args->GetNode()->gtSkipPutArgType(); + + // Change the node to TYP_I_IMPL so we don't report GC info + // NOTE: We deferred this from the importer because of the inliner. + + if (argx->IsLocalAddrExpr() != nullptr) + { + argx->gtType = TYP_I_IMPL; + } + + // We should never have any ArgPlaceHolder nodes at this point. 
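+ // (GT_ARGPLACE placeholders are only introduced later, by EvalArgsToTemps, once the arg
+ // table has been built.)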
+ assert(!argx->IsArgPlaceHolderNode()); + + // Setup any HFA information about 'argx' + bool isHfaArg = false; + var_types hfaType = TYP_UNDEF; + unsigned hfaSlots = 0; + + bool passUsingFloatRegs; +#if !defined(OSX_ARM64_ABI) + unsigned argAlignBytes = TARGET_POINTER_SIZE; +#endif + unsigned size = 0; + unsigned byteSize = 0; + bool isRegArg = false; + bool isNonStandard = false; + regNumber nonStdRegNum = REG_NA; + + if (GlobalJitOptions::compFeatureHfa) + { + hfaType = GetHfaType(argx); + isHfaArg = varTypeIsValidHfaType(hfaType); + +#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) + // Make sure for vararg methods isHfaArg is not true. + isHfaArg = callIsVararg ? false : isHfaArg; +#endif // defined(TARGET_WINDOWS) && defined(TARGET_ARM64) + + if (isHfaArg) + { + isHfaArg = true; + hfaSlots = GetHfaCount(argx); + + // If we have a HFA struct it's possible we transition from a method that originally + // only had integer types to now start having FP types. We have to communicate this + // through this flag since LSRA later on will use this flag to determine whether + // or not to track the FP register set. + // + compFloatingPointUsed = true; + } + } + + const bool isFloatHfa = (hfaType == TYP_FLOAT); + +#ifdef TARGET_ARM + passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)) && !opts.compUseSoftFP; + bool passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG); + + // We don't use the "size" return value from InferOpSizeAlign(). + codeGen->InferOpSizeAlign(argx, &argAlignBytes); + + argAlignBytes = roundUp(argAlignBytes, TARGET_POINTER_SIZE); + + if (argAlignBytes == 2 * TARGET_POINTER_SIZE) + { + if (passUsingFloatRegs) + { + if (fltArgRegNum % 2 == 1) + { + fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); + fltArgRegNum++; + } + } + else if (passUsingIntRegs) + { + if (intArgRegNum % 2 == 1) + { + argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); + intArgRegNum++; + } + } + +#if defined(DEBUG) + if (argSlots % 2 == 1) + { + argSlots++; + } +#endif + } + +#elif defined(TARGET_ARM64) + + assert(!callIsVararg || !isHfaArg); + passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)); + +#elif defined(TARGET_AMD64) + + passUsingFloatRegs = varTypeIsFloating(argx); + +#elif defined(TARGET_X86) + + passUsingFloatRegs = false; + +#else +#error Unsupported or unset target architecture +#endif // TARGET* + + bool isBackFilled = false; + unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use + var_types structBaseType = TYP_STRUCT; + unsigned structSize = 0; + bool passStructByRef = false; + + bool isStructArg; + GenTree* actualArg = argx->gtEffectiveVal(true /* Commas only */); + + // + // Figure out the size of the argument. This is either in number of registers, or number of + // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and + // the stack. 
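+ // An illustrative arm32 example: a 12-byte struct occupies size = 3 pointer-sized slots; if
+ // it starts in r3 it is split as one register plus two outgoing stack slots.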
+ // + isStructArg = varTypeIsStruct(argx); + CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE; + if (isStructArg) + { + objClass = gtGetStructHandle(argx); + if (argx->TypeGet() == TYP_STRUCT) + { + // For TYP_STRUCT arguments we must have an OBJ, LCL_VAR or MKREFANY + switch (actualArg->OperGet()) + { + case GT_OBJ: + structSize = actualArg->AsObj()->GetLayout()->GetSize(); + assert(structSize == info.compCompHnd->getClassSize(objClass)); + break; + case GT_LCL_VAR: + structSize = lvaGetDesc(actualArg->AsLclVarCommon())->lvExactSize; + break; + case GT_MKREFANY: + structSize = info.compCompHnd->getClassSize(objClass); + break; + default: + BADCODE("illegal argument tree in fgInitArgInfo"); + break; + } + } + else + { + structSize = genTypeSize(argx); + assert(structSize == info.compCompHnd->getClassSize(objClass)); + } + } +#if defined(TARGET_AMD64) +#ifdef UNIX_AMD64_ABI + if (!isStructArg) + { + size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' + byteSize = genTypeSize(argx); + } + else + { + size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + byteSize = structSize; + eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); + } +#else // !UNIX_AMD64_ABI + size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' + if (!isStructArg) + { + byteSize = genTypeSize(argx); + } + +#endif // UNIX_AMD64_ABI +#elif defined(TARGET_ARM64) + if (isStructArg) + { + if (isHfaArg) + { + // HFA structs are passed by value in multiple registers. + // The "size" in registers may differ the size in pointer-sized units. + CORINFO_CLASS_HANDLE structHnd = gtGetStructHandle(argx); + size = GetHfaCount(structHnd); + byteSize = info.compCompHnd->getClassSize(structHnd); + } + else + { + // Structs are either passed in 1 or 2 (64-bit) slots. + // Structs that are the size of 2 pointers are passed by value in multiple registers, + // if sufficient registers are available. + // Structs that are larger than 2 pointers (except for HFAs) are passed by + // reference (to a copy) + size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + byteSize = structSize; + if (size > 2) + { + size = 1; + } + } + // Note that there are some additional rules for multireg structs. + // (i.e they cannot be split between registers and the stack) + } + else + { + size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot' + byteSize = genTypeSize(argx); + } +#elif defined(TARGET_ARM) || defined(TARGET_X86) + if (isStructArg) + { + size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + byteSize = structSize; + } + else + { + // The typical case. + // Long/double type argument(s) will be modified as needed in Lowering. + size = genTypeStSz(argx->gtType); + byteSize = genTypeSize(argx); + } +#else +#error Unsupported or unset target architecture +#endif // TARGET_XXX + + if (isStructArg) + { + assert(argx == args->GetNode()); + assert(structSize != 0); + + structPassingKind howToPassStruct; + structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, structSize); + passStructByRef = (howToPassStruct == SPK_ByReference); + if (howToPassStruct == SPK_ByReference) + { + byteSize = TARGET_POINTER_SIZE; + } + else + { + byteSize = structSize; + } + + if (howToPassStruct == SPK_PrimitiveType) + { +#ifdef TARGET_ARM + // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct, + // or for a struct of two floats. 
This causes the struct to be address-taken. + if (structBaseType == TYP_DOUBLE) + { + size = 2; + } + else +#endif // TARGET_ARM + { + size = 1; + } + } + else if (passStructByRef) + { + size = 1; + } + } + + const var_types argType = args->GetNode()->TypeGet(); + if (args->GetNode()->OperIs(GT_PUTARG_TYPE)) + { + byteSize = genTypeSize(argType); + } + + // The 'size' value has now must have been set. (the original value of zero is an invalid value) + assert(size != 0); + assert(byteSize != 0); + +#if defined(OSX_ARM64_ABI) + // Arm64 Apple has a special ABI for passing small size arguments on stack, + // bytes are aligned to 1-byte, shorts to 2-byte, int/float to 4-byte, etc. + // It means passing 8 1-byte arguments on stack can take as small as 8 bytes. + unsigned argAlignBytes = eeGetArgAlignment(argType, isFloatHfa); +#endif + + // + // Figure out if the argument will be passed in a register. + // + + if (isRegParamType(genActualType(argx->TypeGet())) +#ifdef UNIX_AMD64_ABI + && (!isStructArg || structDesc.passedInRegisters) +#elif defined(TARGET_X86) + || (isStructArg && isTrivialPointerSizedStruct(objClass)) +#endif + ) + { +#ifdef TARGET_ARM + if (passUsingFloatRegs) + { + // First, see if it can be back-filled + if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) + (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot? + (size == 1)) // The size to back-fill is one float register + { + // Back-fill the register. + isBackFilled = true; + regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); + fltArgSkippedRegMask &= + ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask + nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); + assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG); + } + + // Does the entire float, double, or HFA fit in the FP arg registers? + // Check if the last register needed is still in the argument register range. + isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG; + + if (!isRegArg) + { + anyFloatStackArgs = true; + } + } + else + { + isRegArg = intArgRegNum < MAX_REG_ARG; + } +#elif defined(TARGET_ARM64) + if (passUsingFloatRegs) + { + // Check if the last register needed is still in the fp argument register range. + isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; + + // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers? + if (isHfaArg && !isRegArg) + { + // recompute the 'size' so that it represent the number of stack slots rather than the number of + // registers + // + unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); + size = roundupSize / TARGET_POINTER_SIZE; + + // We also must update fltArgRegNum so that we no longer try to + // allocate any new floating point registers for args + // This prevents us from backfilling a subsequent arg into d7 + // + fltArgRegNum = MAX_FLOAT_REG_ARG; + } + } + else + { + // Check if the last register needed is still in the int argument register range. + isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; + + // Did we run out of registers when we had a 16-byte struct (size===2) ? 
+ // (i.e we only have one register remaining but we needed two registers to pass this arg) + // This prevents us from backfilling a subsequent arg into x7 + // + if (!isRegArg && (size > 1)) + { +#if defined(TARGET_WINDOWS) + // Arm64 windows native varargs allows splitting a 16 byte struct between stack + // and the last general purpose register. + if (callIsVararg) + { + // Override the decision and force a split. + isRegArg = isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs; + } + else +#endif // defined(TARGET_WINDOWS) + { + // We also must update intArgRegNum so that we no longer try to + // allocate any new general purpose registers for args + // + intArgRegNum = maxRegArgs; + } + } + } +#else // not TARGET_ARM or TARGET_ARM64 + +#if defined(UNIX_AMD64_ABI) + + // Here a struct can be passed in register following the classifications of its members and size. + // Now make sure there are actually enough registers to do so. + if (isStructArg) + { + unsigned int structFloatRegs = 0; + unsigned int structIntRegs = 0; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.IsIntegralSlot(i)) + { + structIntRegs++; + } + else if (structDesc.IsSseSlot(i)) + { + structFloatRegs++; + } + } + + isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) && + ((intArgRegNum + structIntRegs) <= MAX_REG_ARG); + } + else + { + if (passUsingFloatRegs) + { + isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG; + } + else + { + isRegArg = intArgRegNum < MAX_REG_ARG; + } + } +#else // !defined(UNIX_AMD64_ABI) + isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; +#endif // !defined(UNIX_AMD64_ABI) +#endif // TARGET_ARM + } + else + { + isRegArg = false; + } + + // If there are nonstandard args (outside the calling convention) they were inserted above + // and noted them in a table so we can recognize them here and build their argInfo. + // + // They should not affect the placement of any other args or stack space required. + // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. + isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum); + if (isNonStandard) + { + isRegArg = (nonStdRegNum != REG_STK); + } + else if (call->IsTailCallViaJitHelper()) + { + // We have already (before calling fgMorphArgs()) appended the 4 special args + // required by the x86 tailcall helper. These args are required to go on the + // stack. Force them to the stack here. + assert(numArgs >= 4); + if (argIndex >= numArgs - 4) + { + isRegArg = false; + } + } + + // Now we know if the argument goes in registers or not and how big it is. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef TARGET_ARM + // If we ever allocate a floating point argument to the stack, then all + // subsequent HFA/float/double arguments go on the stack. + if (!isRegArg && passUsingFloatRegs) + { + for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum) + { + fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); + } + } + + // If we think we're going to split a struct between integer registers and the stack, check to + // see if we've already assigned a floating-point arg to the stack. 
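+ // If so, we do not split this struct: it is passed entirely on the stack and the remaining
+ // integer argument registers are marked as skipped below.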
+ if (isRegArg && // We decided above to use a register for the argument + !passUsingFloatRegs && // We're using integer registers + (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack + anyFloatStackArgs) // We've already used the stack for a floating-point argument + { + isRegArg = false; // Change our mind; don't pass this struct partially in registers + + // Skip the rest of the integer argument registers + for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum) + { + argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); + } + } +#endif // TARGET_ARM + + // Now create the fgArgTabEntry. + fgArgTabEntry* newArgEntry; + if (isRegArg) + { + regNumber nextRegNum = REG_STK; + +#if defined(UNIX_AMD64_ABI) + regNumber nextOtherRegNum = REG_STK; + unsigned int structFloatRegs = 0; + unsigned int structIntRegs = 0; +#endif // defined(UNIX_AMD64_ABI) + + if (isNonStandard) + { + nextRegNum = nonStdRegNum; + } +#if defined(UNIX_AMD64_ABI) + else if (isStructArg && structDesc.passedInRegisters) + { + // It is a struct passed in registers. Assign the next available register. + assert((structDesc.eightByteCount <= 2) && "Too many eightbytes."); + regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum}; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.IsIntegralSlot(i)) + { + *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs); + ++structIntRegs; + } + else if (structDesc.IsSseSlot(i)) + { + *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs); + ++structFloatRegs; + } + } + } +#endif // defined(UNIX_AMD64_ABI) + else + { + // fill in or update the argInfo table + nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum) + : genMapIntRegArgNumToRegNum(intArgRegNum); + } + +#ifdef TARGET_AMD64 +#ifndef UNIX_AMD64_ABI + assert(size == 1); +#endif +#endif + + // This is a register argument - put it in the table + newArgEntry = + call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, + isFloatHfa, callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) + UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) + UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); + newArgEntry->SetIsBackFilled(isBackFilled); + newArgEntry->isNonStandard = isNonStandard; + + // Set up the next intArgRegNum and fltArgRegNum values. 
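+ // (A back-filled argument reuses a floating-point register that was skipped earlier, so the
+ // register counters below are not advanced for it.)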
+ if (!isBackFilled) + { +#if defined(UNIX_AMD64_ABI) + if (isStructArg) + { + // For this case, we've already set the regNums in the argTabEntry + intArgRegNum += structIntRegs; + fltArgRegNum += structFloatRegs; + } + else +#endif // defined(UNIX_AMD64_ABI) + { + if (!isNonStandard) + { +#if FEATURE_ARG_SPLIT + // Check for a split (partially enregistered) struct + if (!passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG)) + { + // This indicates a partial enregistration of a struct type + assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || + (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); + unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum; + assert((unsigned char)numRegsPartial == numRegsPartial); + call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); + } +#endif // FEATURE_ARG_SPLIT + + if (passUsingFloatRegs) + { + fltArgRegNum += size; + +#ifdef WINDOWS_AMD64_ABI + // Whenever we pass an integer register argument + // we skip the corresponding floating point register argument + intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG); +#endif // WINDOWS_AMD64_ABI + // No supported architecture supports partial structs using float registers. + assert(fltArgRegNum <= MAX_FLOAT_REG_ARG); + } + else + { + // Increment intArgRegNum by 'size' registers + intArgRegNum += size; + +#ifdef WINDOWS_AMD64_ABI + fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG); +#endif // WINDOWS_AMD64_ABI + } + } + } + } + } + else // We have an argument that is not passed in a register + { + // This is a stack argument - put it in the table + newArgEntry = call->fgArgInfo->AddStkArg(argIndex, argx, args, size, byteSize, argAlignBytes, isStructArg, + isFloatHfa, callIsVararg); +#ifdef UNIX_AMD64_ABI + // TODO-Amd64-Unix-CQ: This is temporary (see also in fgMorphArgs). + if (structDesc.passedInRegisters) + { + newArgEntry->structDesc.CopyFrom(structDesc); + } +#endif + } + + if (GlobalJitOptions::compFeatureHfa) + { + if (isHfaArg) + { + newArgEntry->SetHfaType(hfaType, hfaSlots); + } + } + + newArgEntry->SetMultiRegNums(); + + noway_assert(newArgEntry != nullptr); + if (newArgEntry->isStruct) + { + newArgEntry->passedByRef = passStructByRef; + newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + } + else + { + newArgEntry->argType = argx->TypeGet(); + } + + DEBUG_ARG_SLOTS_ONLY(argSlots += size;) + } // end foreach argument loop + +#ifdef DEBUG + if (verbose) + { + JITDUMP("ArgTable for %d.%s after fgInitArgInfo:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); + call->fgArgInfo->Dump(this); + JITDUMP("\n"); + } +#endif +} + +//------------------------------------------------------------------------ +// fgMorphArgs: Walk and transform (morph) the arguments of a call +// +// Arguments: +// callNode - the call for which we are doing the argument morphing +// +// Return Value: +// Like most morph methods, this method returns the morphed node, +// though in this case there are currently no scenarios where the +// node itself is re-created. +// +// Notes: +// This calls fgInitArgInfo to create the 'fgArgInfo' for the call. +// If it has already been created, that method will simply return. +// +// This method changes the state of the call node. It uses the existence +// of gtCallLateArgs (the late arguments list) to determine if it has +// already done the first round of morphing. +// +// The first time it is called (i.e. during global morphing), this method +// computes the "late arguments". 
This is when it determines which arguments +// need to be evaluated to temps prior to the main argument setup, and which +// can be directly evaluated into the argument location. It also creates a +// second argument list (gtCallLateArgs) that does the final placement of the +// arguments, e.g. into registers or onto the stack. +// +// The "non-late arguments", aka the gtCallArgs, are doing the in-order +// evaluation of the arguments that might have side-effects, such as embedded +// assignments, calls or possible throws. In these cases, it and earlier +// arguments must be evaluated to temps. +// +// On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS), +// if we have any nested calls, we need to defer the copying of the argument +// into the fixed argument area until after the call. If the argument did not +// otherwise need to be computed into a temp, it is moved to gtCallLateArgs and +// replaced in the "early" arg list (gtCallArgs) with a placeholder node. + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) +{ + GenTreeCall::Use* args; + GenTree* argx; + + unsigned flagsSummary = 0; + + unsigned argIndex = 0; + + DEBUG_ARG_SLOTS_ONLY(unsigned argSlots = 0;) + + bool reMorphing = call->AreArgsComplete(); + + // Set up the fgArgInfo. + fgInitArgInfo(call); + JITDUMP("%sMorphing args for %d.%s:\n", (reMorphing) ? "Re" : "", call->gtTreeID, GenTree::OpName(call->gtOper)); + + // If we are remorphing, process the late arguments (which were determined by a previous caller). + if (reMorphing) + { + for (GenTreeCall::Use& use : call->LateArgs()) + { + use.SetNode(fgMorphTree(use.GetNode())); + flagsSummary |= use.GetNode()->gtFlags; + } + + assert(call->fgArgInfo != nullptr); + } + call->fgArgInfo->RemorphReset(); + + // First we morph the argument subtrees ('this' pointer, arguments, etc.). + // During the first call to fgMorphArgs we also record the + // information about late arguments we have in 'fgArgInfo'. + // This information is used later to contruct the gtCallLateArgs */ + + // Process the 'this' argument value, if present. + if (call->gtCallThisArg != nullptr) + { + argx = call->gtCallThisArg->GetNode(); + fgArgTabEntry* thisArgEntry = call->fgArgInfo->GetArgEntry(0, reMorphing); + argx = fgMorphTree(argx); + call->gtCallThisArg->SetNode(argx); + // This is a register argument - possibly update it in the table. + call->fgArgInfo->UpdateRegArg(thisArgEntry, argx, reMorphing); + flagsSummary |= argx->gtFlags; + + if (!reMorphing && call->IsExpandedEarly() && call->IsVirtualVtable()) + { + if (!argx->OperIsLocal()) + { + thisArgEntry->needTmp = true; + call->fgArgInfo->SetNeedsTemps(); + } + } + assert(argIndex == 0); + argIndex++; + DEBUG_ARG_SLOTS_ONLY(argSlots++;) + } + + // Note that this name is a bit of a misnomer - it indicates that there are struct args + // that occupy more than a single slot that are passed by value (not necessarily in regs). + bool hasMultiregStructArgs = false; + for (args = call->gtCallArgs; args != nullptr; args = args->GetNext(), argIndex++) + { + GenTree** parentArgx = &args->NodeRef(); + fgArgTabEntry* argEntry = call->fgArgInfo->GetArgEntry(argIndex, reMorphing); + + // Morph the arg node, and update the parent and argEntry pointers. 
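+ // (fgMorphTree may return a different node, so store the result back through the use edge
+ // and refresh our local argx.)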
+ argx = *parentArgx; + argx = fgMorphTree(argx); + *parentArgx = argx; + assert(argx == args->GetNode()); + + DEBUG_ARG_SLOTS_ONLY(unsigned size = argEntry->getSize();) + CORINFO_CLASS_HANDLE copyBlkClass = NO_CLASS_HANDLE; + +#if defined(DEBUG_ARG_SLOTS) + if (argEntry->GetByteAlignment() == 2 * TARGET_POINTER_SIZE) + { + if (argSlots % 2 == 1) + { + argSlots++; + } + } +#endif // DEBUG + if (argEntry->isNonStandard) + { + // We need to update the node field for this nonStandard arg here + // as it may have been changed by the call to fgMorphTree. + call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); + flagsSummary |= argx->gtFlags; + continue; + } + DEBUG_ARG_SLOTS_ASSERT(size != 0); + DEBUG_ARG_SLOTS_ONLY(argSlots += argEntry->getSlotCount();) + + if (argx->IsLocalAddrExpr() != nullptr) + { + argx->gtType = TYP_I_IMPL; + } + + // Get information about this argument. + var_types hfaType = argEntry->GetHfaType(); + bool isHfaArg = (hfaType != TYP_UNDEF); + bool passUsingFloatRegs = argEntry->isPassedInFloatRegisters(); + unsigned structSize = 0; + + // Struct arguments may be morphed into a node that is not a struct type. + // In such case the fgArgTabEntry keeps track of whether the original node (before morphing) + // was a struct and the struct classification. + bool isStructArg = argEntry->isStruct; + + GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/); + if (isStructArg && varTypeIsStruct(argObj) && !argObj->OperIs(GT_ASG, GT_MKREFANY, GT_FIELD_LIST, GT_ARGPLACE)) + { + CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(argObj); + unsigned originalSize; + if (argObj->TypeGet() == TYP_STRUCT) + { + if (argObj->OperIs(GT_OBJ)) + { + // Get the size off the OBJ node. + originalSize = argObj->AsObj()->GetLayout()->GetSize(); + assert(originalSize == info.compCompHnd->getClassSize(objClass)); + } + else + { + // We have a BADCODE assert for this in fgInitArgInfo. + assert(argObj->OperIs(GT_LCL_VAR)); + originalSize = lvaGetDesc(argObj->AsLclVarCommon())->lvExactSize; + } + } + else + { + originalSize = genTypeSize(argx); + assert(originalSize == info.compCompHnd->getClassSize(objClass)); + } + unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE); + var_types structBaseType = argEntry->argType; + + // First, handle the case where the argument is passed by reference. + if (argEntry->passedByRef) + { + DEBUG_ARG_SLOTS_ASSERT(size == 1); + copyBlkClass = objClass; +#ifdef UNIX_AMD64_ABI + assert(!"Structs are not passed by reference on x64/ux"); +#endif // UNIX_AMD64_ABI + } + else // This is passed by value. + { + // Check to see if we can transform this into load of a primitive type. + // 'size' must be the number of pointer sized items + DEBUG_ARG_SLOTS_ASSERT(size == roundupSize / TARGET_POINTER_SIZE); + + structSize = originalSize; + unsigned passingSize = originalSize; + + // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size. + // When it can do this is platform-dependent: + // - In general, it can be done for power of 2 structs that fit in a single register. + // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field. + // - This is irrelevant for X86, since structs are always passed by value on the stack. 
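+ // An illustrative example: an 8-byte struct with two int fields that is passed in a single
+ // register can be loaded with one TYP_LONG indirection, while a 12-byte struct is not a power
+ // of two and, outside the local-variable cases above, keeps its GT_OBJ.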
+ + GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj); + bool canTransform = false; + + if (structBaseType != TYP_STRUCT) + { + if (isPow2(passingSize)) + { + canTransform = (!argEntry->IsHfaArg() || (passingSize == genTypeSize(argEntry->GetHfaType()))); + } + +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) + // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can + // only transform in that case if the arg is a local. + // TODO-CQ: This transformation should be applicable in general, not just for the ARM64 + // or UNIX_AMD64_ABI cases where they will be passed in registers. + else + { + canTransform = (lclVar != nullptr); + passingSize = genTypeSize(structBaseType); + } +#endif // TARGET_ARM64 || UNIX_AMD64_ABI + } + + if (!canTransform) + { +#if defined(TARGET_AMD64) +#ifndef UNIX_AMD64_ABI + // On Windows structs are always copied and passed by reference (handled above) unless they are + // passed by value in a single register. + assert(size == 1); + copyBlkClass = objClass; +#else // UNIX_AMD64_ABI + // On Unix, structs are always passed by value. + // We only need a copy if we have one of the following: + // - The sizes don't match for a non-lclVar argument. + // - We have a known struct type (e.g. SIMD) that requires multiple registers. + // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not + // actually passed in registers. + if (argEntry->isPassedInRegisters()) + { + if (argObj->OperIs(GT_OBJ)) + { + if (passingSize != structSize) + { + copyBlkClass = objClass; + } + } + else if (lclVar == nullptr) + { + // This should only be the case of a value directly producing a known struct type. + assert(argObj->TypeGet() != TYP_STRUCT); + if (argEntry->numRegs > 1) + { + copyBlkClass = objClass; + } + } + } +#endif // UNIX_AMD64_ABI +#elif defined(TARGET_ARM64) + if ((passingSize != structSize) && (lclVar == nullptr)) + { + copyBlkClass = objClass; + } +#endif + +#ifdef TARGET_ARM + // TODO-1stClassStructs: Unify these conditions across targets. + if (((lclVar != nullptr) && + (lvaGetPromotionType(lclVar->AsLclVarCommon()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT)) || + ((argObj->OperIs(GT_OBJ)) && (passingSize != structSize))) + { + copyBlkClass = objClass; + } + + if (structSize < TARGET_POINTER_SIZE) + { + copyBlkClass = objClass; + } +#endif // TARGET_ARM + } + else + { + // We have a struct argument that fits into a register, and it is either a power of 2, + // or a local. + // Change our argument, as needed, into a value of the appropriate type. 
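+ // Below, a GT_OBJ is changed to a GT_IND and *(&X) is folded back to X where possible; locals
+ // are re-typed directly when their (single) field type matches, reinterpreted via GT_LCL_FLD
+ // when it does not, or re-assembled via a copy when the struct was promoted into multiple
+ // fields.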
+ CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef TARGET_ARM + DEBUG_ARG_SLOTS_ASSERT((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2))); +#else + DEBUG_ARG_SLOTS_ASSERT((size == 1) || (varTypeIsSIMD(structBaseType) && + size == (genTypeSize(structBaseType) / REGSIZE_BYTES))); +#endif + + assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize)); + + if (argObj->OperIs(GT_OBJ)) + { + argObj->ChangeOper(GT_IND); + + // Now see if we can fold *(&X) into X + if (argObj->AsOp()->gtOp1->gtOper == GT_ADDR) + { + GenTree* temp = argObj->AsOp()->gtOp1->AsOp()->gtOp1; + + // Keep the DONT_CSE flag in sync + // (as the addr always marks it for its op1) + temp->gtFlags &= ~GTF_DONT_CSE; + temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE); + DEBUG_DESTROY_NODE(argObj->AsOp()->gtOp1); // GT_ADDR + DEBUG_DESTROY_NODE(argObj); // GT_IND + + argObj = temp; + *parentArgx = temp; + argx = temp; + } + } + if (argObj->gtOper == GT_LCL_VAR) + { + unsigned lclNum = argObj->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (varDsc->lvPromoted) + { + if (varDsc->lvFieldCnt == 1) + { + // get the first and only promoted field + LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; + if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize) + { + // we will use the first and only promoted field + argObj->AsLclVarCommon()->SetLclNum(varDsc->lvFieldLclStart); + + if (varTypeIsEnregisterable(fieldVarDsc->TypeGet()) && + (genTypeSize(fieldVarDsc->TypeGet()) == originalSize)) + { + // Just use the existing field's type + argObj->gtType = fieldVarDsc->TypeGet(); + } + else + { + // Can't use the existing field's type, so use GT_LCL_FLD to swizzle + // to a new type + argObj->ChangeOper(GT_LCL_FLD); + argObj->gtType = structBaseType; + } + assert(varTypeIsEnregisterable(argObj->TypeGet())); + assert(copyBlkClass == NO_CLASS_HANDLE); + } + else + { + // use GT_LCL_FLD to swizzle the single field struct to a new type + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); + argObj->ChangeOper(GT_LCL_FLD); + argObj->gtType = structBaseType; + } + } + else + { + // The struct fits into a single register, but it has been promoted into its + // constituent fields, and so we have to re-assemble it + copyBlkClass = objClass; + } + } + else if (genTypeSize(varDsc->TypeGet()) != genTypeSize(structBaseType)) + { + // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD + argObj->ChangeOper(GT_LCL_FLD); + argObj->gtType = structBaseType; + } + } + else + { + // Not a GT_LCL_VAR, so we can just change the type on the node + argObj->gtType = structBaseType; + } + assert(varTypeIsEnregisterable(argObj->TypeGet()) || + ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType))); + } + +#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) + // TODO-CQ-XARCH: there is no need for a temp copy if we improve our code generation in + // `genPutStructArgStk` for xarch like we did it for Arm/Arm64. + + // We still have a struct unless we converted the GT_OBJ into a GT_IND above... 
+ if (isHfaArg && passUsingFloatRegs) + { + } + else if (structBaseType == TYP_STRUCT) + { + // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, + // we must copyblk to a temp before doing the obj to avoid + // the obj reading memory past the end of the valuetype + CLANG_FORMAT_COMMENT_ANCHOR; + + if (roundupSize > originalSize) + { + copyBlkClass = objClass; + + // There are a few special cases where we can omit using a CopyBlk + // where we normally would need to use one. + + if (argObj->OperIs(GT_OBJ) && + argObj->AsObj()->gtGetOp1()->IsLocalAddrExpr() != nullptr) // Is the source a LclVar? + { + copyBlkClass = NO_CLASS_HANDLE; + } + } + } + +#endif // !UNIX_AMD64_ABI + } + } + + if (argEntry->isPassedInRegisters()) + { + call->fgArgInfo->UpdateRegArg(argEntry, argx, reMorphing); + } + else + { + call->fgArgInfo->UpdateStkArg(argEntry, argx, reMorphing); + } + + if (copyBlkClass != NO_CLASS_HANDLE) + { + fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass); + } + + if (argx->gtOper == GT_MKREFANY) + { + // 'Lower' the MKREFANY tree and insert it. + noway_assert(!reMorphing); + +#ifdef TARGET_X86 + // Build the mkrefany as a GT_FIELD_LIST + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + fieldList->AddField(this, argx->AsOp()->gtGetOp1(), OFFSETOF__CORINFO_TypedReference__dataPtr, TYP_BYREF); + fieldList->AddField(this, argx->AsOp()->gtGetOp2(), OFFSETOF__CORINFO_TypedReference__type, TYP_I_IMPL); + fgArgTabEntry* fp = gtArgEntryByNode(call, argx); + args->SetNode(fieldList); + assert(fp->GetNode() == fieldList); +#else // !TARGET_X86 + + // Get a new temp + // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany + unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument")); + lvaSetStruct(tmp, impGetRefAnyClass(), false); + + // Build the mkrefany as a comma node: + // (tmp.ptr=argx),(tmp.type=handle) + GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__dataPtr); + GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, OFFSETOF__CORINFO_TypedReference__type); + destPtrSlot->SetFieldSeq(GetFieldSeqStore()->CreateSingleton(GetRefanyDataField())); + destPtrSlot->gtFlags |= GTF_VAR_DEF; + destTypeSlot->SetFieldSeq(GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField())); + destTypeSlot->gtFlags |= GTF_VAR_DEF; + + GenTree* asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->AsOp()->gtOp1); + GenTree* asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->AsOp()->gtOp2); + GenTree* asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot); + + // Change the expression to "(tmp=val)" + args->SetNode(asg); + + // EvalArgsToTemps will cause tmp to actually get loaded as the argument + call->fgArgInfo->EvalToTmp(argEntry, tmp, asg); + lvaSetVarAddrExposed(tmp); +#endif // !TARGET_X86 + } + +#if FEATURE_MULTIREG_ARGS + if (isStructArg) + { + if (((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) || + (isHfaArg && argx->TypeGet() == TYP_STRUCT)) + { + hasMultiregStructArgs = true; + } + } +#ifdef TARGET_ARM + else if ((argEntry->argType == TYP_LONG) || (argEntry->argType == TYP_DOUBLE)) + { + assert((argEntry->numRegs == 2) || (argEntry->numSlots == 2)); + } +#endif + else + { + // We must have exactly one register or slot. 
+ assert(((argEntry->numRegs == 1) && (argEntry->GetStackSlotsNumber() == 0)) || + ((argEntry->numRegs == 0) && (argEntry->GetStackSlotsNumber() == 1))); + } +#endif + +#if defined(TARGET_X86) + if (isStructArg) + { + GenTree* lclNode = argx->OperIs(GT_LCL_VAR) ? argx : fgIsIndirOfAddrOfLocal(argx); + if ((lclNode != nullptr) && + (lvaGetPromotionType(lclNode->AsLclVarCommon()->GetLclNum()) == Compiler::PROMOTION_TYPE_INDEPENDENT)) + { + // Make a GT_FIELD_LIST of the field lclVars. + GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon(); + LclVarDsc* varDsc = lvaGetDesc(lcl); + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + + fgArgTabEntry* fp = gtArgEntryByNode(call, argx); + args->SetNode(fieldList); + assert(fp->GetNode() == fieldList); + + for (unsigned fieldLclNum = varDsc->lvFieldLclStart; + fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum) + { + LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); + GenTree* fieldLcl; + + if (fieldLclNum == varDsc->lvFieldLclStart) + { + lcl->SetLclNum(fieldLclNum); + lcl->ChangeOper(GT_LCL_VAR); + lcl->gtType = fieldVarDsc->TypeGet(); + fieldLcl = lcl; + } + else + { + fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); + } + + fieldList->AddField(this, fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); + } + } + } +#endif // TARGET_X86 + + flagsSummary |= args->GetNode()->gtFlags; + + } // end foreach argument loop + + if (!reMorphing) + { + call->fgArgInfo->ArgsComplete(); + } + + /* Process the function address, if indirect call */ + + if (call->gtCallType == CT_INDIRECT) + { + call->gtCallAddr = fgMorphTree(call->gtCallAddr); + // Const CSE may create an assignment node here + flagsSummary |= call->gtCallAddr->gtFlags; + } + +#if FEATURE_FIXED_OUT_ARGS + + // Record the outgoing argument size. If the call is a fast tail + // call, it will setup its arguments in incoming arg area instead + // of the out-going arg area, so we don't need to track the + // outgoing arg size. + if (!call->IsFastTailCall()) + { + +#if defined(UNIX_AMD64_ABI) + // This is currently required for the UNIX ABI to work correctly. 
+ opts.compNeedToAlignFrame = true; +#endif // UNIX_AMD64_ABI + + const unsigned outgoingArgSpaceSize = GetOutgoingArgByteSize(call->fgArgInfo->GetNextSlotByteOffset()); + +#if defined(DEBUG_ARG_SLOTS) + unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum(); + assert(outgoingArgSpaceSize == preallocatedArgCount * REGSIZE_BYTES); +#endif + call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL)); + +#ifdef DEBUG + if (verbose) + { + const fgArgInfo* argInfo = call->fgArgInfo; +#if defined(DEBUG_ARG_SLOTS) + printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, nextSlotByteOffset=%d, " + "outgoingArgSpaceSize=%d\n", + argSlots, preallocatedArgCount, argInfo->GetNextSlotNum(), argInfo->GetNextSlotByteOffset(), + outgoingArgSpaceSize); +#else + printf("nextSlotByteOffset=%d, outgoingArgSpaceSize=%d\n", argInfo->GetNextSlotByteOffset(), + outgoingArgSpaceSize); +#endif + } +#endif + } +#endif // FEATURE_FIXED_OUT_ARGS + + // Clear the ASG and EXCEPT (if possible) flags on the call node + call->gtFlags &= ~GTF_ASG; + if (!call->OperMayThrow(this)) + { + call->gtFlags &= ~GTF_EXCEPT; + } + + // Union in the side effect flags from the call's operands + call->gtFlags |= flagsSummary & GTF_ALL_EFFECT; + + // If we are remorphing or don't have any register arguments or other arguments that need + // temps, then we don't need to call SortArgs() and EvalArgsToTemps(). + // + if (!reMorphing && (call->fgArgInfo->HasRegArgs() || call->fgArgInfo->NeedsTemps())) + { + // Do the 'defer or eval to temp' analysis. + + call->fgArgInfo->SortArgs(); + + call->fgArgInfo->EvalArgsToTemps(); + } + + if (hasMultiregStructArgs) + { + fgMorphMultiregStructArgs(call); + } + +#ifdef DEBUG + if (verbose) + { + JITDUMP("ArgTable for %d.%s after fgMorphArgs:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); + call->fgArgInfo->Dump(this); + JITDUMP("\n"); + } +#endif + return call; +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +//----------------------------------------------------------------------------- +// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and +// call fgMorphMultiregStructArg on each of them. +// +// Arguments: +// call : a GenTreeCall node that has one or more TYP_STRUCT arguments\. +// +// Notes: +// We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types. +// It will ensure that the struct arguments are in the correct form. +// If this method fails to find any TYP_STRUCT arguments it will assert. +// +void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) +{ + bool foundStructArg = false; + unsigned flagsSummary = 0; + +#ifdef TARGET_X86 + assert(!"Logic error: no MultiregStructArgs for X86"); +#endif +#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) + assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI"); +#endif + + for (GenTreeCall::Use& use : call->Args()) + { + // For late arguments the arg tree that is overridden is in the gtCallLateArgs list. + // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.) + // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping + // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself, + // otherwise points to the list in the late args list. 
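+ //
+ // For example (illustrative): when an argument was evaluated into a temp, gtCallArgs holds the
+ // setup node that computes the temp, while gtCallLateArgs holds the node actually passed (e.g.
+ // a GT_LCL_VAR of that temp), and fgEntryPtr->GetNode() returns that late node.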
+ bool isLateArg = (use.GetNode()->gtFlags & GTF_LATE_ARG) != 0; + fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, use.GetNode()); + assert(fgEntryPtr != nullptr); + GenTree* argx = fgEntryPtr->GetNode(); + GenTreeCall::Use* lateUse = nullptr; + GenTree* lateNode = nullptr; + + if (isLateArg) + { + for (GenTreeCall::Use& lateArgUse : call->LateArgs()) + { + GenTree* argNode = lateArgUse.GetNode(); + if (argx == argNode) + { + lateUse = &lateArgUse; + lateNode = argNode; + break; + } + } + assert((lateUse != nullptr) && (lateNode != nullptr)); + } + + if (!fgEntryPtr->isStruct) + { + continue; + } + + unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->GetStackSlotsNumber()); + if ((size > 1) || (fgEntryPtr->IsHfaArg() && argx->TypeGet() == TYP_STRUCT)) + { + foundStructArg = true; + if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) + { + if (fgEntryPtr->IsHfaRegArg()) + { + var_types hfaType = fgEntryPtr->GetHfaType(); + unsigned structSize; + if (argx->OperIs(GT_OBJ)) + { + structSize = argx->AsObj()->GetLayout()->GetSize(); + } + else if (varTypeIsSIMD(argx)) + { + structSize = genTypeSize(argx); + } + else + { + assert(argx->OperIs(GT_LCL_VAR)); + structSize = lvaGetDesc(argx->AsLclVar()->GetLclNum())->lvExactSize; + } + assert(structSize > 0); + if (structSize == genTypeSize(hfaType)) + { + if (argx->OperIs(GT_OBJ)) + { + argx->SetOper(GT_IND); + } + + argx->gtType = hfaType; + } + } + + GenTree* newArgx = fgMorphMultiregStructArg(argx, fgEntryPtr); + + // Did we replace 'argx' with a new tree? + if (newArgx != argx) + { + // link the new arg node into either the late arg list or the gtCallArgs list + if (isLateArg) + { + lateUse->SetNode(newArgx); + } + else + { + use.SetNode(newArgx); + } + + assert(fgEntryPtr->GetNode() == newArgx); + } + } + } + } + + // We should only call this method when we actually have one or more multireg struct args + assert(foundStructArg); + + // Update the flags + call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT); +} + +//----------------------------------------------------------------------------- +// fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list, +// morph the argument as needed to be passed correctly. +// +// Arguments: +// arg - A GenTree node containing a TYP_STRUCT arg +// fgEntryPtr - the fgArgTabEntry information for the current 'arg' +// +// Notes: +// The arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT. +// If 'arg' is a lclVar passed on the stack, we will ensure that any lclVars that must be on the +// stack are marked as doNotEnregister, and then we return. +// +// If it is passed by register, we mutate the argument into the GT_FIELD_LIST form +// which is only used for struct arguments. +// +// If arg is a LclVar we check if it is struct promoted and has the right number of fields +// and if they are at the appropriate offsets we will use the struct promted fields +// in the GT_FIELD_LIST nodes that we create. +// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements +// we will use a set of GT_LCL_FLDs nodes to access the various portions of the struct +// this also forces the struct to be stack allocated into the local frame. +// For the GT_OBJ case will clone the address expression and generate two (or more) +// indirections. +// Currently the implementation handles ARM64/ARM and will NYI for other architectures. 
+// +GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) +{ + assert(varTypeIsStruct(arg->TypeGet())); + +#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) + NYI("fgMorphMultiregStructArg requires implementation for this target"); +#endif + +#ifdef TARGET_ARM + if ((fgEntryPtr->IsSplit() && fgEntryPtr->GetStackSlotsNumber() + fgEntryPtr->numRegs > 4) || + (!fgEntryPtr->IsSplit() && fgEntryPtr->GetRegNum() == REG_STK)) +#else + if (fgEntryPtr->GetRegNum() == REG_STK) +#endif + { + GenTreeLclVarCommon* lcl = nullptr; + GenTree* actualArg = arg->gtEffectiveVal(); + + if (actualArg->OperGet() == GT_OBJ) + { + if (actualArg->gtGetOp1()->OperIs(GT_ADDR) && actualArg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR)) + { + lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVarCommon(); + } + } + else if (actualArg->OperGet() == GT_LCL_VAR) + { + lcl = actualArg->AsLclVarCommon(); + } + if (lcl != nullptr) + { + if (lvaGetPromotionType(lcl->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT) + { + arg = fgMorphLclArgToFieldlist(lcl); + } + else if (arg->TypeGet() == TYP_STRUCT) + { + // If this is a non-register struct, it must be referenced from memory. + if (!actualArg->OperIs(GT_OBJ)) + { + // Create an Obj of the temp to use it as a call argument. + arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg); + arg = gtNewObjNode(lvaGetStruct(lcl->GetLclNum()), arg); + } + // Its fields will need to be accessed by address. + lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUG_ARG(DNER_IsStructArg)); + } + } + + return arg; + } + +#if FEATURE_MULTIREG_ARGS + // Examine 'arg' and setup argValue objClass and structSize + // + const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); + GenTree* argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; + + if (arg->TypeGet() != TYP_STRUCT) + { + structSize = genTypeSize(arg->TypeGet()); + assert(structSize == info.compCompHnd->getClassSize(objClass)); + } + else if (arg->OperGet() == GT_OBJ) + { + GenTreeObj* argObj = arg->AsObj(); + const ClassLayout* objLayout = argObj->GetLayout(); + structSize = objLayout->GetSize(); + assert(structSize == info.compCompHnd->getClassSize(objClass)); + + // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR. + GenTree* op1 = argObj->gtOp1; + if (op1->OperGet() == GT_ADDR) + { + GenTree* underlyingTree = op1->AsOp()->gtOp1; + + // Only update to the same type. 
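+ // (Illustrative) e.g. for OBJ(ADDR(LCL_VAR V03)) argValue becomes the LCL_VAR itself, but only
+ // when the local's layout is compatible with the OBJ's layout, so that the struct size and GC
+ // info derived from argValue still match objClass.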
+ if (underlyingTree->OperIs(GT_LCL_VAR)) + { + const GenTreeLclVar* lclVar = underlyingTree->AsLclVar(); + const LclVarDsc* varDsc = lvaGetDesc(lclVar); + if (ClassLayout::AreCompatible(varDsc->GetLayout(), objLayout)) + { + argValue = underlyingTree; + } + } + } + } + else if (arg->OperGet() == GT_LCL_VAR) + { + GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); + unsigned varNum = varNode->GetLclNum(); + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + + structSize = varDsc->lvExactSize; + assert(structSize == info.compCompHnd->getClassSize(objClass)); + } + else + { + structSize = info.compCompHnd->getClassSize(objClass); + } + + var_types hfaType = TYP_UNDEF; + var_types elemType = TYP_UNDEF; + unsigned elemCount = 0; + unsigned elemSize = 0; + var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0 + + hfaType = fgEntryPtr->GetHfaType(); + if (varTypeIsValidHfaType(hfaType) +#if !defined(HOST_UNIX) && defined(TARGET_ARM64) + && !fgEntryPtr->IsVararg() +#endif // !defined(HOST_UNIX) && defined(TARGET_ARM64) + ) + { + elemType = hfaType; + elemSize = genTypeSize(elemType); + elemCount = structSize / elemSize; + assert(elemSize * elemCount == structSize); + for (unsigned inx = 0; inx < elemCount; inx++) + { + type[inx] = elemType; + } + } + else + { + assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); + BYTE gcPtrs[MAX_ARG_REG_COUNT]; + elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; + info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + + for (unsigned inx = 0; inx < elemCount; inx++) + { +#ifdef UNIX_AMD64_ABI + if (gcPtrs[inx] == TYPE_GC_NONE) + { + type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], + fgEntryPtr->structDesc.eightByteSizes[inx]); + } + else +#endif // UNIX_AMD64_ABI + { + type[inx] = getJitGCType(gcPtrs[inx]); + } + } + +#ifndef UNIX_AMD64_ABI + if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) + { + elemSize = TARGET_POINTER_SIZE; + // We can safely widen this to aligned bytes since we are loading from + // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and + // lives in the stack frame or will be a promoted field. + // + structSize = elemCount * TARGET_POINTER_SIZE; + } + else // we must have a GT_OBJ + { + assert(argValue->OperGet() == GT_OBJ); + + // We need to load the struct from an arbitrary address + // and we can't read past the end of the structSize + // We adjust the last load type here + // + unsigned remainingBytes = structSize % TARGET_POINTER_SIZE; + unsigned lastElem = elemCount - 1; + if (remainingBytes != 0) + { + switch (remainingBytes) + { + case 1: + type[lastElem] = TYP_BYTE; + break; + case 2: + type[lastElem] = TYP_SHORT; + break; +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) + case 4: + type[lastElem] = TYP_INT; + break; +#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) + default: + noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); + break; + } + } + } +#endif // !UNIX_AMD64_ABI + } + + // We should still have a TYP_STRUCT + assert(varTypeIsStruct(argValue->TypeGet())); + + GenTreeFieldList* newArg = nullptr; + + // Are we passing a struct LclVar? 
+ // + if (argValue->OperGet() == GT_LCL_VAR) + { + GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); + unsigned varNum = varNode->GetLclNum(); + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + + // At this point any TYP_STRUCT LclVar must be an aligned struct + // or an HFA struct, both which are passed by value. + // + assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa()); + + varDsc->lvIsMultiRegArg = true; + +#ifdef DEBUG + if (verbose) + { + JITDUMP("Multireg struct argument V%02u : ", varNum); + fgEntryPtr->Dump(); + } +#endif // DEBUG + +#ifndef UNIX_AMD64_ABI + // This local variable must match the layout of the 'objClass' type exactly + if (varDsc->lvIsHfa() +#if !defined(HOST_UNIX) && defined(TARGET_ARM64) + && !fgEntryPtr->IsVararg() +#endif // !defined(HOST_UNIX) && defined(TARGET_ARM64) + ) + { + // We have a HFA struct. + noway_assert(elemType == varDsc->GetHfaType()); + noway_assert(elemSize == genTypeSize(elemType)); + noway_assert(elemCount == (varDsc->lvExactSize / elemSize)); + noway_assert(elemSize * elemCount == varDsc->lvExactSize); + + for (unsigned inx = 0; (inx < elemCount); inx++) + { + noway_assert(type[inx] == elemType); + } + } + else + { +#if defined(TARGET_ARM64) + // We must have a 16-byte struct (non-HFA) + noway_assert(elemCount == 2); +#elif defined(TARGET_ARM) + noway_assert(elemCount <= 4); +#endif + + for (unsigned inx = 0; inx < elemCount; inx++) + { + var_types currentGcLayoutType = varDsc->GetLayout()->GetGCPtrType(inx); + + // We setup the type[inx] value above using the GC info from 'objClass' + // This GT_LCL_VAR must have the same GC layout info + // + if (varTypeIsGC(currentGcLayoutType)) + { + noway_assert(type[inx] == currentGcLayoutType); + } + else + { + // We may have use a small type when we setup the type[inx] values above + // We can safely widen this to TYP_I_IMPL + type[inx] = TYP_I_IMPL; + } + } + } +#endif // !UNIX_AMD64_ABI + +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) + // Is this LclVar a promoted struct with exactly 2 fields? + // TODO-ARM64-CQ: Support struct promoted HFA types here + if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && (!varDsc->lvIsHfa() +#if !defined(HOST_UNIX) && defined(TARGET_ARM64) + && !fgEntryPtr->IsVararg() +#endif // !defined(HOST_UNIX) && defined(TARGET_ARM64) + )) + { + // See if we have two promoted fields that start at offset 0 and 8? + unsigned loVarNum = lvaGetFieldLocal(varDsc, 0); + unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE); + + // Did we find the promoted fields at the necessary offsets? + if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM)) + { + LclVarDsc* loVarDsc = &lvaTable[loVarNum]; + LclVarDsc* hiVarDsc = &lvaTable[hiVarNum]; + + var_types loType = loVarDsc->lvType; + var_types hiType = hiVarDsc->lvType; + + if ((varTypeIsFloating(loType) != genIsValidFloatReg(fgEntryPtr->GetRegNum(0))) || + (varTypeIsFloating(hiType) != genIsValidFloatReg(fgEntryPtr->GetRegNum(1)))) + { + // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer + // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered) + // + JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", + varNum); + // + // we call lvaSetVarDoNotEnregister and do the proper transformation below. 
+ // + } + else + { + // We can use the struct promoted field as the two arguments + + // Create a new tree for 'arg' + // replace the existing LDOBJ(ADDR(LCLVAR)) + // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr)) + // + + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + newArg->AddField(this, gtNewLclvNode(loVarNum, loType), 0, loType); + newArg->AddField(this, gtNewLclvNode(hiVarNum, hiType), TARGET_POINTER_SIZE, hiType); + } + } + } + else + { + // + // We will create a list of GT_LCL_FLDs nodes to pass this struct + // + lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); + } +#elif defined(TARGET_ARM) + // Is this LclVar a promoted struct with exactly same size? + if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa()) + { + // See if we have promoted fields? + unsigned varNums[4]; + bool hasBadVarNum = false; + for (unsigned inx = 0; inx < elemCount; inx++) + { + varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx); + if (varNums[inx] == BAD_VAR_NUM) + { + hasBadVarNum = true; + break; + } + } + + // Did we find the promoted fields at the necessary offsets? + if (!hasBadVarNum) + { + LclVarDsc* varDscs[4]; + var_types varType[4]; + bool varIsFloat = false; + + for (unsigned inx = 0; inx < elemCount; inx++) + { + varDscs[inx] = &lvaTable[varNums[inx]]; + varType[inx] = varDscs[inx]->lvType; + if (varTypeIsFloating(varType[inx])) + { + // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the + // integer + // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered) + // + JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n", + varNum); + // + // we call lvaSetVarDoNotEnregister and do the proper transformation below. + // + varIsFloat = true; + break; + } + } + + if (!varIsFloat) + { + newArg = fgMorphLclArgToFieldlist(varNode); + } + } + } + else + { + // + // We will create a list of GT_LCL_FLDs nodes to pass this struct + // + lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); + } +#endif // TARGET_ARM + } + + // If we didn't set newarg to a new List Node tree + // + if (newArg == nullptr) + { + if (fgEntryPtr->GetRegNum() == REG_STK) + { + // We leave this stack passed argument alone + return arg; + } + + // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted ) + // A GT_LCL_FLD could also contain a 16-byte struct or HFA struct inside it? 
+ // + if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) + { + GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon(); + unsigned varNum = varNode->GetLclNum(); + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + + unsigned baseOffset = varNode->GetLclOffs(); + unsigned lastOffset = baseOffset + structSize; + + // The allocated size of our LocalVar must be at least as big as lastOffset + assert(varDsc->lvSize() >= lastOffset); + + if (varDsc->HasGCPtr()) + { + // alignment of the baseOffset is required + noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0); +#ifndef UNIX_AMD64_ABI + noway_assert(elemSize == TARGET_POINTER_SIZE); +#endif + unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE; + ClassLayout* layout = varDsc->GetLayout(); + for (unsigned inx = 0; (inx < elemCount); inx++) + { + // The GC information must match what we setup using 'objClass' + if (layout->IsGCPtr(baseIndex + inx) || varTypeGCtype(type[inx])) + { + noway_assert(type[inx] == layout->GetGCPtrType(baseIndex + inx)); + } + } + } + else // this varDsc contains no GC pointers + { + for (unsigned inx = 0; inx < elemCount; inx++) + { + // The GC information must match what we setup using 'objClass' + noway_assert(!varTypeIsGC(type[inx])); + } + } + + // + // We create a list of GT_LCL_FLDs nodes to pass this struct + // + lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); + + // Create a new tree for 'arg' + // replace the existing LDOBJ(ADDR(LCLVAR)) + // with a FIELD_LIST(LCLFLD-LO, LCLFLD-HI) + // + unsigned offset = baseOffset; + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + for (unsigned inx = 0; inx < elemCount; inx++) + { + GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); + newArg->AddField(this, nextLclFld, offset, type[inx]); + offset += genTypeSize(type[inx]); + } + } + // Are we passing a GT_OBJ struct? + // + else if (argValue->OperGet() == GT_OBJ) + { + GenTreeObj* argObj = argValue->AsObj(); + GenTree* baseAddr = argObj->gtOp1; + var_types addrType = baseAddr->TypeGet(); + + if (baseAddr->OperGet() == GT_ADDR) + { + GenTree* addrTaken = baseAddr->AsOp()->gtOp1; + if (addrTaken->IsLocal()) + { + GenTreeLclVarCommon* varNode = addrTaken->AsLclVarCommon(); + unsigned varNum = varNode->GetLclNum(); + // We access non-struct type (for example, long) as a struct type. + // Make sure lclVar lives on stack to make sure its fields are accessible by address. + lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LocalField)); + } + } + + // Create a new tree for 'arg' + // replace the existing LDOBJ(EXPR) + // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...) + // + + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + unsigned offset = 0; + for (unsigned inx = 0; inx < elemCount; inx++) + { + GenTree* curAddr = baseAddr; + if (offset != 0) + { + GenTree* baseAddrDup = gtCloneExpr(baseAddr); + noway_assert(baseAddrDup != nullptr); + curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); + } + else + { + curAddr = baseAddr; + } + GenTree* curItem = gtNewIndir(type[inx], curAddr); + + // For safety all GT_IND should have at least GT_GLOB_REF set. 
+ curItem->gtFlags |= GTF_GLOB_REF; + + newArg->AddField(this, curItem, offset, type[inx]); + offset += genTypeSize(type[inx]); + } + } + } + +#ifdef DEBUG + // If we reach here we should have set newArg to something + if (newArg == nullptr) + { + gtDispTree(argValue); + assert(!"Missing case in fgMorphMultiregStructArg"); + } +#endif + + noway_assert(newArg != nullptr); + +#ifdef DEBUG + if (verbose) + { + printf("fgMorphMultiregStructArg created tree:\n"); + gtDispTree(newArg); + } +#endif + + arg = newArg; // consider calling fgMorphTree(newArg); + +#endif // FEATURE_MULTIREG_ARGS + + return arg; +} + +//------------------------------------------------------------------------ +// fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields +// +// Arguments: +// lcl - The GT_LCL_VAR node we will transform +// +// Return value: +// The new GT_FIELD_LIST that we have created. +// +GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl) +{ + LclVarDsc* varDsc = lvaGetDesc(lcl); + assert(varDsc->lvPromoted); + unsigned fieldCount = varDsc->lvFieldCnt; + unsigned fieldLclNum = varDsc->lvFieldLclStart; + + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + for (unsigned i = 0; i < fieldCount; i++) + { + LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); + GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); + fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); + fieldLclNum++; + } + return fieldList; +} + +//------------------------------------------------------------------------ +// fgMakeOutgoingStructArgCopy: make a copy of a struct variable if necessary, +// to pass to a callee. +// +// Arguments: +// call - call being processed +// args - args for the call +/// argIndex - arg being processed +// copyBlkClass - class handle for the struct +// +// Return value: +// tree that computes address of the outgoing arg +// +void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, + GenTreeCall::Use* args, + unsigned argIndex, + CORINFO_CLASS_HANDLE copyBlkClass) +{ + GenTree* argx = args->GetNode(); + noway_assert(argx->gtOper != GT_MKREFANY); + fgArgTabEntry* argEntry = Compiler::gtArgEntryByNode(call, argx); + + // If we're optimizing, see if we can avoid making a copy. + // + // We don't need a copy if this is the last use of an implicit by-ref local. + // + if (opts.OptimizationEnabled()) + { + GenTreeLclVar* const lcl = argx->IsImplicitByrefParameterValue(this); + + if (lcl != nullptr) + { + const unsigned varNum = lcl->GetLclNum(); + LclVarDsc* const varDsc = lvaGetDesc(varNum); + const unsigned short totalAppearances = varDsc->lvRefCnt(RCS_EARLY); + + // We don't have liveness so we rely on other indications of last use. + // + // We handle these cases: + // + // * (must not copy) If the call is a tail call, the use is a last use. + // We must skip the copy if we have a fast tail call. + // + // * (may not copy) if the call is noreturn, the use is a last use. + // We also check for just one reference here as we are not doing + // alias analysis of the call's parameters, or checking if the call + // site is not within some try region. + // + // * (may not copy) if there is exactly one use of the local in the method, + // and the call is not in loop, this is a last use. 
+ // + const bool isTailCallLastUse = call->IsTailCall(); + const bool isCallLastUse = (totalAppearances == 1) && !fgMightHaveLoop(); + const bool isNoReturnLastUse = (totalAppearances == 1) && call->IsNoReturn(); + if (isTailCallLastUse || isCallLastUse || isNoReturnLastUse) + { + varDsc->setLvRefCnt(0, RCS_EARLY); + args->SetNode(lcl); + assert(argEntry->GetNode() == lcl); + + JITDUMP("did not need to make outgoing copy for last use of implicit byref V%2d\n", varNum); + return; + } + } + } + + JITDUMP("making an outgoing copy for struct arg\n"); + + if (fgOutgoingArgTemps == nullptr) + { + fgOutgoingArgTemps = hashBv::Create(this); + } + + unsigned tmp = 0; + bool found = false; + + // Attempt to find a local we have already used for an outgoing struct and reuse it. + // We do not reuse within a statement. + if (!opts.MinOpts()) + { + indexType lclNum; + FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps) + { + LclVarDsc* varDsc = &lvaTable[lclNum]; + if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) && + !fgCurrentlyInUseArgTemps->testBit(lclNum)) + { + tmp = (unsigned)lclNum; + found = true; + JITDUMP("reusing outgoing struct arg"); + break; + } + } + NEXT_HBV_BIT_SET; + } + + // Create the CopyBlk tree and insert it. + if (!found) + { + // Get a new temp + // Here We don't need unsafe value cls check, since the addr of this temp is used only in copyblk. + tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument")); + lvaSetStruct(tmp, copyBlkClass, false); + if (call->IsVarargs()) + { + lvaSetStructUsedAsVarArg(tmp); + } + + fgOutgoingArgTemps->setBit(tmp); + } + + fgCurrentlyInUseArgTemps->setBit(tmp); + + // TYP_SIMD structs should not be enregistered, since ABI requires it to be + // allocated on stack and address of it needs to be passed. + if (lclVarIsSIMDType(tmp)) + { + lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct)); + } + + // Create a reference to the temp + GenTree* dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType); + dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction. + + if (argx->gtOper == GT_OBJ) + { + argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT); + argx->SetIndirExceptionFlags(this); + } + else + { + argx->gtFlags |= GTF_DONT_CSE; + } + + // Copy the valuetype to the temp + GenTree* copyBlk = gtNewBlkOpNode(dest, argx, false /* not volatile */, true /* copyBlock */); + copyBlk = fgMorphCopyBlock(copyBlk); + +#if FEATURE_FIXED_OUT_ARGS + + // Do the copy early, and evalute the temp later (see EvalArgsToTemps) + // When on Unix create LCL_FLD for structs passed in more than one registers. See fgMakeTmpArgNode + GenTree* arg = copyBlk; + +#else // FEATURE_FIXED_OUT_ARGS + + // Structs are always on the stack, and thus never need temps + // so we have to put the copy and temp all into one expression. + argEntry->tmpNum = tmp; + GenTree* arg = fgMakeTmpArgNode(argEntry); + + // Change the expression to "(tmp=val),tmp" + arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); + +#endif // FEATURE_FIXED_OUT_ARGS + + args->SetNode(arg); + call->fgArgInfo->EvalToTmp(argEntry, tmp, arg); + + return; +} + +#ifdef TARGET_ARM +// See declaration for specification comment. 
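+//
+// Illustrative sketch (the field layout is hypothetical): for a promoted struct { int a; double d; }
+// whose first field starts in r0, 'a' ends in r0 while 'd' starts in r2 due to its 8-byte
+// alignment, so r1 is recorded in *pArgSkippedRegMask as a skipped register.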
+void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc, + unsigned firstArgRegNum, + regMaskTP* pArgSkippedRegMask) +{ + assert(varDsc->lvPromoted); + // There's no way to do these calculations without breaking abstraction and assuming that + // integer register arguments are consecutive ints. They are on ARM. + + // To start, figure out what register contains the last byte of the first argument. + LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart]; + unsigned lastFldRegOfLastByte = + (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; + ; + + // Now we're keeping track of the register that the last field ended in; see what registers + // subsequent fields start in, and whether any are skipped. + // (We assume here the invariant that the fields are sorted in offset order.) + for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++) + { + unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset; + LclVarDsc* fldVarDsc = &lvaTable[fldVarNum]; + unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE; + assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields. + // This loop should enumerate the offsets of any registers skipped. + // Find what reg contains the last byte: + // And start at the first register after that. If that isn't the first reg of the current + for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset; + skippedRegOffsets++) + { + // If the register number would not be an arg reg, we're done. + if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG) + return; + *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets)); + } + lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE; + } +} + +#endif // TARGET_ARM + +/***************************************************************************** + * + * A little helper used to rearrange nested commutative operations. The + * effect is that nested associative, commutative operations are transformed + * into a 'left-deep' tree, i.e. into something like this: + * + * (((a op b) op c) op d) op... 
+ */ + +#if REARRANGE_ADDS + +void Compiler::fgMoveOpsLeft(GenTree* tree) +{ + GenTree* op1; + GenTree* op2; + genTreeOps oper; + + do + { + op1 = tree->AsOp()->gtOp1; + op2 = tree->AsOp()->gtOp2; + oper = tree->OperGet(); + + noway_assert(GenTree::OperIsCommutative(oper)); + noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL); + noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder); + noway_assert(oper == op2->gtOper); + + // Commutativity doesn't hold if overflow checks are needed + + if (tree->gtOverflowEx() || op2->gtOverflowEx()) + { + return; + } + + if (gtIsActiveCSE_Candidate(op2)) + { + // If we have marked op2 as a CSE candidate, + // we can't perform a commutative reordering + // because any value numbers that we computed for op2 + // will be incorrect after performing a commutative reordering + // + return; + } + + if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT)) + { + return; + } + + // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators + if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)) + { + return; + } + + if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN) + { + // We could deal with this, but we were always broken and just hit the assert + // below regarding flags, which means it's not frequent, so will just bail out. + // See #195514 + return; + } + + noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx()); + + GenTree* ad1 = op2->AsOp()->gtOp1; + GenTree* ad2 = op2->AsOp()->gtOp2; + + // Compiler::optOptimizeBools() can create GT_OR of two GC pointers yeilding a GT_INT + // We can not reorder such GT_OR trees + // + if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet())) + { + break; + } + + // Don't split up a byref calculation and create a new byref. E.g., + // [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int). + // Doing this transformation could create a situation where the first + // addition (that is, [byref]+ (ref, int) ) creates a byref pointer that + // no longer points within the ref object. If a GC happens, the byref won't + // get updated. This can happen, for instance, if one of the int components + // is negative. It also requires the address generation be in a fully-interruptible + // code region. + // + if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL) + { + assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD)); + break; + } + + /* Change "(x op (y op z))" to "(x op y) op z" */ + /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */ + + GenTree* new_op1 = op2; + + new_op1->AsOp()->gtOp1 = op1; + new_op1->AsOp()->gtOp2 = ad1; + + /* Change the flags. */ + + // Make sure we arent throwing away any flags + noway_assert((new_op1->gtFlags & + ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag. + GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated + GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0); + + new_op1->gtFlags = + (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag. + (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT); + + /* Retype new_op1 if it has not/become a GC ptr. 
*/ + + if (varTypeIsGC(op1->TypeGet())) + { + noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && + oper == GT_ADD) || // byref(ref + (int+int)) + (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL && + oper == GT_OR)); // int(gcref | int(gcref|intval)) + + new_op1->gtType = tree->gtType; + } + else if (varTypeIsGC(ad2->TypeGet())) + { + // Neither ad1 nor op1 are GC. So new_op1 isnt either + noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); + new_op1->gtType = TYP_I_IMPL; + } + + // If new_op1 is a new expression. Assign it a new unique value number. + // vnStore is null before the ValueNumber phase has run + if (vnStore != nullptr) + { + // We can only keep the old value number on new_op1 if both op1 and ad2 + // have the same non-NoVN value numbers. Since op is commutative, comparing + // only ad2 and op1 is enough. + if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || + (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) || + (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal())) + { + new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet())); + } + } + + tree->AsOp()->gtOp1 = new_op1; + tree->AsOp()->gtOp2 = ad2; + + /* If 'new_op1' is now the same nested op, process it recursively */ + + if ((ad1->gtOper == oper) && !ad1->gtOverflowEx()) + { + fgMoveOpsLeft(new_op1); + } + + /* If 'ad2' is now the same nested op, process it + * Instead of recursion, we set up op1 and op2 for the next loop. + */ + + op1 = new_op1; + op2 = ad2; + } while ((op2->gtOper == oper) && !op2->gtOverflowEx()); + + return; +} + +#endif + +/*****************************************************************************/ + +void Compiler::fgSetRngChkTarget(GenTree* tree, bool delay) +{ + if (tree->OperIsBoundsCheck()) + { + GenTreeBoundsChk* const boundsChk = tree->AsBoundsChk(); + BasicBlock* const failBlock = fgSetRngChkTargetInner(boundsChk->gtThrowKind, delay); + if (failBlock != nullptr) + { + boundsChk->gtIndRngFailBB = failBlock; + } + } + else if (tree->OperIs(GT_INDEX_ADDR)) + { + GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr(); + BasicBlock* const failBlock = fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); + if (failBlock != nullptr) + { + indexAddr->gtIndRngFailBB = failBlock; + } + } + else + { + noway_assert(tree->OperIs(GT_ARR_ELEM, GT_ARR_INDEX)); + fgSetRngChkTargetInner(SCK_RNGCHK_FAIL, delay); + } +} + +BasicBlock* Compiler::fgSetRngChkTargetInner(SpecialCodeKind kind, bool delay) +{ + if (opts.MinOpts()) + { + delay = false; + } + + if (!opts.compDbgCode) + { + if (!delay && !compIsForInlining()) + { + // Create/find the appropriate "range-fail" label + return fgRngChkTarget(compCurBB, kind); + } + } + + return nullptr; +} + +/***************************************************************************** + * + * Expand a GT_INDEX node and fully morph the child operands + * + * The orginal GT_INDEX node is bashed into the GT_IND node that accesses + * the array element. We expand the GT_INDEX node into a larger tree that + * evaluates the array base and index. The simplest expansion is a GT_COMMA + * with a GT_ARR_BOUND_CHK and a GT_IND with a GTF_INX_RNGCHK flag. + * For complex array or index expressions one or more GT_COMMA assignments + * are inserted so that we only evaluate the array or index expressions once. + * + * The fully expanded tree is then morphed. This causes gtFoldExpr to + * perform local constant prop and reorder the constants in the tree and + * fold them. 
+ *
+ * We then parse the resulting array element expression in order to locate
+ * and label the constants and variables that occur in the tree.
+ */
+
+const int MAX_ARR_COMPLEXITY = 4;
+const int MAX_INDEX_COMPLEXITY = 4;
+
+GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
+{
+ noway_assert(tree->gtOper == GT_INDEX);
+ GenTreeIndex* asIndex = tree->AsIndex();
+ var_types elemTyp = asIndex->TypeGet();
+ unsigned elemSize = asIndex->gtIndElemSize;
+ CORINFO_CLASS_HANDLE elemStructType = asIndex->gtStructElemClass;
+
+ noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
+
+ // Fold "cns_str"[cns_index] to ushort constant
+ if (opts.OptimizationEnabled() && asIndex->Arr()->OperIs(GT_CNS_STR) && asIndex->Index()->IsIntCnsFitsInI32())
+ {
+ const int cnsIndex = static_cast<int>(asIndex->Index()->AsIntConCommon()->IconValue());
+ if (cnsIndex >= 0)
+ {
+ int length;
+ const char16_t* str = info.compCompHnd->getStringLiteral(asIndex->Arr()->AsStrCon()->gtScpHnd,
+ asIndex->Arr()->AsStrCon()->gtSconCPX, &length);
+ if ((cnsIndex < length) && (str != nullptr))
+ {
+ GenTree* cnsCharNode = gtNewIconNode(str[cnsIndex], elemTyp);
+ INDEBUG(cnsCharNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+ return cnsCharNode;
+ }
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD && varTypeIsStruct(elemTyp) && structSizeMightRepresentSIMDType(elemSize))
+ {
+ // If this is a SIMD type, this is the point at which we lose the type information,
+ // so we need to set the correct type on the GT_IND.
+ // (We don't care about the base type here, so we only check, but don't retain, the return value).
+ unsigned simdElemSize = 0;
+ if (getBaseJitTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != CORINFO_TYPE_UNDEF)
+ {
+ assert(simdElemSize == elemSize);
+ elemTyp = getSIMDTypeForSize(elemSize);
+ // This is the new type of the node.
+ tree->gtType = elemTyp;
+ // Now set elemStructType to null so that we don't confuse value numbering.
+ elemStructType = nullptr;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ // Set up the array length's offset into lenOffs
+ // And the first element's offset into elemOffs
+ ssize_t lenOffs;
+ ssize_t elemOffs;
+ if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
+ {
+ lenOffs = OFFSETOF__CORINFO_String__stringLen;
+ elemOffs = OFFSETOF__CORINFO_String__chars;
+ tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
+ }
+ else
+ {
+ // We have a standard array
+ lenOffs = OFFSETOF__CORINFO_Array__length;
+ elemOffs = OFFSETOF__CORINFO_Array__data;
+ }
+
+ // In minopts, we expand GT_INDEX to GT_IND(GT_INDEX_ADDR) in order to minimize the size of the IR. As minopts
+ // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
+ // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
+ // performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in
+ // minopts).
+ //
+ // When we *are* optimizing, we fully expand GT_INDEX to:
+ // 1. Evaluate the array address expression and store the result in a temp if the expression is complex or
+ // side-effecting.
+ // 2. Evaluate the array index expression and store the result in a temp if the expression is complex or
+ // side-effecting.
+ // 3. Perform an explicit bounds check: GT_ARR_BOUNDS_CHK(index, GT_ARR_LENGTH(array))
+ // 4. Compute the address of the element that will be accessed:
+ // GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize))
+ // 5. Dereference the address with a GT_IND.
+ //
+ // This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows
+ // for more straightforward bounds-check removal, CSE, etc.
+ if (opts.MinOpts())
+ {
+ GenTree* const array = fgMorphTree(asIndex->Arr());
+ GenTree* const index = fgMorphTree(asIndex->Index());
+
+ GenTreeIndexAddr* const indexAddr =
+ new (this, GT_INDEX_ADDR) GenTreeIndexAddr(array, index, elemTyp, elemStructType, elemSize,
+ static_cast<unsigned>(lenOffs), static_cast<unsigned>(elemOffs));
+ indexAddr->gtFlags |= (array->gtFlags | index->gtFlags) & GTF_ALL_EFFECT;
+
+ // Mark the indirection node as needing a range check if necessary.
+ // Note this will always be true unless JitSkipArrayBoundCheck() is used
+ if ((indexAddr->gtFlags & GTF_INX_RNGCHK) != 0)
+ {
+ fgSetRngChkTarget(indexAddr);
+ }
+
+ // Change `tree` into an indirection and return.
+ tree->ChangeOper(GT_IND);
+ GenTreeIndir* const indir = tree->AsIndir();
+ indir->Addr() = indexAddr;
+ bool canCSE = indir->CanCSE();
+ indir->gtFlags = GTF_IND_ARR_INDEX | (indexAddr->gtFlags & GTF_ALL_EFFECT);
+ if (!canCSE)
+ {
+ indir->SetDoNotCSE();
+ }
+
+#ifdef DEBUG
+ indexAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif // DEBUG
+
+ return indir;
+ }
+
+ GenTree* arrRef = asIndex->Arr();
+ GenTree* index = asIndex->Index();
+
+ bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
+ bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
+
+ GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
+ GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
+ GenTree* bndsChk = nullptr;
+
+ // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
+ if (chkd)
+ {
+ GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression
+ GenTree* index2 = nullptr;
+
+ // If the arrRef or index expressions involve an assignment, a call or reads from global memory,
+ // then we *must* allocate a temporary in which to "localize" those values, to ensure that the
+ // same values are used in the bounds check and the actual dereference.
+ // Also we allocate the temporary when the expression is sufficiently complex/expensive.
+ //
+ // Note that if the expression is a GT_FIELD, it has not yet been morphed so its true complexity is
+ // not exposed. Without that condition there are cases of local struct fields that were previously,
+ // needlessly, marked as GTF_GLOB_REF, and when that was fixed, there were some regressions that
+ // were mostly ameliorated by adding this condition.
+ //
+ // Likewise, allocate a temporary if the expression is a GT_LCL_FLD node. These used to be created
+ // after fgMorphArrayIndex from GT_FIELD trees so this preserves the existing behavior. This is
+ // perhaps a decision that should be left to CSE but FX diffs show that it is slightly better to
+ // do this here.
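+ //
+ // For example (illustrative): if the index is produced by a call, its value is stored to a temp,
+ // and that temp feeds both the GT_ARR_BOUNDS_CHECK and the address computation below, so the
+ // bounds check and the dereference are guaranteed to see the same index value.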
+ + if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || + gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || arrRef->OperIs(GT_FIELD, GT_LCL_FLD)) + { + unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr")); + arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef); + arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); + arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet()); + } + else + { + arrRef2 = gtCloneExpr(arrRef); + noway_assert(arrRef2 != nullptr); + } + + if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) || + index->OperIs(GT_FIELD, GT_LCL_FLD)) + { + unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("index expr")); + indexDefn = gtNewTempAssign(indexTmpNum, index); + index = gtNewLclvNode(indexTmpNum, index->TypeGet()); + index2 = gtNewLclvNode(indexTmpNum, index->TypeGet()); + } + else + { + index2 = gtCloneExpr(index); + noway_assert(index2 != nullptr); + } + + // Next introduce a GT_ARR_BOUNDS_CHECK node + var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. + +#ifdef TARGET_64BIT + // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case + // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case, + // the comparison will have to be widen to 64 bits. + if (index->TypeGet() == TYP_I_IMPL) + { + bndsChkType = TYP_I_IMPL; + } +#endif // TARGET_64BIT + + GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); + + if (bndsChkType != TYP_INT) + { + arrLen = gtNewCastNode(bndsChkType, arrLen, false, bndsChkType); + } + + GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) + GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL); + + bndsChk = arrBndsChk; + + // Now we'll switch to using the second copies for arrRef and index + // to compute the address expression + + arrRef = arrRef2; + index = index2; + } + + // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))" + + GenTree* addr; + +#ifdef TARGET_64BIT + // Widen 'index' on 64-bit targets + if (index->TypeGet() != TYP_I_IMPL) + { + if (index->OperGet() == GT_CNS_INT) + { + index->gtType = TYP_I_IMPL; + } + else + { + index = gtNewCastNode(TYP_I_IMPL, index, false, TYP_I_IMPL); + } + } +#endif // TARGET_64BIT + + /* Scale the index value if necessary */ + if (elemSize > 1) + { + GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL); + + // Fix 392756 WP7 Crossgen + // + // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node + // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar. + // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE. + // + size->gtFlags |= GTF_DONT_CSE; + + /* Multiply by the array element size */ + addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size); + } + else + { + addr = index; + } + + // Be careful to only create the byref pointer when the full index expression is added to the array reference. + // We don't want to create a partial byref address expression that doesn't include the full index offset: + // a byref must point within the containing object. It is dangerous (especially when optimizations come into + // play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that + // the partial byref will not point within the object, and thus not get updated correctly during a GC. 
+ // This is mostly a risk in fully-interruptible code regions. + // + // NOTE: the tree form created here is pattern matched by optExtractArrIndex(), so changes here must + // be reflected there. + + /* Add the first element's offset */ + + GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL); + + addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns); + + /* Add the object ref to the element's offset */ + + addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr); + + assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) || + (GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL)); + + // Change the orginal GT_INDEX node into a GT_IND node + tree->SetOper(GT_IND); + + // If the index node is a floating-point type, notify the compiler + // we'll potentially use floating point registers at the time of codegen. + if (varTypeUsesFloatReg(tree->gtType)) + { + this->compFloatingPointUsed = true; + } + + // We've now consumed the GTF_INX_RNGCHK, and the node + // is no longer a GT_INDEX node. + tree->gtFlags &= ~GTF_INX_RNGCHK; + + tree->AsOp()->gtOp1 = addr; + + // This is an array index expression. + tree->gtFlags |= GTF_IND_ARR_INDEX; + + // If there's a bounds check, the indir won't fault. + if (bndsChk) + { + tree->gtFlags |= GTF_IND_NONFAULTING; + } + else + { + tree->gtFlags |= GTF_EXCEPT; + } + + if (nCSE) + { + tree->gtFlags |= GTF_DONT_CSE; + } + + // Store information about it. + GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType)); + + // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it. + + GenTree* indTree = tree; + + // Did we create a bndsChk tree? + if (bndsChk) + { + // Use a GT_COMMA node to prepend the array bound check + // + tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree); + + /* Mark the indirection node as needing a range check */ + fgSetRngChkTarget(bndsChk); + } + + if (indexDefn != nullptr) + { + // Use a GT_COMMA node to prepend the index assignment + // + tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree); + } + if (arrRefDefn != nullptr) + { + // Use a GT_COMMA node to prepend the arRef assignment + // + tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree); + } + + // Currently we morph the tree to perform some folding operations prior + // to attaching fieldSeq info and labeling constant array index contributions + // + fgMorphTree(tree); + + // Ideally we just want to proceed to attaching fieldSeq info and labeling the + // constant array index contributions, but the morphing operation may have changed + // the 'tree' into something that now unconditionally throws an exception. + // + // In such case the gtEffectiveVal could be a new tree or it's gtOper could be modified + // or it could be left unchanged. If it is unchanged then we should not return, + // instead we should proceed to attaching fieldSeq info, etc... + // + GenTree* arrElem = tree->gtEffectiveVal(); + + if (fgIsCommaThrow(tree)) + { + if ((arrElem != indTree) || // A new tree node may have been created + (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT + { + return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc.. 
+ } + } + + assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)); + + addr = arrElem->AsOp()->gtOp1; + + assert(addr->TypeGet() == TYP_BYREF); + + GenTree* cnsOff = nullptr; + if (addr->OperGet() == GT_ADD) + { + assert(addr->TypeGet() == TYP_BYREF); + assert(addr->AsOp()->gtOp1->TypeGet() == TYP_REF); + + addr = addr->AsOp()->gtOp2; + + // Look for the constant [#FirstElem] node here, or as the RHS of an ADD. + + if (addr->gtOper == GT_CNS_INT) + { + cnsOff = addr; + addr = nullptr; + } + else + { + if ((addr->OperGet() == GT_ADD) && (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT)) + { + cnsOff = addr->AsOp()->gtOp2; + addr = addr->AsOp()->gtOp1; + } + + // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX + addr->LabelIndex(this); + } + } + else if (addr->OperGet() == GT_CNS_INT) + { + cnsOff = addr; + } + + FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField); + + if ((cnsOff != nullptr) && (cnsOff->AsIntCon()->gtIconVal == elemOffs)) + { + // Assign it the [#FirstElem] field sequence + // + cnsOff->AsIntCon()->gtFieldSeq = firstElemFseq; + } + else // We have folded the first element's offset with the index expression + { + // Build the [#ConstantIndex, #FirstElem] field sequence + // + FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); + FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq); + + if (cnsOff == nullptr) // It must have folded into a zero offset + { + // Record in the general zero-offset map. + fgAddFieldSeqForZeroOffset(addr, fieldSeq); + } + else + { + cnsOff->AsIntCon()->gtFieldSeq = fieldSeq; + } + } + + return tree; +} + +#ifdef TARGET_X86 +/***************************************************************************** + * + * Wrap fixed stack arguments for varargs functions to go through varargs + * cookie to access them, except for the cookie itself. + * + * Non-x86 platforms are allowed to access all arguments directly + * so we don't need this code. + * + */ +GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs) +{ + /* For the fixed stack arguments of a varargs function, we need to go + through the varargs cookies to access them, except for the + cookie itself */ + + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg) + { + // Create a node representing the local pointing to the base of the args + GenTree* ptrArg = + gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL), + gtNewIconNode(varDsc->GetStackOffset() - + codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES - lclOffs)); + + // Access the argument through the local + GenTree* tree; + if (varTypeIsStruct(varType)) + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->GetStructHnd(); + assert(typeHnd != nullptr); + tree = gtNewObjNode(typeHnd, ptrArg); + } + else + { + tree = gtNewOperNode(GT_IND, varType, ptrArg); + } + tree->gtFlags |= GTF_IND_TGTANYWHERE; + + if (varDsc->lvAddrExposed) + { + tree->gtFlags |= GTF_GLOB_REF; + } + + return fgMorphTree(tree); + } + + return NULL; +} +#endif + +/***************************************************************************** + * + * Transform the given GT_LCL_VAR tree for code generation. 
+ */ + +GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph) +{ + assert(tree->gtOper == GT_LCL_VAR); + + unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); + var_types varType = lvaGetRealType(lclNum); + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (varDsc->lvAddrExposed) + { + tree->gtFlags |= GTF_GLOB_REF; + } + +#ifdef TARGET_X86 + if (info.compIsVarArgs) + { + GenTree* newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0); + if (newTree != nullptr) + { + if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) + { + newTree->SetOper(GT_IND); + } + return newTree; + } + } +#endif // TARGET_X86 + + /* If not during the global morphing phase bail */ + + if (!fgGlobalMorph && !forceRemorph) + { + return tree; + } + + bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0; + + noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr + + if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad()) + { +#if LOCAL_ASSERTION_PROP + /* Assertion prop can tell us to omit adding a cast here */ + if (optLocalAssertionProp && optAssertionIsSubrange(tree, TYP_INT, varType, apFull) != NO_ASSERTION_INDEX) + { + return tree; + } +#endif + /* Small-typed arguments and aliased locals are normalized on load. + Other small-typed locals are normalized on store. + Also, under the debugger as the debugger could write to the variable. + If this is one of the former, insert a narrowing cast on the load. + ie. Convert: var-short --> cast-short(var-int) */ + + tree->gtType = TYP_INT; + fgMorphTreeDone(tree); + tree = gtNewCastNode(TYP_INT, tree, false, varType); + fgMorphTreeDone(tree); + return tree; + } + + return tree; +} + +/***************************************************************************** + Grab a temp for big offset morphing. + This method will grab a new temp if no temp of this "type" has been created. + Or it will return the same cached one if it has been created. +*/ +unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type) +{ + unsigned lclNum = fgBigOffsetMorphingTemps[type]; + + if (lclNum == BAD_VAR_NUM) + { + // We haven't created a temp for this kind of type. Create one now. + lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing")); + fgBigOffsetMorphingTemps[type] = lclNum; + } + else + { + // We better get the right type. + noway_assert(lvaTable[lclNum].TypeGet() == type); + } + + noway_assert(lclNum != BAD_VAR_NUM); + return lclNum; +} + +/***************************************************************************** + * + * Transform the given GT_FIELD tree for code generation. + */ + +GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) +{ + assert(tree->gtOper == GT_FIELD); + + CORINFO_FIELD_HANDLE symHnd = tree->AsField()->gtFldHnd; + unsigned fldOffset = tree->AsField()->gtFldOffset; + GenTree* objRef = tree->AsField()->gtFldObj; + bool fieldMayOverlap = false; + bool objIsLocal = false; + + if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR)) + { + // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter. + // If it is, fgMorphImplicitByRefArgs may change it do a different opcode, which the + // simd field rewrites are sensitive to. + fgMorphImplicitByRefArgs(objRef); + } + + noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) || + ((tree->gtFlags & GTF_GLOB_REF) != 0)); + + if (tree->AsField()->gtFldMayOverlap) + { + fieldMayOverlap = true; + // Reset the flag because we may reuse the node. 
+ tree->AsField()->gtFldMayOverlap = false; + } + +#ifdef FEATURE_SIMD + // if this field belongs to simd struct, translate it to simd intrinsic. + if (mac == nullptr) + { + GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree); + if (newTree != tree) + { + newTree = fgMorphSmpOp(newTree); + return newTree; + } + } + else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1())) + { + GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr(); + if (lcl != nullptr) + { + lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUGARG(DNER_LocalField)); + } + } +#endif + + /* Is this an instance data member? */ + + if (objRef) + { + GenTree* addr; + objIsLocal = objRef->IsLocal(); + + if (tree->gtFlags & GTF_IND_TLS_REF) + { + NO_WAY("instance field can not be a TLS ref."); + } + + /* We'll create the expression "*(objRef + mem_offs)" */ + + noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL); + + /* + Now we have a tree like this: + + +--------------------+ + | GT_FIELD | tree + +----------+---------+ + | + +--------------+-------------+ + | tree->AsField()->gtFldObj | + +--------------+-------------+ + + + We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): + + +--------------------+ + | GT_IND/GT_OBJ | tree + +---------+----------+ + | + | + +---------+----------+ + | GT_ADD | addr + +---------+----------+ + | + / \ + / \ + / \ + +-------------------+ +----------------------+ + | objRef | | fldOffset | + | | | (when fldOffset !=0) | + +-------------------+ +----------------------+ + + + or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT): + + + +--------------------+ + | GT_IND/GT_OBJ | tree + +----------+---------+ + | + +----------+---------+ + | GT_COMMA | comma2 + +----------+---------+ + | + / \ + / \ + / \ + / \ + +---------+----------+ +---------+----------+ + comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr + +---------+----------+ +---------+----------+ + | | + / \ / \ + / \ / \ + / \ / \ + +-----+-----+ +-----+-----+ +---------+ +-----------+ + asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset | + +-----+-----+ +-----+-----+ +---------+ +-----------+ + | | + / \ | + / \ | + / \ | + +-----+-----+ +-----+-----+ +-----------+ + | tmpLcl | | objRef | | tmpLcl | + +-----------+ +-----------+ +-----------+ + + + */ + + var_types objRefType = objRef->TypeGet(); + + GenTree* comma = nullptr; + + // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field, + // and thus is equivalent to a MACK_Ind with zero offset. + MorphAddrContext defMAC(MACK_Ind); + if (mac == nullptr) + { + mac = &defMAC; + } + + // This flag is set to enable the "conservative" style of explicit null-check insertion. + // This means that we insert an explicit null check whenever we create byref by adding a + // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately + // dereferenced). The alternative is "aggressive", which would not insert such checks (for + // small offsets); in this plan, we would transfer some null-checking responsibility to + // callee's of methods taking byref parameters. They would have to add explicit null checks + // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in + // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too + // large). 
To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null + // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs. + // This is left here to point out how to implement it. + CLANG_FORMAT_COMMENT_ANCHOR; + +#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1 + + bool addExplicitNullCheck = false; + + // Implicit byref locals and string literals are never null. + if (fgAddrCouldBeNull(objRef)) + { + // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression + // whose address is being taken is either a local or static variable, whose address is necessarily + // non-null, or else it is a field dereference, which will do its own bounds checking if necessary. + if (objRef->gtOper != GT_ADDR && (mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind)) + { + if (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)) + { + addExplicitNullCheck = true; + } + else + { + // In R2R mode the field offset for some fields may change when the code + // is loaded. So we can't rely on a zero offset here to suppress the null check. + // + // See GitHub issue #16454. + bool fieldHasChangeableOffset = false; + +#ifdef FEATURE_READYTORUN_COMPILER + fieldHasChangeableOffset = (tree->AsField()->gtFieldLookup.addr != nullptr); +#endif + +#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION + addExplicitNullCheck = (mac->m_kind == MACK_Addr) && + ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset); +#else + addExplicitNullCheck = (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr && + ((mac->m_totalOffset + fldOffset > 0) || fieldHasChangeableOffset)); +#endif + } + } + } + + if (addExplicitNullCheck) + { +#ifdef DEBUG + if (verbose) + { + printf("Before explicit null check morphing:\n"); + gtDispTree(tree); + } +#endif + + // + // Create the "comma" subtree + // + GenTree* asg = nullptr; + GenTree* nullchk; + + unsigned lclNum; + + if (objRef->gtOper != GT_LCL_VAR) + { + lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet())); + + // Create the "asg" node + asg = gtNewTempAssign(lclNum, objRef); + } + else + { + lclNum = objRef->AsLclVarCommon()->GetLclNum(); + } + + GenTree* lclVar = gtNewLclvNode(lclNum, objRefType); + nullchk = gtNewNullCheck(lclVar, compCurBB); + + nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections + + if (asg) + { + // Create the "comma" node. + comma = gtNewOperNode(GT_COMMA, + TYP_VOID, // We don't want to return anything from this "comma" node. + // Set the type to TYP_VOID, so we can select "cmp" instruction + // instead of "mov" instruction later on. + asg, nullchk); + } + else + { + comma = nullchk; + } + + addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node. + } + else + { + addr = objRef; + } + +#ifdef FEATURE_READYTORUN_COMPILER + if (tree->AsField()->gtFieldLookup.addr != nullptr) + { + GenTree* offsetNode = nullptr; + if (tree->AsField()->gtFieldLookup.accessType == IAT_PVALUE) + { + offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->AsField()->gtFieldLookup.addr, + GTF_ICON_CONST_PTR, true); +#ifdef DEBUG + offsetNode->gtGetOp1()->AsIntCon()->gtTargetHandle = (size_t)symHnd; +#endif + } + else + { + noway_assert(!"unexpected accessType for R2R field access"); + } + + var_types addType = (objRefType == TYP_I_IMPL) ? 
TYP_I_IMPL : TYP_BYREF; + addr = gtNewOperNode(GT_ADD, addType, addr, offsetNode); + } +#endif + if (fldOffset != 0) + { + // Generate the "addr" node. + /* Add the member offset to the object's address */ + FieldSeqNode* fieldSeq = + fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, + gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq)); + } + + // Now let's set the "tree" as a GT_IND tree. + + tree->SetOper(GT_IND); + tree->AsOp()->gtOp1 = addr; + + tree->SetIndirExceptionFlags(this); + + if (addExplicitNullCheck) + { + // + // Create "comma2" node and link it to "tree". + // + GenTree* comma2; + comma2 = gtNewOperNode(GT_COMMA, + addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node. + comma, addr); + tree->AsOp()->gtOp1 = comma2; + } + +#ifdef DEBUG + if (verbose) + { + if (addExplicitNullCheck) + { + printf("After adding explicit null check:\n"); + gtDispTree(tree); + } + } +#endif + } + else /* This is a static data member */ + { + if (tree->gtFlags & GTF_IND_TLS_REF) + { + // Thread Local Storage static field reference + // + // Field ref is a TLS 'Thread-Local-Storage' reference + // + // Build this tree: IND(*) # + // | + // ADD(I_IMPL) + // / \. + // / CNS(fldOffset) + // / + // / + // / + // IND(I_IMPL) == [Base of this DLL's TLS] + // | + // ADD(I_IMPL) + // / \. + // / CNS(IdValue*4) or MUL + // / / \. + // IND(I_IMPL) / CNS(4) + // | / + // CNS(TLS_HDL,0x2C) IND + // | + // CNS(pIdAddr) + // + // # Denotes the orginal node + // + void** pIdAddr = nullptr; + unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr); + + // + // If we can we access the TLS DLL index ID value directly + // then pIdAddr will be NULL and + // IdValue will be the actual TLS DLL index ID + // + GenTree* dllRef = nullptr; + if (pIdAddr == nullptr) + { + if (IdValue != 0) + { + dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL); + } + } + else + { + dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_CONST_PTR, true); + + // Next we multiply by 4 + dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL)); + } + +#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides + + // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] + + GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL); + + // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS + if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) + { + tree->gtFlags &= ~GTF_FLD_INITCLASS; + tlsRef->gtFlags |= GTF_ICON_INITCLASS; + } + + tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); + + if (dllRef != nullptr) + { + /* Add the dllRef */ + tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); + } + + /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */ + tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef); + + if (fldOffset != 0) + { + FieldSeqNode* fieldSeq = + fieldMayOverlap ? 
FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + GenTree* fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq); + + /* Add the TLS static field offset to the address */ + + tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode); + } + + // Final indirect to get to actual value of TLS static field + + tree->SetOper(GT_IND); + tree->AsOp()->gtOp1 = tlsRef; + + noway_assert(tree->gtFlags & GTF_IND_TLS_REF); + } + else + { + // Normal static field reference + + // + // If we can we access the static's address directly + // then pFldAddr will be NULL and + // fldAddr will be the actual address of the static field + // + void** pFldAddr = nullptr; + void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr); + + // We should always be able to access this static field address directly + // + assert(pFldAddr == nullptr); + +#ifdef TARGET_64BIT + bool isStaticReadOnlyInited = false; + bool plsSpeculative = true; + if (info.compCompHnd->getStaticFieldCurrentClass(symHnd, &plsSpeculative) != NO_CLASS_HANDLE) + { + isStaticReadOnlyInited = !plsSpeculative; + } + + // even if RelocTypeHint is REL32 let's still prefer IND over GT_CLS_VAR + // for static readonly fields of statically initialized classes - thus we can + // apply GTF_IND_INVARIANT flag and make it hoistable/CSE-friendly + if (isStaticReadOnlyInited || (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))) + { + // The address is not directly addressible, so force it into a + // constant, so we handle it properly + + GenTree* addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL); + addr->gtType = TYP_I_IMPL; + FieldSeqNode* fieldSeq = + fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + addr->AsIntCon()->gtFieldSeq = fieldSeq; + // Translate GTF_FLD_INITCLASS to GTF_ICON_INITCLASS + if ((tree->gtFlags & GTF_FLD_INITCLASS) != 0) + { + tree->gtFlags &= ~GTF_FLD_INITCLASS; + addr->gtFlags |= GTF_ICON_INITCLASS; + } + + tree->SetOper(GT_IND); + tree->AsOp()->gtOp1 = addr; + + if (isStaticReadOnlyInited) + { + JITDUMP("Marking initialized static read-only field '%s' as invariant.\n", eeGetFieldName(symHnd)); + + // Static readonly field is not null at this point (see getStaticFieldCurrentClass impl). + tree->gtFlags |= (GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL); + tree->gtFlags &= ~GTF_ICON_INITCLASS; + addr->gtFlags = GTF_ICON_CONST_PTR; + } + + return fgMorphSmpOp(tree); + } + else +#endif // TARGET_64BIT + { + // Only volatile or classinit could be set, and they map over + noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0); + static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE); + static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS); + tree->SetOper(GT_CLS_VAR); + tree->AsClsVar()->gtClsVarHnd = symHnd; + FieldSeqNode* fieldSeq = + fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + tree->AsClsVar()->gtFieldSeq = fieldSeq; + } + + return tree; + } + } + noway_assert(tree->gtOper == GT_IND); + + if (fldOffset == 0) + { + GenTree* addr = tree->AsOp()->gtOp1; + + // 'addr' may be a GT_COMMA. Skip over any comma nodes + addr = addr->gtEffectiveVal(); + +#ifdef DEBUG + if (verbose) + { + printf("\nBefore calling fgAddFieldSeqForZeroOffset:\n"); + gtDispTree(tree); + } +#endif + + // We expect 'addr' to be an address at this point. 
+ assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); + + // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node. + FieldSeqNode* fieldSeq = + fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd); + fgAddFieldSeqForZeroOffset(addr, fieldSeq); + } + + // Pass down the current mac; if non null we are computing an address + GenTree* result = fgMorphSmpOp(tree, mac); + +#ifdef DEBUG + if (verbose) + { + printf("\nFinal value of Compiler::fgMorphField after calling fgMorphSmpOp:\n"); + gtDispTree(result); + } +#endif + + return result; +} + +//------------------------------------------------------------------------------ +// fgMorphCallInline: attempt to inline a call +// +// Arguments: +// call - call expression to inline, inline candidate +// inlineResult - result tracking and reporting +// +// Notes: +// Attempts to inline the call. +// +// If successful, callee's IR is inserted in place of the call, and +// is marked with an InlineContext. +// +// If unsuccessful, the transformations done in anticipation of a +// possible inline are undone, and the candidate flag on the call +// is cleared. + +void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) +{ + bool inliningFailed = false; + + // Is this call an inline candidate? + if (call->IsInlineCandidate()) + { + // Attempt the inline + fgMorphCallInlineHelper(call, inlineResult); + + // We should have made up our minds one way or another.... + assert(inlineResult->IsDecided()); + + // If we failed to inline, we have a bit of work to do to cleanup + if (inlineResult->IsFailure()) + { + +#ifdef DEBUG + + // Before we do any cleanup, create a failing InlineContext to + // capture details of the inlining attempt. + m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult); + +#endif + + inliningFailed = true; + + // Clear the Inline Candidate flag so we can ensure later we tried + // inlining all candidates. + // + call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE; + } + } + else + { + // This wasn't an inline candidate. So it must be a GDV candidate. + assert(call->IsGuardedDevirtualizationCandidate()); + + // We already know we can't inline this call, so don't even bother to try. + inliningFailed = true; + } + + // If we failed to inline (or didn't even try), do some cleanup. + if (inliningFailed) + { + if (call->gtReturnType != TYP_VOID) + { + JITDUMP("Inlining [%06u] failed, so bashing " FMT_STMT " to NOP\n", dspTreeID(call), fgMorphStmt->GetID()); + + // Detach the GT_CALL tree from the original statement by + // hanging a "nothing" node to it. Later the "nothing" node will be removed + // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node. + + noway_assert(fgMorphStmt->GetRootNode() == call); + fgMorphStmt->SetRootNode(gtNewNothingNode()); + } + } +} + +/***************************************************************************** + * Helper to attempt to inline a call + * Sets success/failure in inline result + * If success, modifies current method's IR with inlinee's IR + * If failed, undoes any speculative modifications to current method + */ + +void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) +{ + // Don't expect any surprises here. 
+ assert(result->IsCandidate()); + + if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING) + { + // For now, attributing this to call site, though it's really + // more of a budget issue (lvaCount currently includes all + // caller and prospective callee locals). We still might be + // able to inline other callees into this caller, or inline + // this callee in other callers. + result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS); + return; + } + + if (call->IsVirtual()) + { + result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL); + return; + } + + // Re-check this because guarded devirtualization may allow these through. + if (gtIsRecursiveCall(call) && call->IsImplicitTailCall()) + { + result->NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL); + return; + } + + // impMarkInlineCandidate() is expected not to mark tail prefixed calls + // and recursive tail calls as inline candidates. + noway_assert(!call->IsTailPrefixedCall()); + noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call)); + + // + // Calling inlinee's compiler to inline the method. + // + + unsigned startVars = lvaCount; + +#ifdef DEBUG + if (verbose) + { + printf("Expanding INLINE_CANDIDATE in statement "); + printStmtID(fgMorphStmt); + printf(" in " FMT_BB ":\n", compCurBB->bbNum); + gtDispStmt(fgMorphStmt); + if (call->IsImplicitTailCall()) + { + printf("Note: candidate is implicit tail call\n"); + } + } +#endif + + impInlineRoot()->m_inlineStrategy->NoteAttempt(result); + + // + // Invoke the compiler to inline the call. + // + + fgInvokeInlineeCompiler(call, result); + + if (result->IsFailure()) + { + // Undo some changes made in anticipation of inlining... + + // Zero out the used locals + memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable)); + for (unsigned i = startVars; i < lvaCount; i++) + { + new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(); // call the constructor. + } + + lvaCount = startVars; + +#ifdef DEBUG + if (verbose) + { + // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount); + } +#endif + + return; + } + +#ifdef DEBUG + if (verbose) + { + // printf("After inlining lvaCount=%d.\n", lvaCount); + } +#endif +} + +//------------------------------------------------------------------------ +// fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp. +// +// Arguments: +// callee - The callee to check +// failReason - If this method returns false, the reason why. Can be nullptr. +// +// Return Value: +// Returns true or false based on whether the callee can be fastTailCalled +// +// Notes: +// This function is target specific and each target will make the fastTailCall +// decision differently. See the notes below. +// +// This function calls fgInitArgInfo() to initialize the arg info table, which +// is used to analyze the argument. This function can alter the call arguments +// by adding argument IR nodes for non-standard arguments. +// +// Windows Amd64: +// A fast tail call can be made whenever the number of callee arguments +// is less than or equal to the number of caller arguments, or we have four +// or fewer callee arguments. This is because, on Windows AMD64, each +// argument uses exactly one register or one 8-byte stack slot. Thus, we only +// need to count arguments, and not be concerned with the size of each +// incoming or outgoing argument. 
+//
+// Can fast tail call examples (amd64 Windows):
+//
+// -- Callee will have all register arguments --
+// caller(int, int, int, int)
+// callee(int, int, float, int)
+//
+// -- Callee requires stack space that is equal to or less than the caller --
+// caller(struct, struct, struct, struct, struct, struct)
+// callee(int, int, int, int, int, int)
+//
+// -- Callee requires stack space that is less than the caller --
+// caller(struct, double, struct, float, struct, struct)
+// callee(int, int, int, int, int)
+//
+// -- Callee will have all register arguments --
+// caller(int)
+// callee(int, int, int, int)
+//
+// Cannot fast tail call examples (amd64 Windows):
+//
+// -- Callee requires stack space that is larger than the caller --
+// caller(struct, double, struct, float, struct, struct)
+// callee(int, int, int, int, int, double, double, double)
+//
+// -- Callee has a byref struct argument --
+// caller(int, int, int)
+// callee(struct(size 3 bytes))
+//
+// Unix Amd64 && Arm64:
+// A fastTailCall decision can be made whenever the callee's stack space is
+// less than or equal to the caller's stack space. There are many permutations
+// of when the caller and callee have different stack sizes if there are
+// structs being passed to either the caller or callee.
+//
+// Exceptions:
+// 1) If the callee has structs which cannot be enregistered, it will be
+// reported as cannot fast tail call. This is an implementation limitation
+// where only the callee is checked for non-enregisterable structs. This is
+// tracked with https://github.com/dotnet/runtime/issues/8492.
+//
+// 2) If the caller or callee has stack arguments and the callee has more
+// arguments than the caller, it will be reported as cannot fast tail call.
+// This is due to a bug in LowerFastTailCall which assumes that
+// nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This
+// is tracked with https://github.com/dotnet/runtime/issues/8413.
+//
+// 3) If the callee has a 9 to 16 byte struct argument and the callee has
+// stack arguments, the decision will be to not fast tail call. This is
+// because before fgMorphArgs is done, it is not known whether the struct
+// will be placed on the stack or enregistered. Therefore, the conservative
+// decision not to fast tail call is taken. This limitation should be
+// removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
+// +// Can fast tail call examples (amd64 Unix): +// +// -- Callee will have all register arguments -- +// caller(int, int, int, int) +// callee(int, int, float, int) +// +// -- Callee requires stack space that is equal to the caller -- +// caller({ long, long }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte +// stack +// space +// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space +// +// -- Callee requires stack space that is less than the caller -- +// caller({ long, long }, int, { long, long }, int, { long, long }, { long, long }) 6 int register arguments, 32 byte +// stack +// space +// callee(int, int, int, int, int, int, { long, long } ) // 6 int register arguments, 16 byte stack space +// +// -- Callee will have all register arguments -- +// caller(int) +// callee(int, int, int, int) +// +// Cannot fast tail call examples (amd64 Unix): +// +// -- Callee requires stack space that is larger than the caller -- +// caller(float, float, float, float, float, float, float, float) -- 8 float register arguments +// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space +// +// -- Callee has structs which cannot be enregistered (Implementation Limitation) -- +// caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register +// arguments, 24 byte stack space +// callee({ double, double, double }) -- 24 bytes stack space +// +// -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) -- +// caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space +// callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space +// +// -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) -- +// caller({ double, double, double, double, double, double }) // 48 byte stack +// callee(int, int) -- 2 int registers + +bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) +{ +#if FEATURE_FASTTAILCALL + + // To reach here means that the return types of the caller and callee are tail call compatible. + // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type. 
+ CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (callee->IsTailPrefixedCall()) + { + var_types retType = info.compRetType; + assert(impTailCallRetTypeCompatible(retType, info.compMethodInfo->args.retTypeClass, info.compCallConv, + (var_types)callee->gtReturnType, callee->gtRetClsHnd, + callee->GetUnmanagedCallConv())); + } +#endif + + assert(!callee->AreArgsComplete()); + + fgInitArgInfo(callee); + + fgArgInfo* argInfo = callee->fgArgInfo; + + unsigned calleeArgStackSize = 0; + unsigned callerArgStackSize = info.compArgStackSize; + + for (unsigned index = 0; index < argInfo->ArgCount(); ++index) + { + fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); + + calleeArgStackSize = roundUp(calleeArgStackSize, arg->GetByteAlignment()); + calleeArgStackSize += arg->GetStackByteSize(); + } + calleeArgStackSize = GetOutgoingArgByteSize(calleeArgStackSize); + + auto reportFastTailCallDecision = [&](const char* thisFailReason) { + if (failReason != nullptr) + { + *failReason = thisFailReason; + } + +#ifdef DEBUG + if ((JitConfig.JitReportFastTailCallDecisions()) == 1) + { + if (callee->gtCallType != CT_INDIRECT) + { + const char* methodName; + + methodName = eeGetMethodFullName(callee->gtCallMethHnd); + + printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ", + info.compFullName, methodName); + } + else + { + printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- " + "Decision: ", + info.compFullName); + } + + if (thisFailReason == nullptr) + { + printf("Will fast tailcall"); + } + else + { + printf("Will not fast tailcall (%s)", thisFailReason); + } + + printf(" (CallerArgStackSize: %d, CalleeArgStackSize: %d)\n\n", callerArgStackSize, calleeArgStackSize); + } + else + { + if (thisFailReason == nullptr) + { + JITDUMP("[Fast tailcall decision]: Will fast tailcall\n"); + } + else + { + JITDUMP("[Fast tailcall decision]: Will not fast tailcall (%s)\n", thisFailReason); + } + } +#endif // DEBUG + }; + + if (!opts.compFastTailCalls) + { + reportFastTailCallDecision("Configuration doesn't allow fast tail calls"); + return false; + } + + if (callee->IsStressTailCall()) + { + reportFastTailCallDecision("Fast tail calls are not performed under tail call stress"); + return false; + } + + // Note on vararg methods: + // If the caller is vararg method, we don't know the number of arguments passed by caller's caller. + // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its + // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as + // out-going area required for callee is bounded by caller's fixed argument space. + // + // Note that callee being a vararg method is not a problem since we can account the params being passed. + // + // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg + // method. This is due to the ABI differences for native vararg methods for these platforms. There is + // work required to shuffle arguments to the correct locations. 
+ CLANG_FORMAT_COMMENT_ANCHOR; + +#if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) + if (info.compIsVarArgs || callee->IsVarargs()) + { + reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64"); + return false; + } +#endif // (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) + + if (compLocallocUsed) + { + reportFastTailCallDecision("Localloc used"); + return false; + } + +#ifdef TARGET_AMD64 + // Needed for Jit64 compat. + // In future, enabling fast tail calls from methods that need GS cookie + // check would require codegen side work to emit GS cookie check before a + // tail call. + if (getNeedsGSSecurityCookie()) + { + reportFastTailCallDecision("GS Security cookie check required"); + return false; + } +#endif + + // If the NextCallReturnAddress intrinsic is used we should do normal calls. + if (info.compHasNextCallRetAddr) + { + reportFastTailCallDecision("Uses NextCallReturnAddress intrinsic"); + return false; + } + + if (callee->HasRetBufArg()) // RetBuf + { + // If callee has RetBuf param, caller too must have it. + // Otherwise go the slow route. + if (info.compRetBuffArg == BAD_VAR_NUM) + { + reportFastTailCallDecision("Callee has RetBuf but caller does not."); + return false; + } + } + + // For a fast tail call the caller will use its incoming arg stack space to place + // arguments, so if the callee requires more arg stack space than is available here + // the fast tail call cannot be performed. This is common to all platforms. + // Note that the GC'ness of on stack args need not match since the arg setup area is marked + // as non-interruptible for fast tail calls. + if (calleeArgStackSize > callerArgStackSize) + { + reportFastTailCallDecision("Not enough incoming arg space"); + return false; + } + + // For Windows some struct parameters are copied on the local frame + // and then passed by reference. We cannot fast tail call in these situation + // as we need to keep our frame around. + if (fgCallHasMustCopyByrefParameter(callee)) + { + reportFastTailCallDecision("Callee has a byref parameter"); + return false; + } + + reportFastTailCallDecision(nullptr); + return true; +#else // FEATURE_FASTTAILCALL + if (failReason) + *failReason = "Fast tailcalls are not supported on this platform"; + return false; +#endif +} + +//------------------------------------------------------------------------ +// fgCallHasMustCopyByrefParameter: Check to see if this call has a byref parameter that +// requires a struct copy in the caller. +// +// Arguments: +// callee - The callee to check +// +// Return Value: +// Returns true or false based on whether this call has a byref parameter that +// requires a struct copy in the caller. + +#if FEATURE_FASTTAILCALL +bool Compiler::fgCallHasMustCopyByrefParameter(GenTreeCall* callee) +{ + fgArgInfo* argInfo = callee->fgArgInfo; + + bool hasMustCopyByrefParameter = false; + + for (unsigned index = 0; index < argInfo->ArgCount(); ++index) + { + fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); + + if (arg->isStruct) + { + if (arg->passedByRef) + { + // Generally a byref arg will block tail calling, as we have to + // make a local copy of the struct for the callee. + hasMustCopyByrefParameter = true; + + // If we're optimizing, we may be able to pass our caller's byref to our callee, + // and so still be able to avoid a struct copy. 
+ if (opts.OptimizationEnabled()) + { + // First, see if this arg is an implicit byref param. + GenTreeLclVar* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); + + if (lcl != nullptr) + { + // Yes, the arg is an implicit byref param. + const unsigned lclNum = lcl->GetLclNum(); + LclVarDsc* const varDsc = lvaGetDesc(lcl); + + // The param must not be promoted; if we've promoted, then the arg will be + // a local struct assembled from the promoted fields. + if (varDsc->lvPromoted) + { + JITDUMP("Arg [%06u] is promoted implicit byref V%02u, so no tail call\n", + dspTreeID(arg->GetNode()), lclNum); + } + else + { + JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n", + dspTreeID(arg->GetNode()), lclNum); + + // We have to worry about introducing aliases if we bypass copying + // the struct at the call. We'll do some limited analysis to see if we + // can rule this out. + const unsigned argLimit = 6; + + // If this is the only appearance of the byref in the method, then + // aliasing is not possible. + // + // If no other call arg refers to this byref, and no other arg is + // a pointer which could refer to this byref, we can optimize. + // + // We only check this for calls with small numbers of arguments, + // as the analysis cost will be quadratic. + // + if (varDsc->lvRefCnt(RCS_EARLY) == 1) + { + JITDUMP("... yes, arg is the only appearance of V%02u\n", lclNum); + hasMustCopyByrefParameter = false; + } + else if (argInfo->ArgCount() <= argLimit) + { + GenTree* interferingArg = nullptr; + for (unsigned index2 = 0; index2 < argInfo->ArgCount(); ++index2) + { + if (index2 == index) + { + continue; + } + + fgArgTabEntry* const arg2 = argInfo->GetArgEntry(index2, false); + JITDUMP("... checking other arg [%06u]...\n", dspTreeID(arg2->GetNode())); + DISPTREE(arg2->GetNode()); + + // Do we pass 'lcl' more than once to the callee? + if (arg2->isStruct && arg2->passedByRef) + { + GenTreeLclVarCommon* const lcl2 = + arg2->GetNode()->IsImplicitByrefParameterValue(this); + + if ((lcl2 != nullptr) && (lclNum == lcl2->GetLclNum())) + { + // not copying would introduce aliased implicit byref structs + // in the callee ... we can't optimize. + interferingArg = arg2->GetNode(); + break; + } + else + { + JITDUMP("... arg refers to different implicit byref V%02u\n", + lcl2->GetLclNum()); + continue; + } + } + + // Do we pass a byref pointer which might point within 'lcl'? + // + // We can assume the 'lcl' is unaliased on entry to the + // method, so the only way we can have an aliasing byref pointer at + // the call is if 'lcl' is address taken/exposed in the method. + // + // Note even though 'lcl' is not promoted, we are in the middle + // of the promote->rewrite->undo->(morph)->demote cycle, and so + // might see references to promoted fields of 'lcl' that haven't yet + // been demoted (see fgMarkDemotedImplicitByRefArgs). + // + // So, we also need to scan all 'lcl's fields, if any, to see if they + // are exposed. + // + // When looking for aliases from other args, we check for both TYP_BYREF + // and TYP_I_IMPL typed args here. 
Conceptually anything that points into + // an implicit byref parameter should be TYP_BYREF, as these parameters could + // refer to boxed heap locations (say if the method is invoked by reflection) + // but there are some stack only structs (like typed references) where + // the importer/runtime code uses TYP_I_IMPL, and fgInitArgInfo will + // transiently retype all simple address-of implicit parameter args as + // TYP_I_IMPL. + // + if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL)) + { + JITDUMP("...arg is a byref, must run an alias check\n"); + bool checkExposure = true; + bool hasExposure = false; + + // See if there is any way arg could refer to a parameter struct. + GenTree* arg2Node = arg2->GetNode(); + if (arg2Node->OperIs(GT_LCL_VAR)) + { + GenTreeLclVarCommon* arg2LclNode = arg2Node->AsLclVarCommon(); + assert(arg2LclNode->GetLclNum() != lclNum); + LclVarDsc* arg2Dsc = lvaGetDesc(arg2LclNode); + + // Other params can't alias implicit byref params + if (arg2Dsc->lvIsParam) + { + checkExposure = false; + } + } + // Because we're checking TYP_I_IMPL above, at least + // screen out obvious things that can't cause aliases. + else if (arg2Node->IsIntegralConst()) + { + checkExposure = false; + } + + if (checkExposure) + { + JITDUMP( + "... not sure where byref arg points, checking if V%02u is exposed\n", + lclNum); + // arg2 might alias arg, see if we've exposed + // arg somewhere in the method. + if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) + { + // Struct as a whole is exposed, can't optimize + JITDUMP("... V%02u is exposed\n", lclNum); + hasExposure = true; + } + else if (varDsc->lvFieldLclStart != 0) + { + // This is the promoted/undone struct case. + // + // The field start is actually the local number of the promoted local, + // use it to enumerate the fields. + const unsigned promotedLcl = varDsc->lvFieldLclStart; + LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl); + JITDUMP("...promoted-unpromoted case -- also checking exposure of " + "fields of V%02u\n", + promotedLcl); + + for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt; + fieldIndex++) + { + LclVarDsc* fieldDsc = + lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex); + + if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed) + { + // Promoted and not yet demoted field is exposed, can't optimize + JITDUMP("... field V%02u is exposed\n", + promotedVarDsc->lvFieldLclStart + fieldIndex); + hasExposure = true; + break; + } + } + } + } + + if (hasExposure) + { + interferingArg = arg2->GetNode(); + break; + } + } + else + { + JITDUMP("...arg is not a byref or implicit byref (%s)\n", + varTypeName(arg2->GetNode()->TypeGet())); + } + } + + if (interferingArg != nullptr) + { + JITDUMP("... no, arg [%06u] may alias with V%02u\n", dspTreeID(interferingArg), + lclNum); + } + else + { + JITDUMP("... yes, no other arg in call can alias V%02u\n", lclNum); + hasMustCopyByrefParameter = false; + } + } + else + { + JITDUMP(" ... no, call has %u > %u args, alias analysis deemed too costly\n", + argInfo->ArgCount(), argLimit); + } + } + } + } + + if (hasMustCopyByrefParameter) + { + // This arg requires a struct copy. No reason to keep scanning the remaining args. 
+ break; + } + } + } + } + + return hasMustCopyByrefParameter; +} +#endif + +//------------------------------------------------------------------------ +// fgMorphPotentialTailCall: Attempt to morph a call that the importer has +// identified as a potential tailcall to an actual tailcall and return the +// placeholder node to use in this case. +// +// Arguments: +// call - The call to morph. +// +// Return Value: +// Returns a node to use if the call was morphed into a tailcall. If this +// function returns a node the call is done being morphed and the new node +// should be used. Otherwise the call will have been demoted to a regular call +// and should go through normal morph. +// +// Notes: +// This is called only for calls that the importer has already identified as +// potential tailcalls. It will do profitability and legality checks and +// classify which kind of tailcall we are able to (or should) do, along with +// modifying the trees to perform that kind of tailcall. +// +GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) +{ + // It should either be an explicit (i.e. tail prefixed) or an implicit tail call + assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall()); + + // It cannot be an inline candidate + assert(!call->IsInlineCandidate()); + + auto failTailCall = [&](const char* reason, unsigned lclNum = BAD_VAR_NUM) { +#ifdef DEBUG + if (verbose) + { + printf("\nRejecting tail call in morph for call "); + printTreeID(call); + printf(": %s", reason); + if (lclNum != BAD_VAR_NUM) + { + printf(" V%02u", lclNum); + } + printf("\n"); + } +#endif + + // for non user funcs, we have no handles to report + info.compCompHnd->reportTailCallDecision(nullptr, + (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, + call->IsTailPrefixedCall(), TAILCALL_FAIL, reason); + + // We have checked the candidate so demote. + call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL; +#if FEATURE_TAILCALL_OPT + call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL; +#endif + }; + + if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) + { + failTailCall("Might turn into an intrinsic"); + return nullptr; + } + + // Heuristic: regular calls to noreturn methods can sometimes be + // merged, so if we have multiple such calls, we defer tail calling. + // + // TODO: re-examine this; now that we're merging before morph we + // don't need to worry about interfering with merges. + // + if (call->IsNoReturn() && (optNoReturnCallCount > 1)) + { + failTailCall("Defer tail calling throw helper; anticipating merge"); + return nullptr; + } + +#ifdef DEBUG + if (opts.compGcChecks && (info.compRetType == TYP_REF)) + { + failTailCall("COMPlus_JitGCChecks or stress might have interposed a call to CORINFO_HELP_CHECK_OBJ, " + "invalidating tailcall opportunity"); + return nullptr; + } +#endif + + // We have to ensure to pass the incoming retValBuf as the + // outgoing one. Using a temp will not do as this function will + // not regain control to do the copy. This can happen when inlining + // a tailcall which also has a potential tailcall in it: the IL looks + // like we can do a tailcall, but the trees generated use a temp for the inlinee's + // result. TODO-CQ: Fix this. 
+ if (info.compRetBuffArg != BAD_VAR_NUM) + { + noway_assert(call->TypeGet() == TYP_VOID); + GenTree* retValBuf = call->gtCallArgs->GetNode(); + if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg) + { + failTailCall("Need to copy return buffer"); + return nullptr; + } + } + + // We are still not sure whether it can be a tail call. Because, when converting + // a call to an implicit tail call, we must check that there are no locals with + // their address taken. If this is the case, we have to assume that the address + // has been leaked and the current stack frame must live until after the final + // call. + + // Verify that none of vars has lvHasLdAddrOp or lvAddrExposed bit set. Note + // that lvHasLdAddrOp is much more conservative. We cannot just base it on + // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs + // during morph stage. The reason for also checking lvAddrExposed is that in case + // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp. + // The combination of lvHasLdAddrOp and lvAddrExposed though conservative allows us + // never to be incorrect. + // + // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose + // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed + // is set. This avoids the need for iterating through all lcl vars of the current + // method. Right now throughout the code base we are not consistently using 'set' + // method to set lvHasLdAddrOp and lvAddrExposed flags. + + bool isImplicitOrStressTailCall = call->IsImplicitTailCall() || call->IsStressTailCall(); + if (isImplicitOrStressTailCall && compLocallocUsed) + { + failTailCall("Localloc used"); + return nullptr; + } + + bool hasStructParam = false; + for (unsigned varNum = 0; varNum < lvaCount; varNum++) + { + LclVarDsc* varDsc = lvaTable + varNum; + // If the method is marked as an explicit tail call we will skip the + // following three hazard checks. + // We still must check for any struct parameters and set 'hasStructParam' + // so that we won't transform the recursive tail call into a loop. + // + if (isImplicitOrStressTailCall) + { + if (varDsc->lvHasLdAddrOp && !lvaIsImplicitByRefLocal(varNum)) + { + failTailCall("Local address taken", varNum); + return nullptr; + } + if (varDsc->lvAddrExposed) + { + if (lvaIsImplicitByRefLocal(varNum)) + { + // The address of the implicit-byref is a non-address use of the pointer parameter. + } + else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl)) + { + // The address of the implicit-byref's field is likewise a non-address use of the pointer + // parameter. + } + else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum)) + { + // This temp was used for struct promotion bookkeeping. It will not be used, and will have + // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs. + assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl)); + assert(fgGlobalMorph); + } + else + { + failTailCall("Local address taken", varNum); + return nullptr; + } + } + if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum)) + { + failTailCall("Has Struct Promoted Param", varNum); + return nullptr; + } + if (varDsc->lvPinned) + { + // A tail call removes the method from the stack, which means the pinning + // goes away for the callee. We can't allow that. 
+ failTailCall("Has Pinned Vars", varNum); + return nullptr; + } + } + + if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam) + { + hasStructParam = true; + // This prevents transforming a recursive tail call into a loop + // but doesn't prevent tail call optimization so we need to + // look at the rest of parameters. + } + } + + if (!fgCheckStmtAfterTailCall()) + { + failTailCall("Unexpected statements after the tail call"); + return nullptr; + } + + const char* failReason = nullptr; + bool canFastTailCall = fgCanFastTailCall(call, &failReason); + + CORINFO_TAILCALL_HELPERS tailCallHelpers; + bool tailCallViaJitHelper = false; + if (!canFastTailCall) + { + if (call->IsImplicitTailCall()) + { + // Implicit or opportunistic tail calls are always dispatched via fast tail call + // mechanism and never via tail call helper for perf. + failTailCall(failReason); + return nullptr; + } + + assert(call->IsTailPrefixedCall()); + assert(call->tailCallInfo != nullptr); + + // We do not currently handle non-standard args except for VSD stubs. + if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this)) + { + failTailCall( + "Method with non-standard args passed in callee trash register cannot be tail called via helper"); + return nullptr; + } + + // On x86 we have a faster mechanism than the general one which we use + // in almost all cases. See fgCanTailCallViaJitHelper for more information. + if (fgCanTailCallViaJitHelper()) + { + tailCallViaJitHelper = true; + } + else + { + // Make sure we can get the helpers. We do this last as the runtime + // will likely be required to generate these. + CORINFO_RESOLVED_TOKEN* token = nullptr; + CORINFO_SIG_INFO* sig = call->tailCallInfo->GetSig(); + unsigned flags = 0; + if (!call->tailCallInfo->IsCalli()) + { + token = call->tailCallInfo->GetToken(); + if (call->tailCallInfo->IsCallvirt()) + { + flags |= CORINFO_TAILCALL_IS_CALLVIRT; + } + } + + if (call->gtCallThisArg != nullptr) + { + var_types thisArgType = call->gtCallThisArg->GetNode()->TypeGet(); + if (thisArgType != TYP_REF) + { + flags |= CORINFO_TAILCALL_THIS_ARG_IS_BYREF; + } + } + + if (!info.compCompHnd->getTailCallHelpers(token, sig, (CORINFO_GET_TAILCALL_HELPERS_FLAGS)flags, + &tailCallHelpers)) + { + failTailCall("Tail call help not available"); + return nullptr; + } + } + } + + // Check if we can make the tailcall a loop. + bool fastTailCallToLoop = false; +#if FEATURE_TAILCALL_OPT + // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register + // or return type is a struct that can be passed in a register. + // + // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through + // hidden generic context param or through keep alive thisptr), then while transforming a recursive + // call to such a method requires that the generic context stored on stack slot be updated. Right now, + // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming + // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the + // generic type parameters of both caller and callee generic method are the same. + if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() && + !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet())) + { + fastTailCallToLoop = true; + } +#endif + + // Ok -- now we are committed to performing a tailcall. 
Report the decision.
+    CorInfoTailCall tailCallResult;
+    if (fastTailCallToLoop)
+    {
+        tailCallResult = TAILCALL_RECURSIVE;
+    }
+    else if (canFastTailCall)
+    {
+        tailCallResult = TAILCALL_OPTIMIZED;
+    }
+    else
+    {
+        tailCallResult = TAILCALL_HELPER;
+    }
+
+    info.compCompHnd->reportTailCallDecision(nullptr,
+                                             (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
+                                             call->IsTailPrefixedCall(), tailCallResult, nullptr);
+
+    // Are we currently planning to expand the gtControlExpr as an early virtual call target?
+    //
+    if (call->IsExpandedEarly() && call->IsVirtualVtable())
+    {
+        // It isn't always profitable to expand a virtual call early
+        //
+        // We always expand the TAILCALL_HELPER type late.
+        // And we expand late when we have an optimized tail call
+        // and the this pointer needs to be evaluated into a temp.
+        //
+        if (tailCallResult == TAILCALL_HELPER)
+        {
+            // We will always expand this late in lower instead.
+            // (see LowerTailCallViaJitHelper as it needs some work
+            // for us to be able to expand this earlier in morph)
+            //
+            call->ClearExpandedEarly();
+        }
+        else if ((tailCallResult == TAILCALL_OPTIMIZED) &&
+                 ((call->gtCallThisArg->GetNode()->gtFlags & GTF_SIDE_EFFECT) != 0))
+        {
+            // We generate better code when we expand this late in lower instead.
+            //
+            call->ClearExpandedEarly();
+        }
+    }
+
+    // Now actually morph the call.
+    compTailCallUsed = true;
+    // This will prevent inlining this call.
+    call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
+    if (tailCallViaJitHelper)
+    {
+        call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_VIA_JIT_HELPER;
+    }
+
+#if FEATURE_TAILCALL_OPT
+    if (fastTailCallToLoop)
+    {
+        call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
+    }
+#endif
+
+    // Mark that this is no longer a pending tailcall. We need to do this before
+    // we call fgMorphCall again (which happens in the fast tailcall case) to
+    // avoid recursing back into this method.
+    call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+#if FEATURE_TAILCALL_OPT
+    call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
+#endif
+
+#ifdef DEBUG
+    if (verbose)
+    {
+        printf("\nGTF_CALL_M_TAILCALL bit set for call ");
+        printTreeID(call);
+        printf("\n");
+        if (fastTailCallToLoop)
+        {
+            printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
+            printTreeID(call);
+            printf("\n");
+        }
+    }
+#endif
+
+    // If this block has a flow successor, make suitable updates.
+    //
+    BasicBlock* const nextBlock = compCurBB->GetUniqueSucc();
+
+    if (nextBlock == nullptr)
+    {
+        // No unique successor. compCurBB should be a return.
+        //
+        assert(compCurBB->bbJumpKind == BBJ_RETURN);
+    }
+    else
+    {
+        // Flow no longer reaches nextBlock from here.
+        //
+        fgRemoveRefPred(nextBlock, compCurBB);
+
+        // Adjust profile weights.
+        //
+        // Note if this is a tail call to loop, further updates
+        // are needed once we install the loop edge.
+        //
+        if (compCurBB->hasProfileWeight() && nextBlock->hasProfileWeight())
+        {
+            // Since we have linear flow we can update the next block weight.
+            //
+            BasicBlock::weight_t const blockWeight   = compCurBB->bbWeight;
+            BasicBlock::weight_t const nextWeight    = nextBlock->bbWeight;
+            BasicBlock::weight_t const newNextWeight = nextWeight - blockWeight;
+
+            // If the math would result in a negative weight then there's
+            // no local repair we can do; just leave things inconsistent.
+            //
+            if (newNextWeight >= 0)
+            {
+                // Note if we'd already morphed the IR in nextBlock we might
+                // have done something profile sensitive that we should arguably reconsider.
+ // + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum, + nextWeight, newNextWeight); + + nextBlock->setBBProfileWeight(newNextWeight); + } + else + { + JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT + " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", + nextBlock->bbNum, nextWeight, compCurBB->bbNum, blockWeight); + } + + // If nextBlock is not a BBJ_RETURN, it should have a unique successor that + // is a BBJ_RETURN, as we allow a little bit of flow after a tail call. + // + if (nextBlock->bbJumpKind != BBJ_RETURN) + { + BasicBlock* nextNextBlock = nextBlock->GetUniqueSucc(); + + // Check if we have a sequence of GT_ASG blocks where the same variable is assigned + // to temp locals over and over. + // + // Also allow casts on the RHSs of the assignments, and blocks with GT_NOPs. + // + if (nextNextBlock->bbJumpKind != BBJ_RETURN) + { + // Make sure the block has a single statement + assert(nextBlock->firstStmt() == nextBlock->lastStmt()); + // And the root node is "ASG(LCL_VAR, LCL_VAR)" + GenTree* asgNode = nextBlock->firstStmt()->GetRootNode(); + assert(asgNode->OperIs(GT_ASG)); + + unsigned lcl = asgNode->gtGetOp1()->AsLclVarCommon()->GetLclNum(); + + while (nextNextBlock->bbJumpKind != BBJ_RETURN) + { + assert(nextNextBlock->firstStmt() == nextNextBlock->lastStmt()); + asgNode = nextNextBlock->firstStmt()->GetRootNode(); + if (!asgNode->OperIs(GT_NOP)) + { + assert(asgNode->OperIs(GT_ASG)); + + GenTree* rhs = asgNode->gtGetOp2(); + while (rhs->OperIs(GT_CAST)) + { + assert(!rhs->gtOverflow()); + rhs = rhs->gtGetOp1(); + } + + assert(lcl == rhs->AsLclVarCommon()->GetLclNum()); + lcl = rhs->AsLclVarCommon()->GetLclNum(); + } + nextNextBlock = nextNextBlock->GetUniqueSucc(); + } + } + + assert(nextNextBlock->bbJumpKind == BBJ_RETURN); + + if (nextNextBlock->hasProfileWeight()) + { + // Do similar updates here. + // + BasicBlock::weight_t const nextNextWeight = nextNextBlock->bbWeight; + BasicBlock::weight_t const newNextNextWeight = nextNextWeight - blockWeight; + + // If the math would result in an negative weight then there's + // no local repair we can do; just leave things inconsistent. + // + if (newNextNextWeight >= 0) + { + JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", + nextNextBlock->bbNum, nextNextWeight, newNextNextWeight); + + nextNextBlock->setBBProfileWeight(newNextNextWeight); + } + else + { + JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT + " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", + nextNextBlock->bbNum, nextNextWeight, compCurBB->bbNum, blockWeight); + } + } + } + } + } + +#if !FEATURE_TAILCALL_OPT_SHARED_RETURN + // We enable shared-ret tail call optimization for recursive calls even if + // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined. + if (gtIsRecursiveCall(call)) +#endif + { + // Many tailcalls will have call and ret in the same block, and thus be + // BBJ_RETURN, but if the call falls through to a ret, and we are doing a + // tailcall, change it here. + compCurBB->bbJumpKind = BBJ_RETURN; + } + + GenTree* stmtExpr = fgMorphStmt->GetRootNode(); + +#ifdef DEBUG + // Tail call needs to be in one of the following IR forms + // Either a call stmt or + // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..))) + // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..))) + // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP) + // In the above, + // GT_CASTS may be nested. 
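+    // For example, a tail call whose result is narrowed before being returned can
+    // arrive here as GT_RETURN(GT_CAST(GT_CAST(GT_CALL(..)))); the loop below peels
+    // the casts to find the call.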
+    genTreeOps stmtOper = stmtExpr->gtOper;
+    if (stmtOper == GT_CALL)
+    {
+        assert(stmtExpr == call);
+    }
+    else
+    {
+        assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
+        GenTree* treeWithCall;
+        if (stmtOper == GT_RETURN)
+        {
+            treeWithCall = stmtExpr->gtGetOp1();
+        }
+        else if (stmtOper == GT_COMMA)
+        {
+            // Second operation must be nop.
+            assert(stmtExpr->gtGetOp2()->IsNothingNode());
+            treeWithCall = stmtExpr->gtGetOp1();
+        }
+        else
+        {
+            treeWithCall = stmtExpr->gtGetOp2();
+        }
+
+        // Peel off casts
+        while (treeWithCall->gtOper == GT_CAST)
+        {
+            assert(!treeWithCall->gtOverflow());
+            treeWithCall = treeWithCall->gtGetOp1();
+        }
+
+        assert(treeWithCall == call);
+    }
+#endif
+    // Store the call type for later to introduce the correct placeholder.
+    var_types origCallType = call->TypeGet();
+
+    GenTree* result;
+    if (!canFastTailCall && !tailCallViaJitHelper)
+    {
+        // For tailcall via CORINFO_TAILCALL_HELPERS we transform into regular
+        // calls with (to the JIT) regular control flow so we do not need to do
+        // much special handling.
+        result = fgMorphTailCallViaHelpers(call, tailCallHelpers);
+    }
+    else
+    {
+        // Otherwise we will transform into something that does not return. For
+        // fast tailcalls a "jump" and for tailcall via JIT helper a call to a
+        // JIT helper that does not return. So peel off everything after the
+        // call.
+        Statement* nextMorphStmt = fgMorphStmt->GetNextStmt();
+        JITDUMP("Remove all stmts after the call.\n");
+        while (nextMorphStmt != nullptr)
+        {
+            Statement* stmtToRemove = nextMorphStmt;
+            nextMorphStmt           = stmtToRemove->GetNextStmt();
+            fgRemoveStmt(compCurBB, stmtToRemove);
+        }
+
+        bool     isRootReplaced = false;
+        GenTree* root           = fgMorphStmt->GetRootNode();
+
+        if (root != call)
+        {
+            JITDUMP("Replace root node [%06d] with [%06d] tail call node.\n", dspTreeID(root), dspTreeID(call));
+            isRootReplaced = true;
+            fgMorphStmt->SetRootNode(call);
+        }
+
+        // Avoid potential extra work for the return (for example, vzeroupper)
+        call->gtType = TYP_VOID;
+
+        // Do some target-specific transformations (before we process the args,
+        // etc.) for the JIT helper case.
+        if (tailCallViaJitHelper)
+        {
+            fgMorphTailCallViaJitHelper(call);
+
+            // Force re-evaluating the argInfo. fgMorphTailCallViaJitHelper will modify the
+            // argument list, invalidating the argInfo.
+            call->fgArgInfo = nullptr;
+        }
+
+        // Tail call via JIT helper: The VM can't use return address hijacking
+        // if we're not going to return and the helper doesn't have enough info
+        // to safely poll, so we poll before the tail call, if the block isn't
+        // already safe. Since tail call via helper is a slow mechanism it
+        // doesn't matter whether we emit a GC poll. This is done to be in parity
+        // with Jit64. Also this avoids GC info size increase if almost all
+        // methods are expected to be tail calls (e.g. F#).
+        //
+        // Note that we can avoid emitting GC-poll if we know that the current
+        // BB is dominated by a Gc-SafePoint block. But we don't have dominator
+        // info at this point. One option is to just add a place holder node for
+        // GC-poll (e.g. GT_GCPOLL) here and remove it in lowering if the block
+        // is dominated by a GC-SafePoint. For now it is not clear whether
+        // optimizing slow tail calls is worth the effort. As a low cost check,
+        // we check whether the first and current basic blocks are
+        // GC-SafePoints.
+        //
+        // Fast Tail call as epilog+jmp - No need to insert GC-poll.
Instead, + // fgSetBlockOrder() is going to mark the method as fully interruptible + // if the block containing this tail call is reachable without executing + // any call. + BasicBlock* curBlock = compCurBB; + if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) || + (fgCreateGCPoll(GCPOLL_INLINE, compCurBB) == curBlock)) + { + // We didn't insert a poll block, so we need to morph the call now + // (Normally it will get morphed when we get to the split poll block) + GenTree* temp = fgMorphCall(call); + noway_assert(temp == call); + } + + // Fast tail call: in case of fast tail calls, we need a jmp epilog and + // hence mark it as BBJ_RETURN with BBF_JMP flag set. + noway_assert(compCurBB->bbJumpKind == BBJ_RETURN); + if (canFastTailCall) + { + compCurBB->bbFlags |= BBF_HAS_JMP; + } + else + { + // We call CORINFO_HELP_TAILCALL which does not return, so we will + // not need epilogue. + compCurBB->bbJumpKind = BBJ_THROW; + } + + if (isRootReplaced) + { + // We have replaced the root node of this stmt and deleted the rest, + // but we still have the deleted, dead nodes on the `fgMorph*` stack + // if the root node was an `ASG`, `RET` or `CAST`. + // Return a zero con node to exit morphing of the old trees without asserts + // and forbid POST_ORDER morphing doing something wrong with our call. + var_types callType; + if (varTypeIsStruct(origCallType)) + { + CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd; + Compiler::structPassingKind howToReturnStruct; + callType = getReturnTypeForStruct(retClsHnd, call->GetUnmanagedCallConv(), &howToReturnStruct); + assert((howToReturnStruct != SPK_Unknown) && (howToReturnStruct != SPK_ByReference)); + if (howToReturnStruct == SPK_ByValue) + { + callType = TYP_I_IMPL; + } + else if (howToReturnStruct == SPK_ByValueAsHfa || varTypeIsSIMD(callType)) + { + callType = TYP_FLOAT; + } + assert((callType != TYP_UNKNOWN) && !varTypeIsStruct(callType)); + } + else + { + callType = origCallType; + } + assert((callType != TYP_UNKNOWN) && !varTypeIsStruct(callType)); + callType = genActualType(callType); + + GenTree* zero = gtNewZeroConNode(callType); + result = fgMorphTree(zero); + } + else + { + result = call; + } + } + + return result; +} + +//------------------------------------------------------------------------ +// fgMorphTailCallViaHelpers: Transform the given GT_CALL tree for tailcall code +// generation. +// +// Arguments: +// call - The call to transform +// helpers - The tailcall helpers provided by the runtime. +// +// Return Value: +// Returns the transformed node. +// +// Notes: +// This transforms +// GT_CALL +// {callTarget} +// {this} +// {args} +// into +// GT_COMMA +// GT_CALL StoreArgsStub +// {callTarget} (depending on flags provided by the runtime) +// {this} (as a regular arg) +// {args} +// GT_COMMA +// GT_CALL Dispatcher +// GT_ADDR ReturnAddress +// {CallTargetStub} +// GT_ADDR ReturnValue +// GT_LCL ReturnValue +// whenever the call node returns a value. If the call node does not return a +// value the last comma will not be there. +// +GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL_HELPERS& help) +{ + // R2R requires different handling but we don't support tailcall via + // helpers in R2R yet, so just leave it for now. 
+ // TODO: R2R: TailCallViaHelper + assert(!opts.IsReadyToRun()); + + JITDUMP("fgMorphTailCallViaHelpers (before):\n"); + DISPTREE(call); + + // Don't support tail calling helper methods + assert(call->gtCallType != CT_HELPER); + + // We come this route only for tail prefixed calls that cannot be dispatched as + // fast tail calls + assert(!call->IsImplicitTailCall()); + + // We want to use the following assert, but it can modify the IR in some cases, so we + // can't do that in an assert. + // assert(!fgCanFastTailCall(call, nullptr)); + + bool virtualCall = call->IsVirtual(); + + // If VSD then get rid of arg to VSD since we turn this into a direct call. + // The extra arg will be the first arg so this needs to be done before we + // handle the retbuf below. + if (call->IsVirtualStub()) + { + JITDUMP("This is a VSD\n"); +#if FEATURE_FASTTAILCALL + call->ResetArgInfo(); +#endif + + call->gtFlags &= ~GTF_CALL_VIRT_STUB; + } + + GenTree* callDispatcherAndGetResult = fgCreateCallDispatcherAndGetResult(call, help.hCallTarget, help.hDispatcher); + + // Change the call to a call to the StoreArgs stub. + if (call->HasRetBufArg()) + { + JITDUMP("Removing retbuf"); + call->gtCallArgs = call->gtCallArgs->GetNext(); + call->gtCallMoreFlags &= ~GTF_CALL_M_RETBUFFARG; + + // We changed args so recompute info. + call->fgArgInfo = nullptr; + } + + const bool stubNeedsTargetFnPtr = (help.flags & CORINFO_TAILCALL_STORE_TARGET) != 0; + + GenTree* doBeforeStoreArgsStub = nullptr; + GenTree* thisPtrStubArg = nullptr; + + // Put 'this' in normal param list + if (call->gtCallThisArg != nullptr) + { + JITDUMP("Moving this pointer into arg list\n"); + GenTree* objp = call->gtCallThisArg->GetNode(); + GenTree* thisPtr = nullptr; + call->gtCallThisArg = nullptr; + + // JIT will need one or two copies of "this" in the following cases: + // 1) the call needs null check; + // 2) StoreArgs stub needs the target function pointer address and if the call is virtual + // the stub also needs "this" in order to evalute the target. + + const bool callNeedsNullCheck = call->NeedsNullCheck(); + const bool stubNeedsThisPtr = stubNeedsTargetFnPtr && virtualCall; + + // TODO-Review: The following transformation is implemented under assumption that + // both conditions can be true. However, I could not construct such example + // where a virtual tail call would require null check. In case, if the conditions + // are mutually exclusive the following could be simplified. + + if (callNeedsNullCheck || stubNeedsThisPtr) + { + // Clone "this" if "this" has no side effects. + if ((objp->gtFlags & GTF_SIDE_EFFECT) == 0) + { + thisPtr = gtClone(objp, true); + } + + // Create a temp and spill "this" to the temp if "this" has side effects or "this" was too complex to clone. 
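+            // For illustration, when a temp is needed and a null check is required,
+            // the code below builds roughly:
+            //   doBeforeStoreArgsStub = COMMA(tmpN = objp, NULLCHECK(tmpN))
+            //   thisPtr               = LCL_VAR tmpN
+            // (plus thisPtrStubArg = LCL_VAR tmpN when the stub needs "this").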
+ if (thisPtr == nullptr) + { + const unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); + + // tmp = "this" + doBeforeStoreArgsStub = gtNewTempAssign(lclNum, objp); + + if (callNeedsNullCheck) + { + // COMMA(tmp = "this", deref(tmp)) + GenTree* tmp = gtNewLclvNode(lclNum, objp->TypeGet()); + GenTree* nullcheck = gtNewNullCheck(tmp, compCurBB); + doBeforeStoreArgsStub = gtNewOperNode(GT_COMMA, TYP_VOID, doBeforeStoreArgsStub, nullcheck); + } + + thisPtr = gtNewLclvNode(lclNum, objp->TypeGet()); + + if (stubNeedsThisPtr) + { + thisPtrStubArg = gtNewLclvNode(lclNum, objp->TypeGet()); + } + } + else + { + if (callNeedsNullCheck) + { + // deref("this") + doBeforeStoreArgsStub = gtNewNullCheck(objp, compCurBB); + + if (stubNeedsThisPtr) + { + thisPtrStubArg = gtClone(objp, true); + } + } + else + { + assert(stubNeedsThisPtr); + + thisPtrStubArg = objp; + } + } + + call->gtFlags &= ~GTF_CALL_NULLCHECK; + + assert((thisPtrStubArg != nullptr) == stubNeedsThisPtr); + } + else + { + thisPtr = objp; + } + + // During rationalization tmp="this" and null check will be materialized + // in the right execution order. + assert(thisPtr != nullptr); + call->gtCallArgs = gtPrependNewCallArg(thisPtr, call->gtCallArgs); + call->fgArgInfo = nullptr; + } + + // We may need to pass the target, for instance for calli or generic methods + // where we pass instantiating stub. + if (stubNeedsTargetFnPtr) + { + JITDUMP("Adding target since VM requested it\n"); + GenTree* target; + if (!virtualCall) + { + if (call->gtCallType == CT_INDIRECT) + { + noway_assert(call->gtCallAddr != nullptr); + target = call->gtCallAddr; + } + else + { + CORINFO_CONST_LOOKUP addrInfo; + info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo); + + CORINFO_GENERIC_HANDLE handle = nullptr; + void* pIndirection = nullptr; + assert(addrInfo.accessType != IAT_PPVALUE && addrInfo.accessType != IAT_RELPVALUE); + + if (addrInfo.accessType == IAT_VALUE) + { + handle = addrInfo.handle; + } + else if (addrInfo.accessType == IAT_PVALUE) + { + pIndirection = addrInfo.addr; + } + target = gtNewIconEmbHndNode(handle, pIndirection, GTF_ICON_FTN_ADDR, call->gtCallMethHnd); + } + } + else + { + assert(!call->tailCallInfo->GetSig()->hasTypeArg()); + + CORINFO_CALL_INFO callInfo; + unsigned flags = CORINFO_CALLINFO_LDFTN; + if (call->tailCallInfo->IsCallvirt()) + { + flags |= CORINFO_CALLINFO_CALLVIRT; + } + + eeGetCallInfo(call->tailCallInfo->GetToken(), nullptr, (CORINFO_CALLINFO_FLAGS)flags, &callInfo); + target = getVirtMethodPointerTree(thisPtrStubArg, call->tailCallInfo->GetToken(), &callInfo); + } + + // Insert target as last arg + GenTreeCall::Use** newArgSlot = &call->gtCallArgs; + while (*newArgSlot != nullptr) + { + newArgSlot = &(*newArgSlot)->NextRef(); + } + + *newArgSlot = gtNewCallArgs(target); + + call->fgArgInfo = nullptr; + } + + // This is now a direct call to the store args stub and not a tailcall. + call->gtCallType = CT_USER_FUNC; + call->gtCallMethHnd = help.hStoreArgs; + call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; + call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_DELEGATE_INV | GTF_CALL_M_WRAPPER_DELEGATE_INV); + + // The store-args stub returns no value. 
+ call->gtRetClsHnd = nullptr; + call->gtType = TYP_VOID; + call->gtReturnType = TYP_VOID; + + GenTree* callStoreArgsStub = call; + + if (doBeforeStoreArgsStub != nullptr) + { + callStoreArgsStub = gtNewOperNode(GT_COMMA, TYP_VOID, doBeforeStoreArgsStub, callStoreArgsStub); + } + + GenTree* finalTree = + gtNewOperNode(GT_COMMA, callDispatcherAndGetResult->TypeGet(), callStoreArgsStub, callDispatcherAndGetResult); + + finalTree = fgMorphTree(finalTree); + + JITDUMP("fgMorphTailCallViaHelpers (after):\n"); + DISPTREE(finalTree); + return finalTree; +} + +//------------------------------------------------------------------------ +// fgCreateCallDispatcherAndGetResult: Given a call +// CALL +// {callTarget} +// {retbuf} +// {this} +// {args} +// create a similarly typed node that calls the tailcall dispatcher and returns +// the result, as in the following: +// COMMA +// CALL TailCallDispatcher +// ADDR ReturnAddress +// &CallTargetFunc +// ADDR RetValue +// RetValue +// If the call has type TYP_VOID, only create the CALL node. +// +// Arguments: +// origCall - the call +// callTargetStubHnd - the handle of the CallTarget function (this is a special +// IL stub created by the runtime) +// dispatcherHnd - the handle of the tailcall dispatcher function +// +// Return Value: +// A node that can be used in place of the original call. +// +GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* origCall, + CORINFO_METHOD_HANDLE callTargetStubHnd, + CORINFO_METHOD_HANDLE dispatcherHnd) +{ + GenTreeCall* callDispatcherNode = + gtNewCallNode(CT_USER_FUNC, dispatcherHnd, TYP_VOID, nullptr, fgMorphStmt->GetILOffsetX()); + // The dispatcher has signature + // void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, void* retValue) + + // Add return value arg. + GenTree* retValArg; + GenTree* retVal = nullptr; + unsigned int newRetLcl = BAD_VAR_NUM; + GenTree* copyToRetBufNode = nullptr; + + if (origCall->HasRetBufArg()) + { + JITDUMP("Transferring retbuf\n"); + GenTree* retBufArg = origCall->gtCallArgs->GetNode(); + + assert(info.compRetBuffArg != BAD_VAR_NUM); + assert(retBufArg->OperIsLocal()); + assert(retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg); + + if (info.compRetBuffDefStack) + { + // Use existing retbuf. + retValArg = retBufArg; + } + else + { + // Caller return buffer argument retBufArg can point to GC heap while the dispatcher expects + // the return value argument retValArg to point to the stack. + // We use a temporary stack allocated return buffer to hold the value during the dispatcher call + // and copy the value back to the caller return buffer after that. 
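+            // Roughly, the trees built below are:
+            //   retValArg        = ADDR(LCL_VAR tmpRetBuf)
+            //   copyToRetBufNode = copyBlock(OBJ(LCL_VAR callerRetBufArg), LCL_VAR tmpRetBuf)
+            // so the dispatcher writes into the stack temp and the result is copied back
+            // to the caller-supplied buffer afterwards.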
+ unsigned int tmpRetBufNum = lvaGrabTemp(true DEBUGARG("substitute local for return buffer")); + + constexpr bool unsafeValueClsCheck = false; + lvaSetStruct(tmpRetBufNum, origCall->gtRetClsHnd, unsafeValueClsCheck); + lvaSetVarAddrExposed(tmpRetBufNum); + + var_types tmpRetBufType = lvaGetDesc(tmpRetBufNum)->TypeGet(); + + retValArg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(tmpRetBufNum, tmpRetBufType)); + + var_types callerRetBufType = lvaGetDesc(info.compRetBuffArg)->TypeGet(); + + GenTree* dstAddr = gtNewLclvNode(info.compRetBuffArg, callerRetBufType); + GenTree* dst = gtNewObjNode(info.compMethodInfo->args.retTypeClass, dstAddr); + GenTree* src = gtNewLclvNode(tmpRetBufNum, tmpRetBufType); + + constexpr bool isVolatile = false; + constexpr bool isCopyBlock = true; + copyToRetBufNode = gtNewBlkOpNode(dst, src, isVolatile, isCopyBlock); + } + + if (origCall->gtType != TYP_VOID) + { + retVal = gtClone(retBufArg); + } + } + else if (origCall->gtType != TYP_VOID) + { + JITDUMP("Creating a new temp for the return value\n"); + newRetLcl = lvaGrabTemp(false DEBUGARG("Return value for tail call dispatcher")); + if (varTypeIsStruct(origCall->gtType)) + { + lvaSetStruct(newRetLcl, origCall->gtRetClsHnd, false); + } + else + { + // Since we pass a reference to the return value to the dispatcher + // we need to use the real return type so we can normalize it on + // load when we return it. + lvaTable[newRetLcl].lvType = (var_types)origCall->gtReturnType; + } + + lvaSetVarAddrExposed(newRetLcl); + + retValArg = + gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(newRetLcl, genActualType(lvaTable[newRetLcl].lvType))); + retVal = gtNewLclvNode(newRetLcl, genActualType(lvaTable[newRetLcl].lvType)); + + if (varTypeIsStruct(origCall->gtType)) + { + retVal = impFixupStructReturnType(retVal, origCall->gtRetClsHnd, origCall->GetUnmanagedCallConv()); + } + } + else + { + JITDUMP("No return value so using null pointer as arg\n"); + retValArg = gtNewZeroConNode(TYP_I_IMPL); + } + + callDispatcherNode->gtCallArgs = gtPrependNewCallArg(retValArg, callDispatcherNode->gtCallArgs); + + // Add callTarget + callDispatcherNode->gtCallArgs = + gtPrependNewCallArg(new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, callTargetStubHnd), + callDispatcherNode->gtCallArgs); + + // Add the caller's return address slot. + if (lvaRetAddrVar == BAD_VAR_NUM) + { + lvaRetAddrVar = lvaGrabTemp(false DEBUGARG("Return address")); + lvaTable[lvaRetAddrVar].lvType = TYP_I_IMPL; + lvaSetVarAddrExposed(lvaRetAddrVar); + } + + GenTree* retAddrSlot = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaRetAddrVar, TYP_I_IMPL)); + callDispatcherNode->gtCallArgs = gtPrependNewCallArg(retAddrSlot, callDispatcherNode->gtCallArgs); + + GenTree* finalTree = callDispatcherNode; + + if (copyToRetBufNode != nullptr) + { + finalTree = gtNewOperNode(GT_COMMA, TYP_VOID, callDispatcherNode, copyToRetBufNode); + } + + if (origCall->gtType == TYP_VOID) + { + return finalTree; + } + + assert(retVal != nullptr); + finalTree = gtNewOperNode(GT_COMMA, origCall->TypeGet(), finalTree, retVal); + + // The JIT seems to want to CSE this comma and messes up multi-reg ret + // values in the process. Just avoid CSE'ing this tree entirely in that + // case. 
+ if (origCall->HasMultiRegRetVal()) + { + finalTree->gtFlags |= GTF_DONT_CSE; + } + + return finalTree; +} + +//------------------------------------------------------------------------ +// getLookupTree: get a lookup tree +// +// Arguments: +// pResolvedToken - resolved token of the call +// pLookup - the lookup to get the tree for +// handleFlags - flags to set on the result node +// compileTimeHandle - compile-time handle corresponding to the lookup +// +// Return Value: +// A node representing the lookup tree +// +GenTree* Compiler::getLookupTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, + CORINFO_LOOKUP* pLookup, + unsigned handleFlags, + void* compileTimeHandle) +{ + if (!pLookup->lookupKind.needsRuntimeLookup) + { + // No runtime lookup is required. + // Access is direct or memory-indirect (of a fixed address) reference + + CORINFO_GENERIC_HANDLE handle = nullptr; + void* pIndirection = nullptr; + assert(pLookup->constLookup.accessType != IAT_PPVALUE && pLookup->constLookup.accessType != IAT_RELPVALUE); + + if (pLookup->constLookup.accessType == IAT_VALUE) + { + handle = pLookup->constLookup.handle; + } + else if (pLookup->constLookup.accessType == IAT_PVALUE) + { + pIndirection = pLookup->constLookup.addr; + } + + return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, compileTimeHandle); + } + + return getRuntimeLookupTree(pResolvedToken, pLookup, compileTimeHandle); +} + +//------------------------------------------------------------------------ +// getRuntimeLookupTree: get a tree for a runtime lookup +// +// Arguments: +// pResolvedToken - resolved token of the call +// pLookup - the lookup to get the tree for +// compileTimeHandle - compile-time handle corresponding to the lookup +// +// Return Value: +// A node representing the runtime lookup tree +// +GenTree* Compiler::getRuntimeLookupTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, + CORINFO_LOOKUP* pLookup, + void* compileTimeHandle) +{ + assert(!compIsForInlining()); + + CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup; + + // If pRuntimeLookup->indirections is equal to CORINFO_USEHELPER, it specifies that a run-time helper should be + // used; otherwise, it specifies the number of indirections via pRuntimeLookup->offsets array. + if ((pRuntimeLookup->indirections == CORINFO_USEHELPER) || pRuntimeLookup->testForNull || + pRuntimeLookup->testForFixup) + { + // If the first condition is true, runtime lookup tree is available only via the run-time helper function. + // TODO-CQ If the second or third condition is true, we are always using the slow path since we can't + // introduce control flow at this point. See impRuntimeLookupToTree for the logic to avoid calling the helper. + // The long-term solution is to introduce a new node representing a runtime lookup, create instances + // of that node both in the importer and here, and expand the node in lower (introducing control flow if + // necessary). 
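+        // In this case the whole lookup stays as one opaque runtime-handle helper call,
+        // taking the generic context tree and the lookup signature, instead of the inline
+        // chain of indirections built further below.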
+ return gtNewRuntimeLookupHelperCallNode(pRuntimeLookup, + getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind), + compileTimeHandle); + } + + GenTree* result = getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind); + + ArrayStack stmts(getAllocator(CMK_ArrayStack)); + + auto cloneTree = [&](GenTree** tree DEBUGARG(const char* reason)) { + if (!((*tree)->gtFlags & GTF_GLOB_EFFECT)) + { + GenTree* clone = gtClone(*tree, true); + + if (clone) + { + return clone; + } + } + + unsigned temp = lvaGrabTemp(true DEBUGARG(reason)); + stmts.Push(gtNewTempAssign(temp, *tree)); + *tree = gtNewLclvNode(temp, lvaGetActualType(temp)); + return gtNewLclvNode(temp, lvaGetActualType(temp)); + }; + + // Apply repeated indirections + for (WORD i = 0; i < pRuntimeLookup->indirections; i++) + { + GenTree* preInd = nullptr; + if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset)) + { + preInd = cloneTree(&result DEBUGARG("getRuntimeLookupTree indirectOffset")); + } + + if (i != 0) + { + result = gtNewOperNode(GT_IND, TYP_I_IMPL, result); + result->gtFlags |= GTF_IND_NONFAULTING; + result->gtFlags |= GTF_IND_INVARIANT; + } + + if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset)) + { + result = gtNewOperNode(GT_ADD, TYP_I_IMPL, preInd, result); + } + + if (pRuntimeLookup->offsets[i] != 0) + { + result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewIconNode(pRuntimeLookup->offsets[i], TYP_I_IMPL)); + } + } + + assert(!pRuntimeLookup->testForNull); + if (pRuntimeLookup->indirections > 0) + { + assert(!pRuntimeLookup->testForFixup); + result = gtNewOperNode(GT_IND, TYP_I_IMPL, result); + result->gtFlags |= GTF_IND_NONFAULTING; + } + + // Produces GT_COMMA(stmt1, GT_COMMA(stmt2, ... GT_COMMA(stmtN, result))) + + while (!stmts.Empty()) + { + result = gtNewOperNode(GT_COMMA, TYP_I_IMPL, stmts.Pop(), result); + } + + DISPTREE(result); + return result; +} + +//------------------------------------------------------------------------ +// getVirtMethodPointerTree: get a tree for a virtual method pointer +// +// Arguments: +// thisPtr - tree representing `this` pointer +// pResolvedToken - pointer to the resolved token of the method +// pCallInfo - pointer to call info +// +// Return Value: +// A node representing the virtual method pointer + +GenTree* Compiler::getVirtMethodPointerTree(GenTree* thisPtr, + CORINFO_RESOLVED_TOKEN* pResolvedToken, + CORINFO_CALL_INFO* pCallInfo) +{ + GenTree* exactTypeDesc = getTokenHandleTree(pResolvedToken, true); + GenTree* exactMethodDesc = getTokenHandleTree(pResolvedToken, false); + + GenTreeCall::Use* helpArgs = gtNewCallArgs(thisPtr, exactTypeDesc, exactMethodDesc); + return gtNewHelperCallNode(CORINFO_HELP_VIRTUAL_FUNC_PTR, TYP_I_IMPL, helpArgs); +} + +//------------------------------------------------------------------------ +// getTokenHandleTree: get a handle tree for a token +// +// Arguments: +// pResolvedToken - token to get a handle for +// parent - whether parent should be imported +// +// Return Value: +// A node representing the virtual method pointer + +GenTree* Compiler::getTokenHandleTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool parent) +{ + CORINFO_GENERICHANDLE_RESULT embedInfo; + info.compCompHnd->embedGenericHandle(pResolvedToken, parent ? 
TRUE : FALSE, &embedInfo); + + GenTree* result = getLookupTree(pResolvedToken, &embedInfo.lookup, gtTokenToIconFlags(pResolvedToken->token), + embedInfo.compileTimeHandle); + + // If we have a result and it requires runtime lookup, wrap it in a runtime lookup node. + if ((result != nullptr) && embedInfo.lookup.lookupKind.needsRuntimeLookup) + { + result = gtNewRuntimeLookup(embedInfo.compileTimeHandle, embedInfo.handleType, result); + } + + return result; +} + +/***************************************************************************** + * + * Transform the given GT_CALL tree for tail call via JIT helper. + */ +void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call) +{ + JITDUMP("fgMorphTailCallViaJitHelper (before):\n"); + DISPTREE(call); + + // The runtime requires that we perform a null check on the `this` argument before + // tail calling to a virtual dispatch stub. This requirement is a consequence of limitations + // in the runtime's ability to map an AV to a NullReferenceException if + // the AV occurs in a dispatch stub that has unmanaged caller. + if (call->IsVirtualStub()) + { + call->gtFlags |= GTF_CALL_NULLCHECK; + } + + // For the helper-assisted tail calls, we need to push all the arguments + // into a single list, and then add a few extra at the beginning or end. + // + // For x86, the tailcall helper is defined as: + // + // JIT_TailCall(, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* + // callTarget) + // + // Note that the special arguments are on the stack, whereas the function arguments follow + // the normal convention: there might be register arguments in ECX and EDX. The stack will + // look like (highest address at the top): + // first normal stack argument + // ... + // last normal stack argument + // numberOfOldStackArgs + // numberOfNewStackArgs + // flags + // callTarget + // + // Each special arg is 4 bytes. + // + // 'flags' is a bitmask where: + // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all + // callee-saved registers for tailcall functions. Note that the helper assumes + // that the callee-saved registers live immediately below EBP, and must have been + // pushed in this order: EDI, ESI, EBX. + // 2 == call target is a virtual stub dispatch. + // + // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details + // on the custom calling convention. + + // Check for PInvoke call types that we don't handle in codegen yet. + assert(!call->IsUnmanaged()); + assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr)); + + // Don't support tail calling helper methods + assert(call->gtCallType != CT_HELPER); + + // We come this route only for tail prefixed calls that cannot be dispatched as + // fast tail calls + assert(!call->IsImplicitTailCall()); + + // We want to use the following assert, but it can modify the IR in some cases, so we + // can't do that in an assert. + // assert(!fgCanFastTailCall(call, nullptr)); + + // First move the 'this' pointer (if any) onto the regular arg list. We do this because + // we are going to prepend special arguments onto the argument list (for non-x86 platforms), + // and thus shift where the 'this' pointer will be passed to a later argument slot. In + // addition, for all platforms, we are going to change the call into a helper call. Our code + // generation code for handling calls to helpers does not handle 'this' pointers. 
So, when we + // do this transformation, we must explicitly create a null 'this' pointer check, if required, + // since special 'this' pointer handling will no longer kick in. + // + // Some call types, such as virtual vtable calls, require creating a call address expression + // that involves the "this" pointer. Lowering will sometimes create an embedded statement + // to create a temporary that is assigned to the "this" pointer expression, and then use + // that temp to create the call address expression. This temp creation embedded statement + // will occur immediately before the "this" pointer argument, and then will be used for both + // the "this" pointer argument as well as the call address expression. In the normal ordering, + // the embedded statement establishing the "this" pointer temp will execute before both uses + // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the + // normal call argument list, and insert a placeholder which will hold the call address + // expression. For non-x86, things are ok, because the order of execution of these is not + // altered. However, for x86, the call address expression is inserted as the *last* argument + // in the argument list, *after* the "this" pointer. It will be put on the stack, and be + // evaluated first. To ensure we don't end up with out-of-order temp definition and use, + // for those cases where call lowering creates an embedded form temp of "this", we will + // create a temp here, early, that will later get morphed correctly. + + if (call->gtCallThisArg != nullptr) + { + GenTree* thisPtr = nullptr; + GenTree* objp = call->gtCallThisArg->GetNode(); + call->gtCallThisArg = nullptr; + + if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal()) + { + // tmp = "this" + unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); + GenTree* asg = gtNewTempAssign(lclNum, objp); + + // COMMA(tmp = "this", tmp) + var_types vt = objp->TypeGet(); + GenTree* tmp = gtNewLclvNode(lclNum, vt); + thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp); + + objp = thisPtr; + } + + if (call->NeedsNullCheck()) + { + // clone "this" if "this" has no side effects. + if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT)) + { + thisPtr = gtClone(objp, true); + } + + var_types vt = objp->TypeGet(); + if (thisPtr == nullptr) + { + // create a temp if either "this" has side effects or "this" is too complex to clone. + + // tmp = "this" + unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr")); + GenTree* asg = gtNewTempAssign(lclNum, objp); + + // COMMA(tmp = "this", deref(tmp)) + GenTree* tmp = gtNewLclvNode(lclNum, vt); + GenTree* nullcheck = gtNewNullCheck(tmp, compCurBB); + asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, nullcheck); + + // COMMA(COMMA(tmp = "this", deref(tmp)), tmp) + thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt)); + } + else + { + // thisPtr = COMMA(deref("this"), "this") + GenTree* nullcheck = gtNewNullCheck(thisPtr, compCurBB); + thisPtr = gtNewOperNode(GT_COMMA, vt, nullcheck, gtClone(objp, true)); + } + + call->gtFlags &= ~GTF_CALL_NULLCHECK; + } + else + { + thisPtr = objp; + } + + // TODO-Cleanup: we leave it as a virtual stub call to + // use logic in `LowerVirtualStubCall`, clear GTF_CALL_VIRT_KIND_MASK here + // and change `LowerCall` to recognize it as a direct call. + + // During rationalization tmp="this" and null check will + // materialize as embedded stmts in right execution order. 
+ assert(thisPtr != nullptr); + call->gtCallArgs = gtPrependNewCallArg(thisPtr, call->gtCallArgs); + } + + // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will + // append to the list. + GenTreeCall::Use** ppArg = &call->gtCallArgs; + for (GenTreeCall::Use& use : call->Args()) + { + ppArg = &use.NextRef(); + } + assert(ppArg != nullptr); + assert(*ppArg == nullptr); + + unsigned nOldStkArgsWords = + (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; + GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); + *ppArg = gtNewCallArgs(arg3); // numberOfOldStackArgs + ppArg = &((*ppArg)->NextRef()); + + // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. + // The constant will be replaced. + GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL); + *ppArg = gtNewCallArgs(arg2); // numberOfNewStackArgs + ppArg = &((*ppArg)->NextRef()); + + // Inject a placeholder for the flags. + // The constant will be replaced. + GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL); + *ppArg = gtNewCallArgs(arg1); + ppArg = &((*ppArg)->NextRef()); + + // Inject a placeholder for the real call target that the Lowering phase will generate. + // The constant will be replaced. + GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL); + *ppArg = gtNewCallArgs(arg0); + + // It is now a varargs tail call. + call->gtCallMoreFlags |= GTF_CALL_M_VARARGS; + call->gtFlags &= ~GTF_CALL_POP_ARGS; + + // The function is responsible for doing explicit null check when it is necessary. + assert(!call->NeedsNullCheck()); + + JITDUMP("fgMorphTailCallViaJitHelper (after):\n"); + DISPTREE(call); +} + +//------------------------------------------------------------------------ +// fgGetStubAddrArg: Return the virtual stub address for the given call. +// +// Notes: +// the JIT must place the address of the stub used to load the call target, +// the "stub indirection cell", in special call argument with special register. +// +// Arguments: +// call - a call that needs virtual stub dispatching. +// +// Return Value: +// addr tree with set resister requirements. +// +GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) +{ + assert(call->IsVirtualStub()); + GenTree* stubAddrArg; + if (call->gtCallType == CT_INDIRECT) + { + stubAddrArg = gtClone(call->gtCallAddr, true); + } + else + { + assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT); + ssize_t addr = ssize_t(call->gtStubCallStubAddr); + stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR); +#ifdef DEBUG + stubAddrArg->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; +#endif + } + assert(stubAddrArg != nullptr); + stubAddrArg->SetRegNum(virtualStubParamInfo->GetReg()); + return stubAddrArg; +} + +//------------------------------------------------------------------------------ +// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop. +// +// +// Arguments: +// block - basic block ending with a recursive fast tail call +// recursiveTailCall - recursive tail call to transform +// +// Notes: +// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop. + +void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall) +{ + assert(recursiveTailCall->IsTailCallConvertibleToLoop()); + Statement* lastStmt = block->lastStmt(); + assert(recursiveTailCall == lastStmt->GetRootNode()); + + // Transform recursive tail call into a loop. 
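+    // Conceptually:
+    //   int Foo(int x) { ...; return Foo(x - 1); }
+    // becomes
+    //   int Foo(int x) { top: ...; x = x - 1; goto top; }
+    // with the argument-to-parameter copies and the re-initialization of locals
+    // built explicitly below.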
+ + Statement* earlyArgInsertionPoint = lastStmt; + IL_OFFSETX callILOffset = lastStmt->GetILOffsetX(); + + // Hoist arg setup statement for the 'this' argument. + GenTreeCall::Use* thisArg = recursiveTailCall->gtCallThisArg; + if ((thisArg != nullptr) && !thisArg->GetNode()->IsNothingNode() && !thisArg->GetNode()->IsArgPlaceHolderNode()) + { + Statement* thisArgStmt = gtNewStmt(thisArg->GetNode(), callILOffset); + fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt); + } + + // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first; + // then the temps need to be assigned to the method parameters. This is done so that the caller + // parameters are not re-assigned before call arguments depending on them are evaluated. + // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of + // where the next temp or parameter assignment should be inserted. + + // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first + // while the second call argument (const 1) doesn't. + // Basic block before tail recursion elimination: + // ***** BB04, stmt 1 (top level) + // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013) + // [000033] --C - G------ - \--* call void RecursiveMethod + // [000030] ------------ | / --* const int - 1 + // [000031] ------------arg0 in rcx + --* +int + // [000029] ------------ | \--* lclVar int V00 arg1 + // [000032] ------------arg1 in rdx \--* const int 1 + // + // + // Basic block after tail recursion elimination : + // ***** BB04, stmt 1 (top level) + // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) + // [000030] ------------ | / --* const int - 1 + // [000031] ------------ | / --* +int + // [000029] ------------ | | \--* lclVar int V00 arg1 + // [000050] - A---------- \--* = int + // [000049] D------N---- \--* lclVar int V02 tmp0 + // + // ***** BB04, stmt 2 (top level) + // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) + // [000052] ------------ | / --* lclVar int V02 tmp0 + // [000054] - A---------- \--* = int + // [000053] D------N---- \--* lclVar int V00 arg0 + + // ***** BB04, stmt 3 (top level) + // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? ) + // [000032] ------------ | / --* const int 1 + // [000057] - A---------- \--* = int + // [000056] D------N---- \--* lclVar int V01 arg1 + + Statement* tmpAssignmentInsertionPoint = lastStmt; + Statement* paramAssignmentInsertionPoint = lastStmt; + + // Process early args. They may contain both setup statements for late args and actual args. + // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum + // below has the correct second argument. + int earlyArgIndex = (thisArg == nullptr) ? 0 : 1; + for (GenTreeCall::Use& use : recursiveTailCall->Args()) + { + GenTree* earlyArg = use.GetNode(); + if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode()) + { + if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0) + { + // This is a setup node so we need to hoist it. + Statement* earlyArgStmt = gtNewStmt(earlyArg, callILOffset); + fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt); + } + else + { + // This is an actual argument that needs to be assigned to the corresponding caller parameter. 
+ fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); + Statement* paramAssignStmt = + fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset, + tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) + { + // All temp assignments will happen before the first param assignment. + tmpAssignmentInsertionPoint = paramAssignStmt; + } + } + } + earlyArgIndex++; + } + + // Process late args. + int lateArgIndex = 0; + for (GenTreeCall::Use& use : recursiveTailCall->LateArgs()) + { + // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. + GenTree* lateArg = use.GetNode(); + fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); + Statement* paramAssignStmt = + fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset, + tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + + if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) + { + // All temp assignments will happen before the first param assignment. + tmpAssignmentInsertionPoint = paramAssignStmt; + } + lateArgIndex++; + } + + // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that + // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that + // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here. + if (!info.compIsStatic && (lvaArg0Var != info.compThisArg)) + { + var_types thisType = lvaTable[info.compThisArg].TypeGet(); + GenTree* arg0 = gtNewLclvNode(lvaArg0Var, thisType); + GenTree* arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType)); + Statement* arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset); + fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt); + } + + // If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog + // but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization + // for all non-parameter IL locals as well as temp structs with GC fields. + // Liveness phase will remove unnecessary initializations. 
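+    // For example, a non-parameter int local V03 gets "V03 = 0" and a struct local with
+    // GC fields gets a zero init block, inserted just before the (soon to be removed)
+    // recursive call statement.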
+ if (info.compInitMem || compSuppressedZeroInit) + { + unsigned varNum; + LclVarDsc* varDsc; + for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++) + { +#if FEATURE_FIXED_OUT_ARGS + if (varNum == lvaOutgoingArgSpaceVar) + { + continue; + } +#endif // FEATURE_FIXED_OUT_ARGS + if (!varDsc->lvIsParam) + { + var_types lclType = varDsc->TypeGet(); + bool isUserLocal = (varNum < info.compLocalsCount); + bool structWithGCFields = ((lclType == TYP_STRUCT) && varDsc->GetLayout()->HasGCPtr()); + bool hadSuppressedInit = varDsc->lvSuppressedZeroInit; + if ((info.compInitMem && (isUserLocal || structWithGCFields)) || hadSuppressedInit) + { + GenTree* lcl = gtNewLclvNode(varNum, lclType); + GenTree* init = nullptr; + if (varTypeIsStruct(lclType)) + { + const bool isVolatile = false; + const bool isCopyBlock = false; + init = gtNewBlkOpNode(lcl, gtNewIconNode(0), isVolatile, isCopyBlock); + init = fgMorphInitBlock(init); + } + else + { + GenTree* zero = gtNewZeroConNode(genActualType(lclType)); + init = gtNewAssignNode(lcl, zero); + } + Statement* initStmt = gtNewStmt(init, callILOffset); + fgInsertStmtBefore(block, lastStmt, initStmt); + } + } + } + } + + // Remove the call + fgRemoveStmt(block, lastStmt); + + // Set the loop edge. + if (opts.IsOSR()) + { + // Todo: this may not look like a viable loop header. + // Might need the moral equivalent of a scratch BB. + block->bbJumpDest = fgEntryBB; + } + else + { + // Ensure we have a scratch block and then target the next + // block. Loop detection needs to see a pred out of the loop, + // so mark the scratch block BBF_DONT_REMOVE to prevent empty + // block removal on it. + fgEnsureFirstBBisScratch(); + fgFirstBB->bbFlags |= BBF_DONT_REMOVE; + block->bbJumpDest = fgFirstBB->bbNext; + } + + // Finish hooking things up. + block->bbJumpKind = BBJ_ALWAYS; + fgAddRefPred(block->bbJumpDest, block); + block->bbFlags &= ~BBF_HAS_JMP; +} + +//------------------------------------------------------------------------------ +// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter. +// +// +// Arguments: +// arg - argument to assign +// argTabEntry - argument table entry corresponding to arg +// block --- basic block the call is in +// callILOffset - IL offset of the call +// tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary) +// paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted +// +// Return Value: +// parameter assignment statement if one was inserted; nullptr otherwise. + +Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, + fgArgTabEntry* argTabEntry, + BasicBlock* block, + IL_OFFSETX callILOffset, + Statement* tmpAssignmentInsertionPoint, + Statement* paramAssignmentInsertionPoint) +{ + // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because + // some argument trees may reference parameters directly. + + GenTree* argInTemp = nullptr; + unsigned originalArgNum = argTabEntry->argNum; + bool needToAssignParameter = true; + + // TODO-CQ: enable calls with struct arguments passed in registers. + noway_assert(!varTypeIsStruct(arg->TypeGet())); + + if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) + { + // The argument is already assigned to a temp or is a const. 
+ argInTemp = arg; + } + else if (arg->OperGet() == GT_LCL_VAR) + { + unsigned lclNum = arg->AsLclVar()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[lclNum]; + if (!varDsc->lvIsParam) + { + // The argument is a non-parameter local so it doesn't need to be assigned to a temp. + argInTemp = arg; + } + else if (lclNum == originalArgNum) + { + // The argument is the same parameter local that we were about to assign so + // we can skip the assignment. + needToAssignParameter = false; + } + } + + // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve + // any caller parameters. Some common cases are handled above but we may be able to eliminate + // more temp assignments. + + Statement* paramAssignStmt = nullptr; + if (needToAssignParameter) + { + if (argInTemp == nullptr) + { + // The argument is not assigned to a temp. We need to create a new temp and insert an assignment. + // TODO: we can avoid a temp assignment if we can prove that the argument tree + // doesn't involve any caller parameters. + unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp")); + lvaTable[tmpNum].lvType = arg->gtType; + GenTree* tempSrc = arg; + GenTree* tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType); + GenTree* tmpAssignNode = gtNewAssignNode(tempDest, tempSrc); + Statement* tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset); + fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt); + argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType); + } + + // Now assign the temp to the parameter. + LclVarDsc* paramDsc = lvaTable + originalArgNum; + assert(paramDsc->lvIsParam); + GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType); + GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp); + paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset); + + fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt); + } + return paramAssignStmt; +} + +/***************************************************************************** + * + * Transform the given GT_CALL tree for code generation. + */ + +GenTree* Compiler::fgMorphCall(GenTreeCall* call) +{ + if (call->CanTailCall()) + { + GenTree* newNode = fgMorphPotentialTailCall(call); + if (newNode != nullptr) + { + return newNode; + } + + assert(!call->CanTailCall()); + +#if FEATURE_MULTIREG_RET + if (fgGlobalMorph && call->HasMultiRegRetVal() && varTypeIsStruct(call->TypeGet())) + { + // The tail call has been rejected so we must finish the work deferred + // by impFixupCallStructReturn for multi-reg-returning calls and transform + // ret call + // into + // temp = call + // ret temp + + // Force re-evaluating the argInfo as the return argument has changed. + call->ResetArgInfo(); + + // Create a new temp. + unsigned tmpNum = + lvaGrabTemp(false DEBUGARG("Return value temp for multi-reg return (rejected tail call).")); + lvaTable[tmpNum].lvIsMultiRegRet = true; + + CORINFO_CLASS_HANDLE structHandle = call->gtRetClsHnd; + assert(structHandle != NO_CLASS_HANDLE); + const bool unsafeValueClsCheck = false; + lvaSetStruct(tmpNum, structHandle, unsafeValueClsCheck); + var_types structType = lvaTable[tmpNum].lvType; + GenTree* dst = gtNewLclvNode(tmpNum, structType); + GenTree* assg = gtNewAssignNode(dst, call); + assg = fgMorphTree(assg); + + // Create the assignment statement and insert it before the current statement. + Statement* assgStmt = gtNewStmt(assg, compCurStmt->GetILOffsetX()); + fgInsertStmtBefore(compCurBB, compCurStmt, assgStmt); + + // Return the temp. 
+ GenTree* result = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType); + result->gtFlags |= GTF_DONT_CSE; + + compCurBB->bbFlags |= BBF_HAS_CALL; // This block has a call + +#ifdef DEBUG + if (verbose) + { + printf("\nInserting assignment of a multi-reg call result to a temp:\n"); + gtDispStmt(assgStmt); + } + result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + return result; + } +#endif + } + + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 && + (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR) +#ifdef FEATURE_READYTORUN_COMPILER + || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR) +#endif + ) && + (call == fgMorphStmt->GetRootNode())) + { + // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result. + // Transform it into a null check. + + GenTree* thisPtr = call->gtCallArgs->GetNode(); + + GenTree* nullCheck = gtNewNullCheck(thisPtr, compCurBB); + + return fgMorphTree(nullCheck); + } + + noway_assert(call->gtOper == GT_CALL); + + // + // Only count calls once (only in the global morph phase) + // + if (fgGlobalMorph) + { + if (call->gtCallType == CT_INDIRECT) + { + optCallCount++; + optIndirectCallCount++; + } + else if (call->gtCallType == CT_USER_FUNC) + { + optCallCount++; + if (call->IsVirtual()) + { + optIndirectCallCount++; + } + } + } + + // Couldn't inline - remember that this BB contains method calls + + // Mark the block as a GC safe point for the call if possible. + // In the event the call indicates the block isn't a GC safe point + // and the call is unmanaged with a GC transition suppression request + // then insert a GC poll. + CLANG_FORMAT_COMMENT_ANCHOR; + + if (IsGcSafePoint(call)) + { + compCurBB->bbFlags |= BBF_GC_SAFE_POINT; + } + + // Regardless of the state of the basic block with respect to GC safe point, + // we will always insert a GC Poll for scenarios involving a suppressed GC + // transition. Only mark the block for GC Poll insertion on the first morph. + if (fgGlobalMorph && call->IsUnmanaged() && call->IsSuppressGCTransition()) + { + compCurBB->bbFlags |= (BBF_HAS_SUPPRESSGC_CALL | BBF_GC_SAFE_POINT); + optMethodFlags |= OMF_NEEDS_GCPOLLS; + } + + // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag + // + // We need to do these before the arguments are morphed + if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)) + { + // See if this is foldable + GenTree* optTree = gtFoldExprCall(call); + + // If we optimized, morph the result + if (optTree != call) + { + return fgMorphTree(optTree); + } + } + + compCurBB->bbFlags |= BBF_HAS_CALL; // This block has a call + + // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack. + GenTree* origDest = nullptr; // Will only become non-null if we do the transformation (and thus require + // copy-back). + unsigned retValTmpNum = BAD_VAR_NUM; + CORINFO_CLASS_HANDLE structHnd = nullptr; + if (call->HasRetBufArg() && + call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null). + { + // We're enforcing the invariant that return buffers pointers (at least for + // struct return types containing GC pointers) are never pointers into the heap. + // The large majority of cases are address of local variables, which are OK. + // Otherwise, allocate a local of the given struct type, pass its address, + // then assign from that into the proper destination. 
(We don't need to do this
+        // if we're passing the caller's ret buff arg to the callee, since the caller's caller
+        // will maintain the same invariant.)
+
+        GenTree* dest = call->gtCallArgs->GetNode();
+        assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
+        if (dest->TypeIs(TYP_BYREF) && !dest->IsLocalAddrExpr())
+        {
+            // We'll exempt helper calls from this, assuming that the helper implementation
+            // follows the old convention, and does whatever barrier is required.
+            if (call->gtCallType != CT_HELPER)
+            {
+                structHnd = call->gtRetClsHnd;
+                if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
+                    !(dest->OperGet() == GT_LCL_VAR && dest->AsLclVar()->GetLclNum() == info.compRetBuffArg))
+                {
+                    // Force re-evaluating the argInfo as the return argument has changed.
+                    call->fgArgInfo = nullptr;
+                    origDest        = dest;
+
+                    retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
+                    lvaSetStruct(retValTmpNum, structHnd, true);
+                    dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
+                }
+            }
+        }
+
+        call->gtCallArgs->SetNode(dest);
+    }
+
+    /* Process the "normal" argument list */
+    call = fgMorphArgs(call);
+    noway_assert(call->gtOper == GT_CALL);
+
+    // Should we expand this virtual method call target early here?
+    //
+    if (call->IsExpandedEarly() && call->IsVirtualVtable())
+    {
+        // We only expand the Vtable Call target once in the global morph phase
+        if (fgGlobalMorph)
+        {
+            assert(call->gtControlExpr == nullptr); // We only call this method and assign gtControlExpr once
+            call->gtControlExpr = fgExpandVirtualVtableCallTarget(call);
+        }
+        // We always have to morph or re-morph the control expr
+        //
+        call->gtControlExpr = fgMorphTree(call->gtControlExpr);
+
+        // Propagate any gtFlags into the call
+        call->gtFlags |= call->gtControlExpr->gtFlags;
+    }
+
+    // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
+    // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
+    if (opts.OptimizationEnabled() && (call->gtCallType == CT_HELPER) &&
+        (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
+    {
+        GenTree* value = gtArgEntryByArgNum(call, 2)->GetNode();
+        if (value->IsIntegralConst(0))
+        {
+            assert(value->OperGet() == GT_CNS_INT);
+
+            GenTree* arr   = gtArgEntryByArgNum(call, 0)->GetNode();
+            GenTree* index = gtArgEntryByArgNum(call, 1)->GetNode();
+
+            // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
+            // the spill trees as well if necessary.
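+            // For illustration, CALL CORINFO_HELP_ARRADDR_ST(array, index, null) becomes
+            //   ASG(INDEX(array, index), null)
+            // (storing a null reference never needs the covariance check the helper performs),
+            // with any argument spill assignments preserved in a leading COMMA.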
+ GenTreeOp* argSetup = nullptr; + for (GenTreeCall::Use& use : call->Args()) + { + GenTree* const arg = use.GetNode(); + if (arg->OperGet() != GT_ASG) + { + continue; + } + + assert(arg != arr); + assert(arg != index); + + arg->gtFlags &= ~GTF_LATE_ARG; + + GenTree* op1 = argSetup; + if (op1 == nullptr) + { + op1 = gtNewNothingNode(); +#if DEBUG + op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + } + + argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg); + +#if DEBUG + argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + } + +#ifdef DEBUG + auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult { + (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; + return WALK_CONTINUE; + }; + + fgWalkTreePost(&arr, resetMorphedFlag); + fgWalkTreePost(&index, resetMorphedFlag); + fgWalkTreePost(&value, resetMorphedFlag); +#endif // DEBUG + + GenTree* const nullCheckedArr = impCheckForNullPointer(arr); + GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index); + GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value); + arrStore->gtFlags |= GTF_ASG; + + GenTree* result = fgMorphTree(arrStore); + if (argSetup != nullptr) + { + result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result); +#if DEBUG + result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + } + + return result; + } + } + + if (origDest != nullptr) + { + GenTree* retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT)); + // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused + // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the + // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to + // be correct. + if (origDest->OperGet() == GT_ASG) + { + if (origDest->AsOp()->gtOp1->OperGet() == GT_LCL_VAR) + { + GenTree* var = origDest->AsOp()->gtOp1; + origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest, + gtNewLclvNode(var->AsLclVar()->GetLclNum(), var->TypeGet())); + } + } + GenTree* copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false); + copyBlk = fgMorphTree(copyBlk); + GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk); +#ifdef DEBUG + result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + return result; + } + + if (call->IsNoReturn()) + { + // + // If we know that the call does not return then we can set fgRemoveRestOfBlock + // to remove all subsequent statements and change the call's basic block to BBJ_THROW. + // As a result the compiler won't need to preserve live registers across the call. + // + // This isn't need for tail calls as there shouldn't be any code after the call anyway. + // Besides, the tail call code is part of the epilog and converting the block to + // BBJ_THROW would result in the tail call being dropped as the epilog is generated + // only for BBJ_RETURN blocks. 
+ // + + if (!call->IsTailCall()) + { + fgRemoveRestOfBlock = true; + } + } + + return call; +} + +/***************************************************************************** + * + * Expand and return the call target address for a VirtualCall + * The code here should match that generated by LowerVirtualVtableCall + */ + +GenTree* Compiler::fgExpandVirtualVtableCallTarget(GenTreeCall* call) +{ + GenTree* result; + + JITDUMP("Expanding virtual call target for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); + + noway_assert(call->gtCallType == CT_USER_FUNC); + + // get a reference to the thisPtr being passed + fgArgTabEntry* thisArgTabEntry = gtArgEntryByArgNum(call, 0); + GenTree* thisPtr = thisArgTabEntry->GetNode(); + + // fgMorphArgs must enforce this invariant by creating a temp + // + assert(thisPtr->OperIsLocal()); + + // Make a copy of the thisPtr by cloning + // + thisPtr = gtClone(thisPtr, true); + + noway_assert(thisPtr != nullptr); + + // Get hold of the vtable offset + unsigned vtabOffsOfIndirection; + unsigned vtabOffsAfterIndirection; + bool isRelative; + info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection, + &isRelative); + + // Dereference the this pointer to obtain the method table, it is called vtab below + GenTree* vtab; + assert(VPTR_OFFS == 0); // We have to add this value to the thisPtr to get the methodTable + vtab = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr); + vtab->gtFlags |= GTF_IND_INVARIANT; + + // Get the appropriate vtable chunk + if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK) + { + // Note this isRelative code path is currently never executed + // as the VM doesn't ever return: isRelative == true + // + if (isRelative) + { + // MethodTable offset is a relative pointer. + // + // Additional temporary variable is used to store virtual table pointer. 
+ // Address of method is obtained by the next computations: + // + // Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of + // vtable-1st-level-indirection): + // tmp = vtab + // + // Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection): + // result = [tmp + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp + vtabOffsOfIndirection]] + // + // + // When isRelative is true we need to setup two temporary variables + // var1 = vtab + // var2 = var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection] + // result = [var2] + var2 + // + unsigned varNum1 = lvaGrabTemp(true DEBUGARG("var1 - vtab")); + unsigned varNum2 = lvaGrabTemp(true DEBUGARG("var2 - relative")); + GenTree* asgVar1 = gtNewTempAssign(varNum1, vtab); // var1 = vtab + + // [tmp + vtabOffsOfIndirection] + GenTree* tmpTree1 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL), + gtNewIconNode(vtabOffsOfIndirection, TYP_INT)); + tmpTree1 = gtNewOperNode(GT_IND, TYP_I_IMPL, tmpTree1, false); + tmpTree1->gtFlags |= GTF_IND_NONFAULTING; + tmpTree1->gtFlags |= GTF_IND_INVARIANT; + + // var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + GenTree* tmpTree2 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL), + gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_INT)); + + // var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection] + tmpTree2 = gtNewOperNode(GT_ADD, TYP_I_IMPL, tmpTree2, tmpTree1); + GenTree* asgVar2 = gtNewTempAssign(varNum2, tmpTree2); // var2 = + + // This last indirection is not invariant, but is non-faulting + result = gtNewOperNode(GT_IND, TYP_I_IMPL, gtNewLclvNode(varNum2, TYP_I_IMPL), false); // [var2] + result->gtFlags |= GTF_IND_NONFAULTING; + + result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewLclvNode(varNum2, TYP_I_IMPL)); // [var2] + var2 + + // Now stitch together the two assignment and the calculation of result into a single tree + GenTree* commaTree = gtNewOperNode(GT_COMMA, TYP_I_IMPL, asgVar2, result); + result = gtNewOperNode(GT_COMMA, TYP_I_IMPL, asgVar1, commaTree); + } + else + { + // result = [vtab + vtabOffsOfIndirection] + result = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtab, gtNewIconNode(vtabOffsOfIndirection, TYP_INT)); + result = gtNewOperNode(GT_IND, TYP_I_IMPL, result, false); + result->gtFlags |= GTF_IND_NONFAULTING; + result->gtFlags |= GTF_IND_INVARIANT; + } + } + else + { + result = vtab; + assert(!isRelative); + } + + if (!isRelative) + { + // Load the function address + // result = [result + vtabOffsAfterIndirection] + result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewIconNode(vtabOffsAfterIndirection, TYP_INT)); + // This last indirection is not invariant, but is non-faulting + result = gtNewOperNode(GT_IND, TYP_I_IMPL, result, false); + result->gtFlags |= GTF_IND_NONFAULTING; + } + + return result; +} + +/***************************************************************************** + * + * Transform the given GTK_CONST tree for code generation. + */ + +GenTree* Compiler::fgMorphConst(GenTree* tree) +{ + assert(tree->OperKind() & GTK_CONST); + + /* Clear any exception flags or other unnecessary flags + * that may have been set before folding this node to a constant */ + + tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS); + + if (tree->OperGet() != GT_CNS_STR) + { + return tree; + } + + // TODO-CQ: Do this for compCurBB->isRunRarely(). 
Doing that currently will + // guarantee slow performance for that block. Instead cache the return value + // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf. + + bool useLazyStrCns = false; + if (compCurBB->bbJumpKind == BBJ_THROW) + { + useLazyStrCns = true; + } + else if (fgGlobalMorph && compCurStmt->GetRootNode()->IsCall()) + { + // Quick check: if the root node of the current statement happens to be a noreturn call. + GenTreeCall* call = compCurStmt->GetRootNode()->AsCall(); + useLazyStrCns = call->IsNoReturn() || fgIsThrow(call); + } + + if (useLazyStrCns) + { + CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->AsStrCon()->gtScpHnd); + if (helper != CORINFO_HELP_UNDEF) + { + // For un-important blocks, we want to construct the string lazily + + GenTreeCall::Use* args; + if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE) + { + args = gtNewCallArgs(gtNewIconNode(RidFromToken(tree->AsStrCon()->gtSconCPX), TYP_INT)); + } + else + { + args = gtNewCallArgs(gtNewIconNode(RidFromToken(tree->AsStrCon()->gtSconCPX), TYP_INT), + gtNewIconEmbScpHndNode(tree->AsStrCon()->gtScpHnd)); + } + + tree = gtNewHelperCallNode(helper, TYP_REF, args); + return fgMorphTree(tree); + } + } + + assert(tree->AsStrCon()->gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->AsStrCon()->gtScpHnd)); + + LPVOID pValue; + InfoAccessType iat = + info.compCompHnd->constructStringLiteral(tree->AsStrCon()->gtScpHnd, tree->AsStrCon()->gtSconCPX, &pValue); + + tree = gtNewStringLiteralNode(iat, pValue); + + return fgMorphTree(tree); +} + +//------------------------------------------------------------------------ +// fgMorphTryFoldObjAsLclVar: try to fold an Obj node as a LclVar. +// +// Arguments: +// obj - the obj node. +// +// Return value: +// GenTreeLclVar if the obj can be replaced by it, null otherwise. +// +// Notes: +// TODO-CQ: currently this transformation is done only under copy block, +// but it is benefitial to do for each OBJ node. However, `PUT_ARG_STACK` +// for some platforms does not expect struct `LCL_VAR` as a source, so +// it needs more work. +// +GenTreeLclVar* Compiler::fgMorphTryFoldObjAsLclVar(GenTreeObj* obj) +{ + if (opts.OptimizationEnabled()) + { + GenTree* op1 = obj->Addr(); + if (op1->OperIs(GT_ADDR)) + { + GenTreeUnOp* addr = op1->AsUnOp(); + GenTree* addrOp = addr->gtGetOp1(); + if (addrOp->TypeIs(obj->TypeGet()) && addrOp->OperIs(GT_LCL_VAR)) + { + GenTreeLclVar* lclVar = addrOp->AsLclVar(); + + ClassLayout* lclVarLayout = lvaGetDesc(lclVar)->GetLayout(); + ClassLayout* objLayout = obj->GetLayout(); + if (ClassLayout::AreCompatible(lclVarLayout, objLayout)) + { +#ifdef DEBUG + CORINFO_CLASS_HANDLE objClsHandle = obj->GetLayout()->GetClassHandle(); + assert(objClsHandle != NO_CLASS_HANDLE); + if (verbose) + { + CORINFO_CLASS_HANDLE lclClsHnd = gtGetStructHandle(lclVar); + printf("fold OBJ(ADDR(X)) [%06u] into X [%06u], ", dspTreeID(obj), dspTreeID(lclVar)); + printf("with %s handles\n", ((lclClsHnd == objClsHandle) ? "matching" : "different")); + } +#endif + // Keep the DONT_CSE flag in sync + // (as the addr always marks it for its op1) + lclVar->gtFlags &= ~GTF_DONT_CSE; + lclVar->gtFlags |= (obj->gtFlags & GTF_DONT_CSE); + + DEBUG_DESTROY_NODE(obj); + DEBUG_DESTROY_NODE(addr); + return lclVar; + } + } + } + } + return nullptr; +} + +/***************************************************************************** + * + * Transform the given GTK_LEAF tree for code generation. 
+ */ + +GenTree* Compiler::fgMorphLeaf(GenTree* tree) +{ + assert(tree->OperKind() & GTK_LEAF); + + if (tree->gtOper == GT_LCL_VAR) + { + const bool forceRemorph = false; + return fgMorphLocalVar(tree, forceRemorph); + } + else if (tree->gtOper == GT_LCL_FLD) + { + if (lvaGetDesc(tree->AsLclFld())->lvAddrExposed) + { + tree->gtFlags |= GTF_GLOB_REF; + } + +#ifdef TARGET_X86 + if (info.compIsVarArgs) + { + GenTree* newTree = fgMorphStackArgForVarArgs(tree->AsLclFld()->GetLclNum(), tree->TypeGet(), + tree->AsLclFld()->GetLclOffs()); + if (newTree != nullptr) + { + if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) + { + newTree->SetOper(GT_IND); + } + return newTree; + } + } +#endif // TARGET_X86 + } + else if (tree->gtOper == GT_FTN_ADDR) + { + CORINFO_CONST_LOOKUP addrInfo; + +#ifdef FEATURE_READYTORUN_COMPILER + if (tree->AsFptrVal()->gtEntryPoint.addr != nullptr) + { + addrInfo = tree->AsFptrVal()->gtEntryPoint; + } + else +#endif + { + info.compCompHnd->getFunctionFixedEntryPoint(tree->AsFptrVal()->gtFptrMethod, &addrInfo); + } + + GenTree* indNode = nullptr; + switch (addrInfo.accessType) + { + case IAT_PPVALUE: + indNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)addrInfo.handle, GTF_ICON_CONST_PTR, true); + + // Add the second indirection + indNode = gtNewOperNode(GT_IND, TYP_I_IMPL, indNode); + // This indirection won't cause an exception. + indNode->gtFlags |= GTF_IND_NONFAULTING; + // This indirection also is invariant. + indNode->gtFlags |= GTF_IND_INVARIANT; + break; + + case IAT_PVALUE: + indNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)addrInfo.handle, GTF_ICON_FTN_ADDR, true); + break; + + case IAT_VALUE: + // Refer to gtNewIconHandleNode() as the template for constructing a constant handle + // + tree->SetOper(GT_CNS_INT); + tree->AsIntConCommon()->SetIconValue(ssize_t(addrInfo.handle)); + tree->gtFlags |= GTF_ICON_FTN_ADDR; + break; + + default: + noway_assert(!"Unknown addrInfo.accessType"); + } + + if (indNode != nullptr) + { + DEBUG_DESTROY_NODE(tree); + tree = fgMorphTree(indNode); + } + } + + return tree; +} + +void Compiler::fgAssignSetVarDef(GenTree* tree) +{ + GenTreeLclVarCommon* lclVarCmnTree; + bool isEntire = false; + if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire)) + { + if (isEntire) + { + lclVarCmnTree->gtFlags |= GTF_VAR_DEF; + } + else + { + // We consider partial definitions to be modeled as uses followed by definitions. + // This captures the idea that precedings defs are not necessarily made redundant + // by this definition. + lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG); + } + } +} + +//------------------------------------------------------------------------ +// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment +// +// Arguments: +// tree - The block assignment to be possibly morphed +// +// Return Value: +// The modified tree if successful, nullptr otherwise. +// +// Assumptions: +// 'tree' must be a block assignment. +// +// Notes: +// If successful, this method always returns the incoming tree, modifying only +// its arguments. + +GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) +{ + // This must be a block assignment. 
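+    // For example, an 8-byte struct copy of the form
+    //     ASG(BLK(8)(ADDR(LCL_VAR V01)), BLK(8)(ADDR(LCL_VAR V02)))
+    // may be rewritten below into a scalar assignment such as ASG(long V01, long V02) on a 64-bit target;
+    // the original ASG node is kept and only its type and operands are replaced.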
+ noway_assert(tree->OperIsBlkOp()); + var_types asgType = tree->TypeGet(); + + GenTree* asg = tree; + GenTree* dest = asg->gtGetOp1(); + GenTree* src = asg->gtGetOp2(); + unsigned destVarNum = BAD_VAR_NUM; + LclVarDsc* destVarDsc = nullptr; + GenTree* destLclVarTree = nullptr; + bool isCopyBlock = asg->OperIsCopyBlkOp(); + bool isInitBlock = !isCopyBlock; + + unsigned size = 0; + CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; + + if (dest->gtEffectiveVal()->OperIsBlk()) + { + GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk(); + size = lhsBlk->Size(); + if (impIsAddressInLocal(lhsBlk->Addr(), &destLclVarTree)) + { + destVarNum = destLclVarTree->AsLclVarCommon()->GetLclNum(); + destVarDsc = &(lvaTable[destVarNum]); + } + if (lhsBlk->OperGet() == GT_OBJ) + { + clsHnd = lhsBlk->AsObj()->GetLayout()->GetClassHandle(); + } + } + else + { + // Is this an enregisterable struct that is already a simple assignment? + // This can happen if we are re-morphing. + // Note that we won't do this straightaway if this is a SIMD type, since it + // may be a promoted lclVar (sometimes we promote the individual float fields of + // fixed-size SIMD). + if (dest->OperGet() == GT_IND) + { + noway_assert(asgType != TYP_STRUCT); + if (varTypeIsStruct(asgType)) + { + destLclVarTree = fgIsIndirOfAddrOfLocal(dest); + } + if (isCopyBlock && destLclVarTree == nullptr && !src->OperIs(GT_LCL_VAR)) + { + fgMorphBlockOperand(src, asgType, genTypeSize(asgType), false /*isBlkReqd*/); + return tree; + } + } + else + { + noway_assert(dest->OperIsLocal()); + destLclVarTree = dest; + } + if (destLclVarTree != nullptr) + { + destVarNum = destLclVarTree->AsLclVarCommon()->GetLclNum(); + destVarDsc = &(lvaTable[destVarNum]); + if (asgType == TYP_STRUCT) + { + clsHnd = destVarDsc->GetStructHnd(); + size = destVarDsc->lvExactSize; + } + } + if (asgType != TYP_STRUCT) + { + size = genTypeSize(asgType); + } + } + if (size == 0) + { + return nullptr; + } + + if ((destVarDsc != nullptr) && varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted) + { + // Let fgMorphCopyBlock handle it. + return nullptr; + } + + if (src->IsCall() || src->OperIsSIMD()) + { + // Can't take ADDR from these nodes, let fgMorphCopyBlock handle it, #11413. + return nullptr; + } + + if ((destVarDsc != nullptr) && !varTypeIsStruct(destVarDsc->TypeGet())) + { + + // + // See if we can do a simple transformation: + // + // GT_ASG + // / \. + // GT_IND GT_IND or CNS_INT + // | | + // [dest] [src] + // + + if (asgType == TYP_STRUCT) + { + // It is possible to use `initobj` to init a primitive type on the stack, + // like `ldloca.s 1; initobj 1B000003` where `V01` has type `ref`; + // in this case we generate `ASG struct(BLK<8> struct(ADDR byref(LCL_VAR ref)), 0)` + // and this code path transforms it into `ASG ref(LCL_VARref, 0)` because it is not a real + // struct assignment. + + if (size == REGSIZE_BYTES) + { + if (clsHnd == NO_CLASS_HANDLE) + { + // A register-sized cpblk can be treated as an integer asignment. 
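+                    // (TYP_I_IMPL is the pointer-sized integer type: TYP_LONG on 64-bit targets, TYP_INT on
+                    // 32-bit ones. When a class handle is available, the GC layout query below picks
+                    // TYP_REF/TYP_BYREF instead for GC slots so that write barriers and GC reporting remain
+                    // correct.)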
+ asgType = TYP_I_IMPL; + } + else + { + BYTE gcPtr; + info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); + asgType = getJitGCType(gcPtr); + } + } + else + { + switch (size) + { + case 1: + asgType = TYP_BYTE; + break; + case 2: + asgType = TYP_SHORT; + break; + +#ifdef TARGET_64BIT + case 4: + asgType = TYP_INT; + break; +#endif // TARGET_64BIT + } + } + } + } + + GenTree* srcLclVarTree = nullptr; + LclVarDsc* srcVarDsc = nullptr; + if (isCopyBlock) + { + if (src->OperGet() == GT_LCL_VAR) + { + srcLclVarTree = src; + srcVarDsc = &(lvaTable[src->AsLclVarCommon()->GetLclNum()]); + } + else if (src->OperIsIndir() && impIsAddressInLocal(src->AsOp()->gtOp1, &srcLclVarTree)) + { + srcVarDsc = &(lvaTable[srcLclVarTree->AsLclVarCommon()->GetLclNum()]); + } + if ((srcVarDsc != nullptr) && varTypeIsStruct(srcLclVarTree) && srcVarDsc->lvPromoted) + { + // Let fgMorphCopyBlock handle it. + return nullptr; + } + } + + if (asgType != TYP_STRUCT) + { + noway_assert((size <= REGSIZE_BYTES) || varTypeIsSIMD(asgType)); + + // For initBlk, a non constant source is not going to allow us to fiddle + // with the bits to create a single assigment. + // Nor do we (for now) support transforming an InitBlock of SIMD type, unless + // it is a direct assignment to a lclVar and the value is zero. + if (isInitBlock) + { + if (!src->IsConstInitVal()) + { + return nullptr; + } + if (varTypeIsSIMD(asgType) && (!src->IsIntegralConst(0) || (destVarDsc == nullptr))) + { + return nullptr; + } + } + + if (destVarDsc != nullptr) + { +#if LOCAL_ASSERTION_PROP + // Kill everything about dest + if (optLocalAssertionProp) + { + if (optAssertionCount > 0) + { + fgKillDependentAssertions(destVarNum DEBUGARG(tree)); + } + } +#endif // LOCAL_ASSERTION_PROP + + // A previous incarnation of this code also required the local not to be + // address-exposed(=taken). That seems orthogonal to the decision of whether + // to do field-wise assignments: being address-exposed will cause it to be + // "dependently" promoted, so it will be in the right memory location. One possible + // further reason for avoiding field-wise stores is that the struct might have alignment-induced + // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid + // concern, then we could compromise, and say that address-exposed + fields do not completely cover the + // memory of the struct prevent field-wise assignments. Same situation exists for the "src" decision. + if (varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted) + { + // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.) + return nullptr; + } + else if (!varTypeIsFloating(destLclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc))) + { + // Use the dest local var directly, as well as its type. + dest = destLclVarTree; + asgType = destVarDsc->lvType; + + // If the block operation had been a write to a local var of a small int type, + // of the exact size of the small int type, and the var is NormalizeOnStore, + // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't + // have done that normalization. If we're now making it into an assignment, + // the NormalizeOnStore will work, and it can be a full def. + if (destVarDsc->lvNormalizeOnStore()) + { + dest->gtFlags &= (~GTF_VAR_USEASG); + } + } + else + { + // Could be a non-promoted struct, or a floating point type local, or + // an int subject to a partial write. Don't enregister. 
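+                // (The destination is about to be accessed through an indirection of its address with a
+                // type/size that does not match the local, so the local must stay in memory for that access
+                // to be valid.)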
+ lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField)); + + // Mark the local var tree as a definition point of the local. + destLclVarTree->gtFlags |= GTF_VAR_DEF; + if (size < destVarDsc->lvExactSize) + { // If it's not a full-width assignment.... + destLclVarTree->gtFlags |= GTF_VAR_USEASG; + } + + if (dest == destLclVarTree) + { + GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); + dest = gtNewIndir(asgType, addr); + } + } + } + + // Check to ensure we don't have a reducible *(& ... ) + if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR) + { + // If dest is an Indir or Block, and it has a child that is a Addr node + // + GenTree* addrNode = dest->AsIndir()->Addr(); // known to be a GT_ADDR + + // Can we just remove the Ind(Addr(destOp)) and operate directly on 'destOp'? + // + GenTree* destOp = addrNode->gtGetOp1(); + var_types destOpType = destOp->TypeGet(); + + // We can if we have a primitive integer type and the sizes are exactly the same. + // + if ((varTypeIsIntegralOrI(destOp) && (size == genTypeSize(destOpType)))) + { + dest = destOp; + asgType = destOpType; + } + } + + if (dest->gtEffectiveVal()->OperIsIndir()) + { + // If we have no information about the destination, we have to assume it could + // live anywhere (not just in the GC heap). + // Mark the GT_IND node so that we use the correct write barrier helper in case + // the field is a GC ref. + + if (!fgIsIndirOfAddrOfLocal(dest)) + { + dest->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); + tree->gtFlags |= GTF_GLOB_REF; + } + + dest->SetIndirExceptionFlags(this); + tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT); + } + + if (isCopyBlock) + { + if (srcVarDsc != nullptr) + { + // Handled above. + assert(!varTypeIsStruct(srcLclVarTree) || !srcVarDsc->lvPromoted); + if (!varTypeIsFloating(srcLclVarTree->TypeGet()) && + size == genTypeSize(genActualType(srcLclVarTree->TypeGet()))) + { + // Use the src local var directly. + src = srcLclVarTree; + } + else + { + // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar)) + // or indir(lclVarAddr) so it must be on the stack. + unsigned lclVarNum = srcLclVarTree->AsLclVarCommon()->GetLclNum(); + lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_BlockOp)); + GenTree* srcAddr; + if (src == srcLclVarTree) + { + srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src); + src = gtNewOperNode(GT_IND, asgType, srcAddr); + } + else + { + assert(src->OperIsIndir()); + } + } + } + + if (src->OperIsIndir()) + { + if (!fgIsIndirOfAddrOfLocal(src)) + { + // If we have no information about the src, we have to assume it could + // live anywhere (not just in the GC heap). + // Mark the GT_IND node so that we use the correct write barrier helper in case + // the field is a GC ref. + src->gtFlags |= (GTF_GLOB_REF | GTF_IND_TGTANYWHERE); + } + + src->SetIndirExceptionFlags(this); + } + } + else // InitBlk + { +#if FEATURE_SIMD + if (varTypeIsSIMD(asgType)) + { + assert(!isCopyBlock); // Else we would have returned the tree above. + noway_assert(src->IsIntegralConst(0)); + noway_assert(destVarDsc != nullptr); + + src = new (this, GT_SIMD) + GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->GetSimdBaseJitType(), size); + } + else +#endif + { + if (src->OperIsInitVal()) + { + src = src->gtGetOp1(); + } + assert(src->IsCnsIntOrI()); + // This will mutate the integer constant, in place, to be the correct + // value for the type we are using in the assignment. 
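+                // For example, an InitBlk byte value of 0x2A that is being retyped into a TYP_INT
+                // assignment becomes the constant 0x2A2A2A2A, i.e. the single init byte replicated across
+                // the width of asgType.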
+ src->AsIntCon()->FixupInitBlkValue(asgType); + } + } + + // Ensure that the dest is setup appropriately. + if (dest->gtEffectiveVal()->OperIsIndir()) + { + dest = fgMorphBlockOperand(dest, asgType, size, false /*isBlkReqd*/); + } + + // Ensure that the rhs is setup appropriately. + if (isCopyBlock) + { + src = fgMorphBlockOperand(src, asgType, size, false /*isBlkReqd*/); + } + + // Set the lhs and rhs on the assignment. + if (dest != tree->AsOp()->gtOp1) + { + asg->AsOp()->gtOp1 = dest; + } + if (src != asg->AsOp()->gtOp2) + { + asg->AsOp()->gtOp2 = src; + } + + asg->ChangeType(asgType); + dest->gtFlags |= GTF_DONT_CSE; + asg->gtFlags &= ~GTF_EXCEPT; + asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT); + // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate. + asg->gtFlags &= ~GTF_REVERSE_OPS; + +#ifdef DEBUG + if (verbose) + { + printf("fgMorphOneAsgBlock (after):\n"); + gtDispTree(tree); + } +#endif + return tree; + } + + return nullptr; +} + +//------------------------------------------------------------------------ +// fgMorphInitBlock: Morph a block initialization assignment tree. +// +// Arguments: +// tree - A GT_ASG tree that performs block initialization +// +// Return Value: +// A single assignment, when fgMorphOneAsgBlockOp transforms it. +// +// If the destination is a promoted struct local variable then we will try to +// perform a field by field assignment for each of the promoted struct fields. +// This is not always possible (e.g. if the struct has holes and custom layout). +// +// Otherwise the orginal GT_ASG tree is returned unmodified (always correct but +// least desirable because it prevents enregistration and/or blocks independent +// struct promotion). +// +// Assumptions: +// GT_ASG's children have already been morphed. +// +GenTree* Compiler::fgMorphInitBlock(GenTree* tree) +{ + // We must have the GT_ASG form of InitBlkOp. + noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp()); +#ifdef DEBUG + bool morphed = false; +#endif // DEBUG + + GenTree* src = tree->gtGetOp2(); + GenTree* origDest = tree->gtGetOp1(); + + GenTree* dest = fgMorphBlkNode(origDest, true); + if (dest != origDest) + { + tree->AsOp()->gtOp1 = dest; + } + tree->gtType = dest->TypeGet(); + JITDUMP("\nfgMorphInitBlock:"); + + GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); + if (oneAsgTree) + { + JITDUMP(" using oneAsgTree.\n"); + tree = oneAsgTree; + } + else + { + GenTreeLclVarCommon* destLclNode = nullptr; + unsigned destLclNum = BAD_VAR_NUM; + LclVarDsc* destLclVar = nullptr; + GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src; + unsigned blockSize = 0; + + if (dest->IsLocal()) + { + destLclNode = dest->AsLclVarCommon(); + destLclNum = destLclNode->GetLclNum(); + destLclVar = lvaGetDesc(destLclNum); + blockSize = varTypeIsStruct(destLclVar) ? 
destLclVar->lvExactSize : genTypeSize(destLclVar->TypeGet()); + } + else + { + blockSize = dest->AsBlk()->Size(); + + FieldSeqNode* destFldSeq = nullptr; + if (dest->AsIndir()->Addr()->IsLocalAddrExpr(this, &destLclNode, &destFldSeq)) + { + destLclNum = destLclNode->GetLclNum(); + destLclVar = lvaGetDesc(destLclNum); + } + } + + bool destDoFldAsg = false; + + if (destLclNum != BAD_VAR_NUM) + { +#if LOCAL_ASSERTION_PROP + // Kill everything about destLclNum (and its field locals) + if (optLocalAssertionProp && (optAssertionCount > 0)) + { + fgKillDependentAssertions(destLclNum DEBUGARG(tree)); + } +#endif // LOCAL_ASSERTION_PROP + + // If we have already determined that a promoted TYP_STRUCT lclVar will not be enregistered, + // we are better off doing a block init. + if (destLclVar->lvPromoted && (!destLclVar->lvDoNotEnregister || !destLclNode->TypeIs(TYP_STRUCT))) + { + GenTree* newTree = fgMorphPromoteLocalInitBlock(destLclNode->AsLclVar(), initVal, blockSize); + + if (newTree != nullptr) + { + tree = newTree; + destDoFldAsg = true; + INDEBUG(morphed = true); + } + } + + // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister. + if (!destDoFldAsg && !destLclVar->lvRegStruct) + { + lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); + } + } + + if (!destDoFldAsg) + { + // For an InitBlock we always require a block operand. + dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockSize, true /*isBlkReqd*/); + tree->AsOp()->gtOp1 = dest; + tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); + } + } + +#ifdef DEBUG + if (morphed) + { + tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; + + if (verbose) + { + printf("fgMorphInitBlock (after):\n"); + gtDispTree(tree); + } + } +#endif + + return tree; +} + +//------------------------------------------------------------------------ +// fgMorphPromoteLocalInitBlock: Attempts to promote a local block init tree +// to a tree of promoted field initialization assignments. +// +// Arguments: +// destLclNode - The destination LclVar node +// initVal - The initialization value +// blockSize - The amount of bytes to initialize +// +// Return Value: +// A tree that performs field by field initialization of the destination +// struct variable if various conditions are met, nullptr otherwise. 
+// +// Notes: +// This transforms a single block initialization assignment like: +// +// * ASG struct (init) +// +--* BLK(12) struct +// | \--* ADDR long +// | \--* LCL_VAR struct(P) V02 loc0 +// | \--* int V02.a (offs=0x00) -> V06 tmp3 +// | \--* ubyte V02.c (offs=0x04) -> V07 tmp4 +// | \--* float V02.d (offs=0x08) -> V08 tmp5 +// \--* INIT_VAL int +// \--* CNS_INT int 42 +// +// into a COMMA tree of assignments that initialize each promoted struct +// field: +// +// * COMMA void +// +--* COMMA void +// | +--* ASG int +// | | +--* LCL_VAR int V06 tmp3 +// | | \--* CNS_INT int 0x2A2A2A2A +// | \--* ASG ubyte +// | +--* LCL_VAR ubyte V07 tmp4 +// | \--* CNS_INT int 42 +// \--* ASG float +// +--* LCL_VAR float V08 tmp5 +// \--* CNS_DBL float 1.5113661732714390e-13 +// +GenTree* Compiler::fgMorphPromoteLocalInitBlock(GenTreeLclVar* destLclNode, GenTree* initVal, unsigned blockSize) +{ + assert(destLclNode->OperIs(GT_LCL_VAR)); + + LclVarDsc* destLclVar = lvaGetDesc(destLclNode); + assert(varTypeIsStruct(destLclVar->TypeGet())); + assert(destLclVar->lvPromoted); + + if (blockSize == 0) + { + JITDUMP(" size is zero or unknown.\n"); + return nullptr; + } + + if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles) + { + JITDUMP(" dest is address exposed and contains holes.\n"); + return nullptr; + } + + if (destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) + { + JITDUMP(" dest has custom layout and contains holes.\n"); + return nullptr; + } + + if (destLclVar->lvExactSize != blockSize) + { + JITDUMP(" dest size mismatch.\n"); + return nullptr; + } + + if (!initVal->OperIs(GT_CNS_INT)) + { + JITDUMP(" source is not constant.\n"); + return nullptr; + } + + const int64_t initPattern = (initVal->AsIntCon()->IconValue() & 0xFF) * 0x0101010101010101LL; + + if (initPattern != 0) + { + for (unsigned i = 0; i < destLclVar->lvFieldCnt; ++i) + { + LclVarDsc* fieldDesc = lvaGetDesc(destLclVar->lvFieldLclStart + i); + + if (varTypeIsSIMD(fieldDesc->TypeGet()) || varTypeIsGC(fieldDesc->TypeGet())) + { + // Cannot initialize GC or SIMD types with a non-zero constant. + // The former is completly bogus. The later restriction could be + // lifted by supporting non-zero SIMD constants or by generating + // field initialization code that converts an integer constant to + // the appropiate SIMD value. Unlikely to be very useful, though. + JITDUMP(" dest contains GC and/or SIMD fields and source constant is not 0.\n"); + return nullptr; + } + } + } + + JITDUMP(" using field by field initialization.\n"); + + GenTree* tree = nullptr; + + for (unsigned i = 0; i < destLclVar->lvFieldCnt; ++i) + { + unsigned fieldLclNum = destLclVar->lvFieldLclStart + i; + LclVarDsc* fieldDesc = lvaGetDesc(fieldLclNum); + GenTree* dest = gtNewLclvNode(fieldLclNum, fieldDesc->TypeGet()); + // If it had been labeled a "USEASG", assignments to the individual promoted fields are not. + dest->gtFlags |= (destLclNode->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG)); + + GenTree* src; + + switch (dest->TypeGet()) + { + case TYP_BOOL: + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + // Promoted fields are expected to be "normalize on load". If that changes then + // we may need to adjust this code to widen the constant correctly. 
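+                // (With the example init value 42 from the header comment above, initPattern is
+                // 0x2A2A2A2A2A2A2A2A; the mask below then narrows it to the field's width, e.g. 0x2A2A for
+                // a TYP_SHORT field and 0x2A2A2A2A for TYP_INT.)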
+                assert(fieldDesc->lvNormalizeOnLoad());
+                FALLTHROUGH;
+            case TYP_INT:
+            {
+                int64_t mask = (int64_t(1) << (genTypeSize(dest->TypeGet()) * 8)) - 1;
+                src = gtNewIconNode(static_cast<int32_t>(initPattern & mask));
+                break;
+            }
+            case TYP_LONG:
+                src = gtNewLconNode(initPattern);
+                break;
+            case TYP_FLOAT:
+                float floatPattern;
+                memcpy(&floatPattern, &initPattern, sizeof(floatPattern));
+                src = gtNewDconNode(floatPattern, dest->TypeGet());
+                break;
+            case TYP_DOUBLE:
+                double doublePattern;
+                memcpy(&doublePattern, &initPattern, sizeof(doublePattern));
+                src = gtNewDconNode(doublePattern, dest->TypeGet());
+                break;
+            case TYP_REF:
+            case TYP_BYREF:
+#ifdef FEATURE_SIMD
+            case TYP_SIMD8:
+            case TYP_SIMD12:
+            case TYP_SIMD16:
+            case TYP_SIMD32:
+#endif // FEATURE_SIMD
+                assert(initPattern == 0);
+                src = gtNewIconNode(0, dest->TypeGet());
+                break;
+            default:
+                unreached();
+        }
+
+        GenTree* asg = gtNewAssignNode(dest, src);
+
+#if LOCAL_ASSERTION_PROP
+        if (optLocalAssertionProp)
+        {
+            optAssertionGen(asg);
+        }
+#endif // LOCAL_ASSERTION_PROP
+
+        if (tree != nullptr)
+        {
+            tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
+        }
+        else
+        {
+            tree = asg;
+        }
+    }
+
+    return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphGetStructAddr: Gets the address of a struct object
+//
+// Arguments:
+//    pTree    - the parent's pointer to the struct object node
+//    clsHnd   - the class handle for the struct type
+//    isRValue - true if this is a source (not dest)
+//
+// Return Value:
+//    Returns the address of the struct value, possibly modifying the existing tree to
+//    sink the address below any comma nodes (this is to canonicalize for value numbering).
+//    If this is a source, it will morph it to an GT_IND before taking its address,
+//    since it may not be remorphed (and we don't want blk nodes as rvalues).
+
+GenTree* Compiler::fgMorphGetStructAddr(GenTree** pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
+{
+    GenTree* addr;
+    GenTree* tree = *pTree;
+    // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
+    // need to hang onto that for the purposes of value numbering.
+    if (tree->OperIsIndir())
+    {
+        if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
+        {
+            addr = tree->AsOp()->gtOp1;
+        }
+        else
+        {
+            if (isRValue && tree->OperIsBlk())
+            {
+                tree->ChangeOper(GT_IND);
+            }
+            addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+        }
+    }
+    else if (tree->gtOper == GT_COMMA)
+    {
+        // If this is a comma, we're going to "sink" the GT_ADDR below it.
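+        // For example, the struct address of COMMA(sideEffect, LCL_VAR struct V01) becomes
+        // COMMA(sideEffect, ADDR(LCL_VAR V01)), with the COMMA retyped to TYP_BYREF so that the comma
+        // itself now produces the address.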
+ (void)fgMorphGetStructAddr(&(tree->AsOp()->gtOp2), clsHnd, isRValue); + tree->gtType = TYP_BYREF; + addr = tree; + } + else + { + switch (tree->gtOper) + { + case GT_LCL_FLD: + case GT_LCL_VAR: + case GT_INDEX: + case GT_FIELD: + case GT_ARR_ELEM: + addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree); + break; + case GT_INDEX_ADDR: + addr = tree; + break; + default: + { + // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're + // not going to use "temp" + GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd); + unsigned lclNum = temp->gtEffectiveVal()->AsLclVar()->GetLclNum(); + lvaSetVarDoNotEnregister(lclNum DEBUG_ARG(DNER_VMNeedsStackAddr)); + addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue); + break; + } + } + } + *pTree = addr; + return addr; +} + +//------------------------------------------------------------------------ +// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment +// +// Arguments: +// tree - The struct type node +// isDest - True if this is the destination of the assignment +// +// Return Value: +// Returns the possibly-morphed node. The caller is responsible for updating +// the parent of this node.. + +GenTree* Compiler::fgMorphBlkNode(GenTree* tree, bool isDest) +{ + JITDUMP("fgMorphBlkNode for %s tree, before:\n", (isDest ? "dst" : "src")); + DISPTREE(tree); + GenTree* handleTree = nullptr; + GenTree* addr = nullptr; + if (tree->OperIs(GT_COMMA)) + { + // In order to CSE and value number array index expressions and bounds checks, + // the commas in which they are contained need to match. + // The pattern is that the COMMA should be the address expression. + // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind. + // TODO-1stClassStructs: Consider whether this can be improved. 
+ // Example: + // before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct + // after: [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct + + addr = tree; + GenTree* effectiveVal = tree->gtEffectiveVal(); + + GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); + for (GenTree* comma = tree; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) + { + commas.Push(comma); + } + + GenTree* lastComma = commas.Top(); + noway_assert(lastComma->gtGetOp2() == effectiveVal); + GenTree* effectiveValAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); +#ifdef DEBUG + effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + lastComma->AsOp()->gtOp2 = effectiveValAddr; + + while (!commas.Empty()) + { + GenTree* comma = commas.Pop(); + comma->gtType = TYP_BYREF; + gtUpdateNodeSideEffects(comma); + } + + handleTree = effectiveVal; + } + else if (tree->OperIs(GT_IND) && tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR)) + { + handleTree = tree; + addr = tree->AsIndir()->Addr(); + } + + if (addr != nullptr) + { + var_types structType = handleTree->TypeGet(); + if (structType == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(handleTree); + if (structHnd == NO_CLASS_HANDLE) + { + tree = gtNewOperNode(GT_IND, structType, addr); + } + else + { + tree = gtNewObjNode(structHnd, addr); + gtSetObjGcInfo(tree->AsObj()); + } + } + else + { + tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, typGetBlkLayout(genTypeSize(structType))); + } + + gtUpdateNodeSideEffects(tree); +#ifdef DEBUG + tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + } + + if (!tree->OperIsBlk()) + { + JITDUMP("fgMorphBlkNode after:\n"); + DISPTREE(tree); + return tree; + } + GenTreeBlk* blkNode = tree->AsBlk(); + if (blkNode->OperGet() == GT_DYN_BLK) + { + if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI()) + { + unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue(); + // A GT_BLK with size of zero is not supported, + // so if we encounter such a thing we just leave it as a GT_DYN_BLK + if (size != 0) + { + blkNode->AsDynBlk()->gtDynamicSize = nullptr; + blkNode->ChangeOper(GT_BLK); + blkNode->SetLayout(typGetBlkLayout(size)); + } + else + { + JITDUMP("fgMorphBlkNode after, DYN_BLK with zero size can't be morphed:\n"); + DISPTREE(blkNode); + return blkNode; + } + } + else + { + JITDUMP("fgMorphBlkNode after, DYN_BLK with non-const size can't be morphed:\n"); + DISPTREE(blkNode); + return blkNode; + } + } + GenTree* blkSrc = blkNode->Addr(); + assert(blkSrc != nullptr); + if (!blkNode->TypeIs(TYP_STRUCT) && blkSrc->OperIs(GT_ADDR) && blkSrc->gtGetOp1()->OperIs(GT_LCL_VAR)) + { + GenTreeLclVarCommon* lclVarNode = blkSrc->gtGetOp1()->AsLclVarCommon(); + if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode))) + { + lvaSetVarDoNotEnregister(lclVarNode->GetLclNum() DEBUG_ARG(DNER_VMNeedsStackAddr)); + } + } + + JITDUMP("fgMorphBlkNode after:\n"); + DISPTREE(tree); + return tree; +} + +//------------------------------------------------------------------------ +// fgMorphBlockOperand: Canonicalize an operand of a block assignment +// +// Arguments: +// tree - The block operand +// asgType - The type of the assignment +// blockWidth - The size of the block +// isBlkReqd - true iff this operand must remain a block node +// +// Return Value: +// Returns the morphed block operand +// +// Notes: +// This does the following: +// - Ensures that a struct operand is 
a block node or lclVar. +// - Ensures that any COMMAs are above ADDR nodes. +// Although 'tree' WAS an operand of a block assignment, the assignment +// may have been retyped to be a scalar assignment. + +GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isBlkReqd) +{ + GenTree* effectiveVal = tree->gtEffectiveVal(); + + if (asgType != TYP_STRUCT) + { + if (effectiveVal->OperIsIndir()) + { + if (!isBlkReqd) + { + GenTree* addr = effectiveVal->AsIndir()->Addr(); + if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType)) + { + effectiveVal = addr->gtGetOp1(); + } + else if (effectiveVal->OperIsBlk()) + { + effectiveVal->SetOper(GT_IND); + } + } + effectiveVal->gtType = asgType; + } + else if (effectiveVal->TypeGet() != asgType) + { + if (effectiveVal->IsCall()) + { +#ifdef DEBUG + GenTreeCall* call = effectiveVal->AsCall(); + assert(call->TypeGet() == TYP_STRUCT); + assert(blockWidth == info.compCompHnd->getClassSize(call->gtRetClsHnd)); +#endif + } + else + { + GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); + effectiveVal = gtNewIndir(asgType, addr); + } + } + } + else + { + GenTreeIndir* indirTree = nullptr; + GenTreeLclVarCommon* lclNode = nullptr; + bool needsIndirection = true; + + if (effectiveVal->OperIsIndir()) + { + indirTree = effectiveVal->AsIndir(); + GenTree* addr = effectiveVal->AsIndir()->Addr(); + if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR)) + { + lclNode = addr->gtGetOp1()->AsLclVarCommon(); + } + } + else if (effectiveVal->OperGet() == GT_LCL_VAR) + { + lclNode = effectiveVal->AsLclVarCommon(); + } + else if (effectiveVal->IsCall()) + { + needsIndirection = false; +#ifdef DEBUG + GenTreeCall* call = effectiveVal->AsCall(); + assert(call->TypeGet() == TYP_STRUCT); + assert(blockWidth == info.compCompHnd->getClassSize(call->gtRetClsHnd)); +#endif + } + + if (lclNode != nullptr) + { + LclVarDsc* varDsc = &(lvaTable[lclNode->GetLclNum()]); + if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth) && (varDsc->lvType == asgType)) + { + if (effectiveVal != lclNode) + { + JITDUMP("Replacing block node [%06d] with lclVar V%02u\n", dspTreeID(tree), lclNode->GetLclNum()); + effectiveVal = lclNode; + } + needsIndirection = false; + } + else + { + // This may be a lclVar that was determined to be address-exposed. + effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT); + } + } + if (needsIndirection) + { + if (indirTree != nullptr) + { + if (indirTree->OperIsBlk() && !isBlkReqd) + { + effectiveVal->SetOper(GT_IND); + } + else + { + // If we have an indirection and a block is required, it should already be a block. 
+ assert(indirTree->OperIsBlk() || !isBlkReqd); + } + effectiveVal->gtType = asgType; + } + else + { + GenTree* newTree; + GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); + if (isBlkReqd) + { + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal); + if (clsHnd == NO_CLASS_HANDLE) + { + newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, typGetBlkLayout(blockWidth)); + } + else + { + newTree = gtNewObjNode(clsHnd, addr); + gtSetObjGcInfo(newTree->AsObj()); + } + } + else + { + newTree = gtNewIndir(asgType, addr); + } + effectiveVal = newTree; + } + } + } + assert(effectiveVal->TypeIs(asgType) || (varTypeIsSIMD(asgType) && varTypeIsStruct(effectiveVal))); + tree = effectiveVal; + return tree; +} + +//------------------------------------------------------------------------ +// fgMorphCopyBlock: Perform the Morphing of block copy +// +// Arguments: +// tree - a block copy (i.e. an assignment with a block op on the lhs). +// +// Return Value: +// We can return the orginal block copy unmodified (least desirable, but always correct) +// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable). +// If we have performed struct promotion of the Source() or the Dest() then we will try to +// perform a field by field assignment for each of the promoted struct fields. +// +// Assumptions: +// The child nodes for tree have already been Morphed. +// +// Notes: +// If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest(). +// When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes +// and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes. +// if the Source() or Dest() is a a struct that has a "CustomLayout" and "ConstainsHoles" then we +// can not use a field by field assignment and must leave the orginal block copy unmodified. + +GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) +{ + noway_assert(tree->OperIsCopyBlkOp()); + + JITDUMP("fgMorphCopyBlock:\n"); + + bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0; + + GenTreeOp* asg = tree->AsOp(); + GenTree* src = asg->gtGetOp2(); + GenTree* dest = asg->gtGetOp1(); + +#if FEATURE_MULTIREG_RET + // If this is a multi-reg return, we will not do any morphing of this node. + if (src->IsMultiRegCall()) + { + assert(dest->OperGet() == GT_LCL_VAR); + JITDUMP(" not morphing a multireg call return\n"); + return tree; + } + else if (dest->IsMultiRegLclVar() && !src->IsMultiRegNode()) + { + dest->AsLclVar()->ClearMultiReg(); + } +#endif // FEATURE_MULTIREG_RET + + if (src->IsCall()) + { + if (dest->OperIs(GT_OBJ)) + { + GenTreeLclVar* lclVar = fgMorphTryFoldObjAsLclVar(dest->AsObj()); + if (lclVar != nullptr) + { + dest = lclVar; + asg->gtOp1 = lclVar; + } + } + + if (dest->OperIs(GT_LCL_VAR)) + { + LclVarDsc* varDsc = lvaGetDesc(dest->AsLclVar()); + if (varTypeIsStruct(varDsc) && varDsc->CanBeReplacedWithItsField(this)) + { + JITDUMP(" not morphing a single reg call return\n"); + return tree; + } + } + } + + // If we have an array index on the lhs, we need to create an obj node. 
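+    // (fgMorphBlkNode also gives a COMMA destination an explicit block form by wrapping it in an OBJ/BLK
+    // over its address, and turns a DYN_BLK with a non-zero constant size into a BLK of that size.)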
+ + dest = fgMorphBlkNode(dest, true); + if (dest != asg->gtGetOp1()) + { + asg->gtOp1 = dest; + if (dest->IsLocal()) + { + dest->gtFlags |= GTF_VAR_DEF; + } + } +#ifdef DEBUG + if (asg->TypeGet() != dest->TypeGet()) + { + JITDUMP("changing type of dest from %-6s to %-6s\n", varTypeName(asg->TypeGet()), varTypeName(dest->TypeGet())); + } +#endif + asg->ChangeType(dest->TypeGet()); + src = fgMorphBlkNode(src, false); + + asg->gtOp2 = src; + + GenTree* oldTree = tree; + GenTree* oneAsgTree = fgMorphOneAsgBlockOp(tree); + + if (oneAsgTree) + { + JITDUMP(" using oneAsgTree.\n"); + tree = oneAsgTree; + } + else + { + unsigned blockWidth; + bool blockWidthIsConst = false; + GenTreeLclVarCommon* lclVarTree = nullptr; + GenTreeLclVarCommon* srcLclVarTree = nullptr; + unsigned destLclNum = BAD_VAR_NUM; + unsigned modifiedLclNum = BAD_VAR_NUM; + LclVarDsc* destLclVar = nullptr; + FieldSeqNode* destFldSeq = nullptr; + unsigned destLclOffset = 0; + bool destDoFldAsg = false; + GenTree* destAddr = nullptr; + GenTree* srcAddr = nullptr; + bool destOnStack = false; + bool hasGCPtrs = false; + + JITDUMP("block assignment to morph:\n"); + DISPTREE(asg); + + if (dest->IsLocal()) + { + blockWidthIsConst = true; + destOnStack = true; + modifiedLclNum = dest->AsLclVarCommon()->GetLclNum(); + if (dest->gtOper == GT_LCL_VAR) + { + lclVarTree = dest->AsLclVarCommon(); + destLclNum = modifiedLclNum; + destLclVar = &lvaTable[destLclNum]; + if (destLclVar->lvType == TYP_STRUCT) + { + // It would be nice if lvExactSize always corresponded to the size of the struct, + // but it doesn't always for the temps that the importer creates when it spills side + // effects. + // TODO-Cleanup: Determine when this happens, and whether it can be changed. + blockWidth = info.compCompHnd->getClassSize(destLclVar->GetStructHnd()); + } + else + { + blockWidth = genTypeSize(destLclVar->lvType); + } + hasGCPtrs = destLclVar->HasGCPtr(); + } + else + { + assert(dest->TypeGet() != TYP_STRUCT); + assert(dest->gtOper == GT_LCL_FLD); + GenTreeLclFld* destFld = dest->AsLclFld(); + blockWidth = genTypeSize(destFld->TypeGet()); + destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, destFld); + destFldSeq = destFld->GetFieldSeq(); + destLclOffset = destFld->GetLclOffs(); + } + } + else + { + GenTree* effectiveDest = dest->gtEffectiveVal(); + if (effectiveDest->OperGet() == GT_IND) + { + assert(dest->TypeGet() != TYP_STRUCT); + blockWidth = genTypeSize(effectiveDest->TypeGet()); + blockWidthIsConst = true; + if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) + { + destAddr = dest->gtGetOp1(); + } + } + else + { + assert(effectiveDest->OperIsBlk()); + GenTreeBlk* blk = effectiveDest->AsBlk(); + + blockWidth = blk->Size(); + blockWidthIsConst = (blk->gtOper != GT_DYN_BLK); + if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0)) + { + destAddr = blk->Addr(); + } + } + if (destAddr != nullptr) + { + noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL); + if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq)) + { + destOnStack = true; + destLclNum = lclVarTree->GetLclNum(); + modifiedLclNum = destLclNum; + destLclVar = &lvaTable[destLclNum]; + destLclOffset = lclVarTree->GetLclOffs(); + } + } + } + +#if LOCAL_ASSERTION_PROP + // Kill everything about modifiedLclNum (and its field locals) + if ((modifiedLclNum != BAD_VAR_NUM) && optLocalAssertionProp) + { + if (optAssertionCount > 0) + { + fgKillDependentAssertions(modifiedLclNum DEBUGARG(tree)); + } + } +#endif // 
LOCAL_ASSERTION_PROP + + if (destLclVar != nullptr) + { + if (destLclVar->lvPromoted && blockWidthIsConst) + { + noway_assert(varTypeIsStruct(destLclVar)); + noway_assert(!opts.MinOpts()); + + if (blockWidth == destLclVar->lvExactSize) + { + JITDUMP(" (destDoFldAsg=true)"); + // We may decide later that a copyblk is required when this struct has holes + destDoFldAsg = true; + } + else + { + JITDUMP(" with mismatched dest size"); + } + } + } + + FieldSeqNode* srcFldSeq = nullptr; + unsigned srcLclNum = BAD_VAR_NUM; + LclVarDsc* srcLclVar = nullptr; + unsigned srcLclOffset = 0; + bool srcDoFldAsg = false; + + bool srcUseLclFld = false; + bool destUseLclFld = false; + + if (src->IsLocal()) + { + srcLclVarTree = src->AsLclVarCommon(); + srcLclNum = srcLclVarTree->GetLclNum(); + if (src->OperGet() == GT_LCL_FLD) + { + srcFldSeq = src->AsLclFld()->GetFieldSeq(); + } + } + else if (src->OperIsIndir()) + { + if (src->AsOp()->gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq)) + { + srcLclNum = srcLclVarTree->GetLclNum(); + } + else + { + srcAddr = src->AsOp()->gtOp1; + } + } + + if (srcLclNum != BAD_VAR_NUM) + { + srcLclOffset = srcLclVarTree->GetLclOffs(); + srcLclVar = &lvaTable[srcLclNum]; + + if (srcLclVar->lvPromoted && blockWidthIsConst) + { + noway_assert(varTypeIsStruct(srcLclVar)); + noway_assert(!opts.MinOpts()); + + if (blockWidth == srcLclVar->lvExactSize) + { + JITDUMP(" (srcDoFldAsg=true)"); + // We may decide later that a copyblk is required when this struct has holes + srcDoFldAsg = true; + } + else + { + JITDUMP(" with mismatched src size"); + } + } + } + + // Check to see if we are doing a copy to/from the same local block. + // If so, morph it to a nop. + if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq) && + destFldSeq != FieldSeqStore::NotAField()) + { + JITDUMP("Self-copy; replaced with a NOP.\n"); + GenTree* nop = gtNewNothingNode(); + INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return nop; + } + + // Check to see if we are required to do a copy block because the struct contains holes + // and either the src or dest is externally visible + // + bool requiresCopyBlock = false; + bool srcSingleLclVarAsg = false; + bool destSingleLclVarAsg = false; + + // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock. + if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct)) + { + requiresCopyBlock = true; + } + + // Can we use field by field assignment for the dest? + if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles) + { + JITDUMP(" dest contains custom layout and contains holes"); + // C++ style CopyBlock with holes + requiresCopyBlock = true; + } + + // Can we use field by field assignment for the src? + if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles) + { + JITDUMP(" src contains custom layout and contains holes"); + // C++ style CopyBlock with holes + requiresCopyBlock = true; + } + +#if defined(TARGET_ARM) + if ((src->OperIsIndir()) && (src->gtFlags & GTF_IND_UNALIGNED)) + { + JITDUMP(" src is unaligned"); + requiresCopyBlock = true; + } + + if (asg->gtFlags & GTF_BLK_UNALIGNED) + { + JITDUMP(" asg is unaligned"); + requiresCopyBlock = true; + } +#endif // TARGET_ARM + + // Don't use field by field assignment if the src is a call, + // lowering will handle it without spilling the call result into memory + // to access the individual fields. 
+ // + if (src->OperGet() == GT_CALL) + { + JITDUMP(" src is a call"); + requiresCopyBlock = true; + } + + // If we passed the above checks, then we will check these two + if (!requiresCopyBlock) + { + // It is not always profitable to do field by field init for structs that are allocated to memory. + // A struct with 8 bool fields will require 8 moves instead of one if we do this transformation. + // A simple heuristic when field by field copy is prefered: + // - if fields can be enregistered; + // - if the struct has GCPtrs (block copy would be done via helper that is expensive); + // - if the struct has only one field. + bool dstFldIsProfitable = + ((destLclVar != nullptr) && + (!destLclVar->lvDoNotEnregister || destLclVar->HasGCPtr() || (destLclVar->lvFieldCnt == 1))); + bool srcFldIsProfitable = + ((srcLclVar != nullptr) && + (!srcLclVar->lvDoNotEnregister || srcLclVar->HasGCPtr() || (srcLclVar->lvFieldCnt == 1))); + // Are both dest and src promoted structs? + if (destDoFldAsg && srcDoFldAsg && (dstFldIsProfitable || srcFldIsProfitable)) + { + // Both structs should be of the same type, or have the same number of fields of the same type. + // If not we will use a copy block. + bool misMatchedTypes = false; + if (destLclVar->GetStructHnd() != srcLclVar->GetStructHnd()) + { + if (destLclVar->lvFieldCnt != srcLclVar->lvFieldCnt) + { + misMatchedTypes = true; + } + else + { + for (int i = 0; i < destLclVar->lvFieldCnt; i++) + { + LclVarDsc* destFieldVarDsc = lvaGetDesc(destLclVar->lvFieldLclStart + i); + LclVarDsc* srcFieldVarDsc = lvaGetDesc(srcLclVar->lvFieldLclStart + i); + if ((destFieldVarDsc->lvType != srcFieldVarDsc->lvType) || + (destFieldVarDsc->lvFldOffset != srcFieldVarDsc->lvFldOffset)) + { + misMatchedTypes = true; + break; + } + } + } + if (misMatchedTypes) + { + requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock + JITDUMP(" with mismatched types"); + } + } + } + else if (destDoFldAsg && dstFldIsProfitable) + { + // Match the following kinds of trees: + // fgMorphTree BB01, stmt 9 (before) + // [000052] ------------ const int 8 + // [000053] -A--G------- copyBlk void + // [000051] ------------ addr byref + // [000050] ------------ lclVar long V07 loc5 + // [000054] --------R--- void + // [000049] ------------ addr byref + // [000048] ------------ lclVar struct(P) V06 loc4 + // long V06.h (offs=0x00) -> V17 tmp9 + // Yields this transformation + // fgMorphCopyBlock (after): + // [000050] ------------ lclVar long V07 loc5 + // [000085] -A---------- = long + // [000083] D------N---- lclVar long V17 tmp9 + // + if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) && + (blockWidth == genTypeSize(srcLclVar->TypeGet()))) + { + // Reject the following tree: + // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe + // + // fgMorphTree BB01, stmt 6 (before) + // [000038] ------------- const int 4 + // [000039] -A--G-------- copyBlk void + // [000037] ------------- addr byref + // [000036] ------------- lclVar int V05 loc3 + // [000040] --------R---- void + // [000035] ------------- addr byref + // [000034] ------------- lclVar struct(P) V04 loc2 + // float V04.f1 (offs=0x00) -> V13 tmp6 + // As this would framsform into + // float V13 = int V05 + // + unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart; + var_types destType = lvaTable[fieldLclNum].TypeGet(); + if (srcLclVar->TypeGet() == destType) + { + srcSingleLclVarAsg = true; + } + } + } + else if (srcDoFldAsg && srcFldIsProfitable) + { + // Check for the symmetric case (which 
happens for the _pointer field of promoted spans): + // + // [000240] -----+------ /--* lclVar struct(P) V18 tmp9 + // /--* byref V18._value (offs=0x00) -> V30 tmp21 + // [000245] -A------R--- * = struct (copy) + // [000244] -----+------ \--* obj(8) struct + // [000243] -----+------ \--* addr byref + // [000242] D----+-N---- \--* lclVar byref V28 tmp19 + // + if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) && + (blockWidth == genTypeSize(destLclVar->TypeGet()))) + { + // Check for type agreement + unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart; + var_types srcType = lvaTable[fieldLclNum].TypeGet(); + if (destLclVar->TypeGet() == srcType) + { + destSingleLclVarAsg = true; + } + } + } + // Are neither dest or src promoted structs? + else + { + assert(!(destDoFldAsg && dstFldIsProfitable) && !(srcDoFldAsg && srcFldIsProfitable)); + requiresCopyBlock = true; // Leave as a CopyBlock + JITDUMP(" with no promoted structs"); + } + } + + // If we require a copy block the set both of the field assign bools to false + if (requiresCopyBlock) + { + // If a copy block is required then we won't do field by field assignments + destDoFldAsg = false; + srcDoFldAsg = false; + } + + JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n"); + + // Mark the dest/src structs as DoNotEnreg when they are not being fully referenced as the same type. + // + if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg) + { + if (!destLclVar->lvRegStruct || (destLclVar->lvType != dest->TypeGet())) + { + if (!dest->IsMultiRegLclVar() || (blockWidth != destLclVar->lvExactSize) || + (destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)) + { + // Mark it as DoNotEnregister. + lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp)); + } + else if (dest->IsMultiRegLclVar()) + { + // Handle this as lvIsMultiRegRet; this signals to SSA that it can't consider these fields + // SSA candidates (we don't have a way to represent multiple SSANums on MultiRegLclVar nodes). + destLclVar->lvIsMultiRegRet = true; + } + } + } + + if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg) + { + if (!srcLclVar->lvRegStruct || (srcLclVar->lvType != dest->TypeGet())) + { + lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp)); + } + } + + var_types asgType = dest->TypeGet(); + if (requiresCopyBlock) + { + bool isBlkReqd = (asgType == TYP_STRUCT); + dest = fgMorphBlockOperand(dest, asgType, blockWidth, isBlkReqd); + asg->AsOp()->gtOp1 = dest; + asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); + + // Eliminate the "OBJ or BLK" node on the src. + src = fgMorphBlockOperand(src, asgType, blockWidth, false /*!isBlkReqd*/); + asg->AsOp()->gtOp2 = src; + + goto _Done; + } + + // + // Otherwise we convert this CopyBlock into individual field by field assignments + // + tree = nullptr; + + GenTree* addrSpill = nullptr; + unsigned addrSpillTemp = BAD_VAR_NUM; + bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame + + unsigned fieldCnt = DUMMY_INIT(0); + + if (destDoFldAsg && srcDoFldAsg) + { + // To do fieldwise assignments for both sides. + // The structs do not have to be the same exact types but have to have same field types + // at the same offsets. 
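+        // Illustrative sketch (hypothetical struct and local numbers): for
+        //     struct S { long a; double b; }
+        // where both the dest (V03) and the src (V04) are independently promoted, the block copy
+        //     copyBlk(&V03, &V04, 16)
+        // is morphed below into scalar assignments of the promoted field locals:
+        //     V10 (V03.a) = V12 (V04.a)   ; long
+        //     V11 (V03.b) = V13 (V04.b)   ; double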
+ assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM); + assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt); + + fieldCnt = destLclVar->lvFieldCnt; + goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field + // assignments. + } + else if (destDoFldAsg) + { + fieldCnt = destLclVar->lvFieldCnt; + src = fgMorphBlockOperand(src, asgType, blockWidth, false /*isBlkReqd*/); + + srcUseLclFld = fgMorphCanUseLclFldForCopy(destLclNum, srcLclNum); + + if (!srcUseLclFld && srcAddr == nullptr) + { + srcAddr = fgMorphGetStructAddr(&src, destLclVar->GetStructHnd(), true /* rValue */); + } + } + else + { + assert(srcDoFldAsg); + fieldCnt = srcLclVar->lvFieldCnt; + dest = fgMorphBlockOperand(dest, asgType, blockWidth, false /*isBlkReqd*/); + if (dest->OperIsBlk()) + { + dest->SetOper(GT_IND); + dest->gtType = TYP_STRUCT; + } + destUseLclFld = fgMorphCanUseLclFldForCopy(srcLclNum, destLclNum); + if (!destUseLclFld) + { + destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); + } + } + + if (destDoFldAsg) + { + noway_assert(!srcDoFldAsg); + if (!srcUseLclFld) + { + if (gtClone(srcAddr)) + { + // srcAddr is simple expression. No need to spill. + noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); + } + else + { + // srcAddr is complex expression. Clone and spill it (unless the destination is + // a struct local that only has one field, in which case we'd only use the + // address value once...) + if (destLclVar->lvFieldCnt > 1) + { + // We will spill srcAddr (i.e. assign to a temp "BlockOp address local") + // no need to clone a new copy as it is only used once + // + addrSpill = srcAddr; // addrSpill represents the 'srcAddr' + } + } + } + } + + if (srcDoFldAsg) + { + noway_assert(!destDoFldAsg); + + // If we're doing field-wise stores, to an address within a local, and we copy + // the address into "addrSpill", do *not* declare the original local var node in the + // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the + // field-wise assignments as an "indirect" assignment to the local. + // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before + // we clone it.) + if (lclVarTree != nullptr) + { + lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG); + } + + if (!destUseLclFld) + { + if (gtClone(destAddr)) + { + // destAddr is simple expression. No need to spill + noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); + } + else + { + // destAddr is complex expression. Clone and spill it (unless + // the source is a struct local that only has one field, in which case we'd only + // use the address value once...) + if (srcLclVar->lvFieldCnt > 1) + { + // We will spill destAddr (i.e. assign to a temp "BlockOp address local") + // no need to clone a new copy as it is only used once + // + addrSpill = destAddr; // addrSpill represents the 'destAddr' + } + } + } + } + + // TODO-CQ: this should be based on a more general + // "BaseAddress" method, that handles fields of structs, before or after + // morphing. + if ((addrSpill != nullptr) && addrSpill->OperIs(GT_ADDR)) + { + GenTree* addrSpillOp = addrSpill->AsOp()->gtGetOp1(); + if (addrSpillOp->IsLocal()) + { + // We will *not* consider this to define the local, but rather have each individual field assign + // be a definition. 
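+                // In other words (illustrative): if the spilled address is &V05, the assignment of
+                // &V05 to the "BlockOp address local" temp is not treated as a def of V05; the
+                // per-field stores performed through that temp are the defs that liveness sees.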
+ addrSpillOp->gtFlags &= ~(GTF_LIVENESS_MASK); + assert(lvaGetPromotionType(addrSpillOp->AsLclVarCommon()->GetLclNum()) != PROMOTION_TYPE_INDEPENDENT); + addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our + // local stack frame + } + } + + if (addrSpill != nullptr) + { + // 'addrSpill' is already morphed + + // Spill the (complex) address to a BYREF temp. + // Note, at most one address may need to be spilled. + addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local")); + + lvaTable[addrSpillTemp].lvType = TYP_BYREF; + + if (addrSpillIsStackDest) + { + lvaTable[addrSpillTemp].lvStackByref = true; + } + + tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill); + + // If we are assigning the address of a LclVar here + // liveness does not account for this kind of address taken use. + // + // We have to mark this local as address exposed so + // that we don't delete the definition for this LclVar + // as a dead store later on. + // + if (addrSpill->OperGet() == GT_ADDR) + { + GenTree* addrOp = addrSpill->AsOp()->gtOp1; + if (addrOp->IsLocal()) + { + unsigned lclVarNum = addrOp->AsLclVarCommon()->GetLclNum(); + lvaTable[lclVarNum].lvAddrExposed = true; + lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed)); + } + } + } + + _AssignFields: + + // We may have allocated a temp above, and that may have caused the lvaTable to be expanded. + // So, beyond this point we cannot rely on the old values of 'srcLclVar' and 'destLclVar'. + for (unsigned i = 0; i < fieldCnt; ++i) + { + GenTree* dstFld; + if (destDoFldAsg) + { + noway_assert(destLclNum != BAD_VAR_NUM); + unsigned dstFieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; + dstFld = gtNewLclvNode(dstFieldLclNum, lvaTable[dstFieldLclNum].TypeGet()); + // If it had been labeled a "USEASG", assignments to the individual promoted fields are not. + if (destAddr != nullptr) + { + noway_assert(destAddr->AsOp()->gtOp1->gtOper == GT_LCL_VAR); + dstFld->gtFlags |= destAddr->AsOp()->gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); + } + else + { + noway_assert(lclVarTree != nullptr); + dstFld->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG); + } + // Don't CSE the lhs of an assignment. + dstFld->gtFlags |= GTF_DONT_CSE; + } + else + { + noway_assert(srcDoFldAsg); + + if (destSingleLclVarAsg) + { + noway_assert(fieldCnt == 1); + noway_assert(destLclVar != nullptr); + noway_assert(addrSpill == nullptr); + + dstFld = gtNewLclvNode(destLclNum, destLclVar->TypeGet()); + } + else + { + GenTree* dstAddrClone = nullptr; + if (!destUseLclFld) + { + // Need address of the destination. + if (addrSpill) + { + assert(addrSpillTemp != BAD_VAR_NUM); + dstAddrClone = gtNewLclvNode(addrSpillTemp, TYP_BYREF); + } + else + { + if (i == 0) + { + // Use the orginal destAddr tree when i == 0 + dstAddrClone = destAddr; + } + else + { + // We can't clone multiple copies of a tree with persistent side effects + noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); + + dstAddrClone = gtCloneExpr(destAddr); + noway_assert(dstAddrClone != nullptr); + + JITDUMP("dstAddr - Multiple Fields Clone created:\n"); + DISPTREE(dstAddrClone); + + // Morph the newly created tree + dstAddrClone = fgMorphTree(dstAddrClone); + } + + // Is the address of a local? + GenTreeLclVarCommon* lclVarTree = nullptr; + bool isEntire = false; + bool* pIsEntire = (blockWidthIsConst ? 
&isEntire : nullptr); + if (dstAddrClone->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire)) + { + lclVarTree->gtFlags |= GTF_VAR_DEF; + if (!isEntire) + { + lclVarTree->gtFlags |= GTF_VAR_USEASG; + } + } + } + } + + LclVarDsc* srcVarDsc = lvaGetDesc(srcLclNum); + unsigned srcFieldLclNum = srcVarDsc->lvFieldLclStart + i; + LclVarDsc* srcFieldVarDsc = lvaGetDesc(srcFieldLclNum); + + // Have to set the field sequence -- which means we need the field handle. + CORINFO_CLASS_HANDLE classHnd = srcVarDsc->GetStructHnd(); + CORINFO_FIELD_HANDLE fieldHnd = + info.compCompHnd->getFieldInClass(classHnd, srcFieldVarDsc->lvFldOrdinal); + FieldSeqNode* curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); + + unsigned srcFieldOffset = lvaGetDesc(srcFieldLclNum)->lvFldOffset; + var_types srcType = srcFieldVarDsc->TypeGet(); + + if (!destUseLclFld) + { + + if (srcFieldOffset == 0) + { + fgAddFieldSeqForZeroOffset(dstAddrClone, curFieldSeq); + } + else + { + GenTree* fieldOffsetNode = gtNewIconNode(srcFieldVarDsc->lvFldOffset, curFieldSeq); + dstAddrClone = gtNewOperNode(GT_ADD, TYP_BYREF, dstAddrClone, fieldOffsetNode); + } + + dstFld = gtNewIndir(srcType, dstAddrClone); + } + else + { + assert(dstAddrClone == nullptr); + assert((destLclOffset == 0) || (destFldSeq != nullptr)); + // If the dst was a struct type field "B" in a struct "A" then we add + // add offset of ("B" in "A") + current offset in "B". + unsigned summOffset = destLclOffset + srcFieldOffset; + dstFld = gtNewLclFldNode(destLclNum, srcType, summOffset); + FieldSeqNode* dstFldFldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq); + dstFld->AsLclFld()->SetFieldSeq(dstFldFldSeq); + + // TODO-1stClassStructs: remove this and implement storing to a field in a struct in a reg. + lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_LocalField)); + } + + // !!! The destination could be on stack. !!! + // This flag will let us choose the correct write barrier. + dstFld->gtFlags |= GTF_IND_TGTANYWHERE; + } + } + + GenTree* srcFld = nullptr; + if (srcDoFldAsg) + { + noway_assert(srcLclNum != BAD_VAR_NUM); + unsigned srcFieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i; + srcFld = gtNewLclvNode(srcFieldLclNum, lvaTable[srcFieldLclNum].TypeGet()); + + noway_assert(srcLclVarTree != nullptr); + srcFld->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK; + } + else + { + noway_assert(destDoFldAsg); + noway_assert(destLclNum != BAD_VAR_NUM); + unsigned dstFieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i; + + if (srcSingleLclVarAsg) + { + noway_assert(fieldCnt == 1); + noway_assert(srcLclNum != BAD_VAR_NUM); + noway_assert(addrSpill == nullptr); + + srcFld = gtNewLclvNode(srcLclNum, lvaGetDesc(srcLclNum)->TypeGet()); + } + else + { + GenTree* srcAddrClone = nullptr; + if (!srcUseLclFld) + { + // Need address of the source. 
+ if (addrSpill) + { + assert(addrSpillTemp != BAD_VAR_NUM); + srcAddrClone = gtNewLclvNode(addrSpillTemp, TYP_BYREF); + } + else + { + if (i == 0) + { + // Use the orginal srcAddr tree when i == 0 + srcAddrClone = srcAddr; + } + else + { + // We can't clone multiple copies of a tree with persistent side effects + noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0); + + srcAddrClone = gtCloneExpr(srcAddr); + noway_assert(srcAddrClone != nullptr); + + JITDUMP("srcAddr - Multiple Fields Clone created:\n"); + DISPTREE(srcAddrClone); + + // Morph the newly created tree + srcAddrClone = fgMorphTree(srcAddrClone); + } + } + } + + CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].GetStructHnd(); + CORINFO_FIELD_HANDLE fieldHnd = + info.compCompHnd->getFieldInClass(classHnd, lvaTable[dstFieldLclNum].lvFldOrdinal); + FieldSeqNode* curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd); + var_types destType = lvaGetDesc(dstFieldLclNum)->lvType; + + bool done = false; + if (lvaGetDesc(dstFieldLclNum)->lvFldOffset == 0) + { + // If this is a full-width use of the src via a different type, we need to create a GT_LCL_FLD. + // (Note that if it was the same type, 'srcSingleLclVarAsg' would be true.) + if (srcLclNum != BAD_VAR_NUM) + { + noway_assert(srcLclVarTree != nullptr); + assert(destType != TYP_STRUCT); + unsigned destSize = genTypeSize(destType); + srcLclVar = lvaGetDesc(srcLclNum); + unsigned srcSize = + (srcLclVar->lvType == TYP_STRUCT) ? srcLclVar->lvExactSize : genTypeSize(srcLclVar); + if (destSize == srcSize) + { + srcLclVarTree->gtFlags |= GTF_VAR_CAST; + srcLclVarTree->ChangeOper(GT_LCL_FLD); + srcLclVarTree->gtType = destType; + srcLclVarTree->AsLclFld()->SetFieldSeq(curFieldSeq); + srcFld = srcLclVarTree; + done = true; + } + } + } + if (!done) + { + unsigned fldOffset = lvaGetDesc(dstFieldLclNum)->lvFldOffset; + if (!srcUseLclFld) + { + assert(srcAddrClone != nullptr); + if (fldOffset == 0) + { + fgAddFieldSeqForZeroOffset(srcAddrClone, curFieldSeq); + } + else + { + GenTreeIntCon* fldOffsetNode = gtNewIconNode(fldOffset, curFieldSeq); + srcAddrClone = gtNewOperNode(GT_ADD, TYP_BYREF, srcAddrClone, fldOffsetNode); + } + srcFld = gtNewIndir(destType, srcAddrClone); + } + else + { + assert((srcLclOffset == 0) || (srcFldSeq != 0)); + // If the src was a struct type field "B" in a struct "A" then we add + // add offset of ("B" in "A") + current offset in "B". + unsigned summOffset = srcLclOffset + fldOffset; + srcFld = gtNewLclFldNode(srcLclNum, destType, summOffset); + FieldSeqNode* srcFldFldSeq = GetFieldSeqStore()->Append(srcFldSeq, curFieldSeq); + srcFld->AsLclFld()->SetFieldSeq(srcFldFldSeq); + // TODO-1stClassStructs: remove this and implement reading a field from a struct in a reg. + lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_LocalField)); + } + } + } + } + assert(srcFld != nullptr); + noway_assert(dstFld->TypeGet() == srcFld->TypeGet()); + + asg = gtNewAssignNode(dstFld, srcFld); + + // If we spilled the address, and we didn't do individual field assignments to promoted fields, + // and it was of a local, ensure that the destination local variable has been marked as address + // exposed. Neither liveness nor SSA are able to track this kind of indirect assignments. 
+ if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM) + { + noway_assert(lvaGetDesc(destLclNum)->lvAddrExposed); + } + +#if LOCAL_ASSERTION_PROP + if (optLocalAssertionProp) + { + optAssertionGen(asg); + } +#endif // LOCAL_ASSERTION_PROP + + if (tree) + { + tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg); + } + else + { + tree = asg; + } + } + } + + if (isLateArg) + { + tree->gtFlags |= GTF_LATE_ARG; + } + +#ifdef DEBUG + if (tree != oldTree) + { + tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; + } +#endif + +_Done: + + JITDUMP("\nfgMorphCopyBlock (after):\n"); + DISPTREE(tree); + + return tree; +} + +//------------------------------------------------------------------------ +// fgMorphCanUseLclFldForCopy: check if we can access LclVar2 using LclVar1's fields. +// +// Arguments: +// lclNum1 - a promoted lclVar that is used in fieldwise assignment; +// lclNum2 - the local variable on the other side of ASG, can be BAD_VAR_NUM. +// +// Return Value: +// True if the second local is valid and has the same struct handle as the first, +// false otherwise. +// +// Notes: +// This check is needed to avoid accesing LCL_VARs with incorrect +// CORINFO_FIELD_HANDLE that would confuse VN optimizations. +// +bool Compiler::fgMorphCanUseLclFldForCopy(unsigned lclNum1, unsigned lclNum2) +{ + assert(lclNum1 != BAD_VAR_NUM); + if (lclNum2 == BAD_VAR_NUM) + { + return false; + } + const LclVarDsc* varDsc1 = lvaGetDesc(lclNum1); + const LclVarDsc* varDsc2 = lvaGetDesc(lclNum2); + assert(varTypeIsStruct(varDsc1)); + if (!varTypeIsStruct(varDsc2)) + { + return false; + } + CORINFO_CLASS_HANDLE struct1 = varDsc1->GetStructHnd(); + CORINFO_CLASS_HANDLE struct2 = varDsc2->GetStructHnd(); + assert(struct1 != NO_CLASS_HANDLE); + assert(struct2 != NO_CLASS_HANDLE); + if (struct1 != struct2) + { + return false; + } + return true; +} + +// insert conversions and normalize to make tree amenable to register +// FP architectures +GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree) +{ + if (tree->OperIsArithmetic()) + { + if (varTypeIsFloating(tree)) + { + GenTree* op1 = tree->AsOp()->gtOp1; + GenTree* op2 = tree->gtGetOp2(); + + assert(varTypeIsFloating(op1->TypeGet()) && varTypeIsFloating(op2->TypeGet())); + + if (op1->TypeGet() != tree->TypeGet()) + { + tree->AsOp()->gtOp1 = gtNewCastNode(tree->TypeGet(), op1, false, tree->TypeGet()); + } + if (op2->TypeGet() != tree->TypeGet()) + { + tree->AsOp()->gtOp2 = gtNewCastNode(tree->TypeGet(), op2, false, tree->TypeGet()); + } + } + } + else if (tree->OperIsCompare()) + { + GenTree* op1 = tree->AsOp()->gtOp1; + + if (varTypeIsFloating(op1)) + { + GenTree* op2 = tree->gtGetOp2(); + assert(varTypeIsFloating(op2)); + + if (op1->TypeGet() != op2->TypeGet()) + { + // both had better be floating, just one bigger than other + if (op1->TypeGet() == TYP_FLOAT) + { + assert(op2->TypeGet() == TYP_DOUBLE); + tree->AsOp()->gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); + } + else if (op2->TypeGet() == TYP_FLOAT) + { + assert(op1->TypeGet() == TYP_DOUBLE); + tree->AsOp()->gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); + } + } + } + } + + return tree; +} + +#ifdef FEATURE_SIMD + +//-------------------------------------------------------------------------------------------------------------- +// getSIMDStructFromField: +// Checking whether the field belongs to a simd struct or not. If it is, return the GenTree* for +// the struct node, also base type, field index and simd size. If it is not, just return nullptr. 
+// Usually if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, then we +// should return nullptr, since in this case we should treat SIMD struct as a regular struct. +// However if no matter what, you just want get simd struct node, you can set the ignoreUsedInSIMDIntrinsic +// as true. Then there will be no IsUsedInSIMDIntrinsic checking, and it will return SIMD struct node +// if the struct is a SIMD struct. +// +// Arguments: +// tree - GentreePtr. This node will be checked to see this is a field which belongs to a simd +// struct used for simd intrinsic or not. +// simdBaseJitTypeOut - CorInfoType pointer, if the tree node is the tree we want, we set *simdBaseJitTypeOut +// to simd lclvar's base JIT type. +// indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut +// equals to the index number of this field. +// simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut +// equals to the simd struct size which this tree belongs to. +// ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore +// the UsedInSIMDIntrinsic check. +// +// return value: +// A GenTree* which points the simd lclvar tree belongs to. If the tree is not the simd +// instrinic related field, return nullptr. +// + +GenTree* Compiler::getSIMDStructFromField(GenTree* tree, + CorInfoType* simdBaseJitTypeOut, + unsigned* indexOut, + unsigned* simdSizeOut, + bool ignoreUsedInSIMDIntrinsic /*false*/) +{ + GenTree* ret = nullptr; + if (tree->OperGet() == GT_FIELD) + { + GenTree* objRef = tree->AsField()->gtFldObj; + if (objRef != nullptr) + { + GenTree* obj = nullptr; + if (objRef->gtOper == GT_ADDR) + { + obj = objRef->AsOp()->gtOp1; + } + else if (ignoreUsedInSIMDIntrinsic) + { + obj = objRef; + } + else + { + return nullptr; + } + + if (isSIMDTypeLocal(obj)) + { + unsigned lclNum = obj->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[lclNum]; + if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic) + { + *simdSizeOut = varDsc->lvExactSize; + *simdBaseJitTypeOut = getBaseJitTypeOfSIMDLocal(obj); + ret = obj; + } + } + else if (obj->OperGet() == GT_SIMD) + { + ret = obj; + GenTreeSIMD* simdNode = obj->AsSIMD(); + *simdSizeOut = simdNode->GetSimdSize(); + *simdBaseJitTypeOut = simdNode->GetSimdBaseJitType(); + } +#ifdef FEATURE_HW_INTRINSICS + else if (obj->OperIsHWIntrinsic()) + { + ret = obj; + GenTreeHWIntrinsic* simdNode = obj->AsHWIntrinsic(); + *simdSizeOut = simdNode->GetSimdSize(); + *simdBaseJitTypeOut = simdNode->GetSimdBaseJitType(); + } +#endif // FEATURE_HW_INTRINSICS + } + } + if (ret != nullptr) + { + unsigned baseTypeSize = genTypeSize(JITtype2varType(*simdBaseJitTypeOut)); + *indexOut = tree->AsField()->gtFldOffset / baseTypeSize; + } + return ret; +} + +/***************************************************************************** +* If a read operation tries to access simd struct field, then transform the +* operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree. +* Otherwise, return the old tree. +* Argument: +* tree - GenTree*. If this pointer points to simd struct which is used for simd +* intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem. +* Return: +* A GenTree* which points to the new tree. If the tree is not for simd intrinsic, +* return nullptr. 
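+* Example (illustrative): for a Vector3 local 'v' that is flagged as used in SIMD intrinsics,
+* reading v.Z (field offset 8, float base type) is rewritten to SIMDIntrinsicGetItem(v, 2).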
+*/ + +GenTree* Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTree* tree) +{ + unsigned index = 0; + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + unsigned simdSize = 0; + GenTree* simdStructNode = getSIMDStructFromField(tree, &simdBaseJitType, &index, &simdSize); + if (simdStructNode != nullptr) + { + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + assert(simdSize >= ((index + 1) * genTypeSize(simdBaseType))); + GenTree* op2 = gtNewIconNode(index); + tree = gtNewSIMDNode(simdBaseType, simdStructNode, op2, SIMDIntrinsicGetItem, simdBaseJitType, simdSize); +#ifdef DEBUG + tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + } + return tree; +} + +/***************************************************************************** +* Transform an assignment of a SIMD struct field to SIMD intrinsic +* SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment, +* then return the old tree. +* Argument: +* tree - GenTree*. If this pointer points to simd struct which is used for simd +* intrinsic, we will morph it as simd intrinsic set. +* Return: +* A GenTree* which points to the new tree. If the tree is not for simd intrinsic, +* return nullptr. +*/ + +GenTree* Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTree* tree) +{ + assert(tree->OperGet() == GT_ASG); + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + unsigned index = 0; + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + unsigned simdSize = 0; + GenTree* simdOp1Struct = getSIMDStructFromField(op1, &simdBaseJitType, &index, &simdSize); + if (simdOp1Struct != nullptr) + { + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + + // Generate the simd set intrinsic + assert(simdSize >= ((index + 1) * genTypeSize(simdBaseType))); + + SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid; + switch (index) + { + case 0: + simdIntrinsicID = SIMDIntrinsicSetX; + break; + case 1: + simdIntrinsicID = SIMDIntrinsicSetY; + break; + case 2: + simdIntrinsicID = SIMDIntrinsicSetZ; + break; + case 3: + simdIntrinsicID = SIMDIntrinsicSetW; + break; + default: + noway_assert(!"There is no set intrinsic for index bigger than 3"); + } + + GenTree* target = gtClone(simdOp1Struct); + assert(target != nullptr); + var_types simdType = target->gtType; + GenTree* simdTree = gtNewSIMDNode(simdType, simdOp1Struct, op2, simdIntrinsicID, simdBaseJitType, simdSize); + + tree->AsOp()->gtOp1 = target; + tree->AsOp()->gtOp2 = simdTree; + + // fgMorphTree has already called fgMorphImplicitByRefArgs() on this assignment, but the source + // and target have not yet been morphed. + // Therefore, in case the source and/or target are now implicit byrefs, we need to call it again. + if (fgMorphImplicitByRefArgs(tree)) + { + if (tree->gtGetOp1()->OperIsBlk()) + { + assert(tree->gtGetOp1()->TypeGet() == simdType); + tree->gtGetOp1()->SetOper(GT_IND); + tree->gtGetOp1()->gtType = simdType; + } + } +#ifdef DEBUG + tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + } + + return tree; +} + +#endif // FEATURE_SIMD + +//------------------------------------------------------------------------------ +// fgMorphCommutative : Try to simplify "(X op C1) op C2" to "X op C3" +// for commutative operators. +// +// Arguments: +// tree - node to fold +// +// return value: +// A folded GenTree* instance or nullptr if something prevents folding. 
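+// Example (illustrative): (x + 2) + 3 is folded to x + 5, and the COMMA form
+// (COMMA(..., x + 2)) + 3 becomes COMMA(..., x + 5), provided neither operation
+// overflows and the two constants have the same (non-ref) type.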
+// + +GenTree* Compiler::fgMorphCommutative(GenTreeOp* tree) +{ + assert(varTypeIsIntegralOrI(tree->TypeGet())); + assert(tree->OperIs(GT_ADD, GT_MUL, GT_OR, GT_AND, GT_XOR)); + + // op1 can be GT_COMMA, in this case we're going to fold + // "(op (COMMA(... (op X C1))) C2)" to "(COMMA(... (op X C3)))" + GenTree* op1 = tree->gtGetOp1()->gtEffectiveVal(true); + genTreeOps oper = tree->OperGet(); + + if (!op1->OperIs(oper) || !tree->gtGetOp2()->IsCnsIntOrI() || !op1->gtGetOp2()->IsCnsIntOrI() || + op1->gtGetOp1()->IsCnsIntOrI() || gtIsActiveCSE_Candidate(op1)) + { + return nullptr; + } + + if (tree->OperMayOverflow() && (tree->gtOverflow() || op1->gtOverflow())) + { + return nullptr; + } + + GenTreeIntCon* cns1 = op1->gtGetOp2()->AsIntCon(); + GenTreeIntCon* cns2 = tree->gtGetOp2()->AsIntCon(); + + if (!varTypeIsIntegralOrI(tree->TypeGet()) || cns1->TypeIs(TYP_REF) || !cns1->TypeIs(cns2->TypeGet())) + { + return nullptr; + } + + GenTree* foldedCns = gtFoldExprConst(gtNewOperNode(oper, cns1->TypeGet(), cns1, cns2)); + if (!foldedCns->IsCnsIntOrI()) + { + // Give up if we can't fold "C1 op C2" + return nullptr; + } + + cns1->gtIconVal = foldedCns->AsIntCon()->IconValue(); + if ((oper == GT_ADD) && foldedCns->IsCnsIntOrI()) + { + cns1->AsIntCon()->gtFieldSeq = + GetFieldSeqStore()->Append(cns1->AsIntCon()->gtFieldSeq, cns2->AsIntCon()->gtFieldSeq); + } + + GenTreeOp* newTree = tree->gtGetOp1()->AsOp(); + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(cns2); + DEBUG_DESTROY_NODE(foldedCns); + INDEBUG(newTree->gtOp2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return newTree; +} + +/***************************************************************************** + * + * Transform the given GTK_SMPOP tree for code generation. + */ + +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function +#endif +GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) +{ + ALLOCA_CHECK(); + assert(tree->OperKind() & GTK_SMPOP); + + /* The steps in this function are : + o Perform required preorder processing + o Process the first, then second operand, if any + o Perform required postorder morphing + o Perform optional postorder morphing if optimizing + */ + + bool isQmarkColon = false; + +#if LOCAL_ASSERTION_PROP + AssertionIndex origAssertionCount = DUMMY_INIT(0); + AssertionDsc* origAssertionTab = DUMMY_INIT(NULL); + + AssertionIndex thenAssertionCount = DUMMY_INIT(0); + AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL); +#endif + + if (fgGlobalMorph) + { + tree = fgMorphForRegisterFP(tree); + } + + genTreeOps oper = tree->OperGet(); + var_types typ = tree->TypeGet(); + GenTree* op1 = tree->AsOp()->gtOp1; + GenTree* op2 = tree->gtGetOp2IfPresent(); + + /*------------------------------------------------------------------------- + * First do any PRE-ORDER processing + */ + + switch (oper) + { + // Some arithmetic operators need to use a helper call to the EE + int helper; + + case GT_ASG: + tree = fgDoNormalizeOnStore(tree); + /* fgDoNormalizeOnStore can change op2 */ + noway_assert(op1 == tree->AsOp()->gtOp1); + op2 = tree->AsOp()->gtOp2; + +#ifdef FEATURE_SIMD + { + // We should check whether op2 should be assigned to a SIMD field or not. + // If it is, we should tranlate the tree to simd intrinsic. 
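+            // For example (illustrative), for a Vector4 local 'v' used in SIMD intrinsics,
+            // "v.Y = f" is rewritten into "v = SIMDIntrinsicSetY(v, f)" by the call below.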
+ assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0)); + GenTree* newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree); + typ = tree->TypeGet(); + op1 = tree->gtGetOp1(); + op2 = tree->gtGetOp2(); +#ifdef DEBUG + assert((tree == newTree) && (tree->OperGet() == oper)); + if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0) + { + tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; + } +#endif // DEBUG + } +#endif + + // We can't CSE the LHS of an assignment. Only r-values can be CSEed. + // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former + // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type) + // TODO-1stClassStructs: improve this. + if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) + { + op1->gtFlags |= GTF_DONT_CSE; + } + break; + + case GT_ADDR: + + /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */ + op1->gtFlags |= GTF_DONT_CSE; + break; + + case GT_QMARK: + case GT_JTRUE: + + noway_assert(op1); + + if (op1->OperKind() & GTK_RELOP) + { + noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK)); + /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does + not need to materialize the result as a 0 or 1. */ + + /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */ + op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + + // Request that the codegen for op1 sets the condition flags + // when it generates the code for op1. + // + // Codegen for op1 must set the condition flags if + // this method returns true. + // + op1->gtRequestSetFlags(); + } + else + { + GenTree* effOp1 = op1->gtEffectiveVal(); + noway_assert((effOp1->gtOper == GT_CNS_INT) && + (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); + } + break; + + case GT_COLON: +#if LOCAL_ASSERTION_PROP + if (optLocalAssertionProp) +#endif + { + isQmarkColon = true; + } + break; + + case GT_INDEX: + return fgMorphArrayIndex(tree); + + case GT_CAST: + return fgMorphCast(tree); + + case GT_MUL: + if (opts.OptimizationEnabled() && !optValnumCSE_phase && !tree->gtOverflow()) + { + // MUL(NEG(a), C) => MUL(a, NEG(C)) + if (op1->OperIs(GT_NEG) && !op1->gtGetOp1()->IsCnsIntOrI() && op2->IsCnsIntOrI() && + !op2->IsIconHandle()) + { + GenTree* newOp1 = op1->gtGetOp1(); + GenTree* newConst = gtNewIconNode(-op2->AsIntCon()->IconValue(), op2->TypeGet()); + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(op2); + tree->AsOp()->gtOp1 = newOp1; + tree->AsOp()->gtOp2 = newConst; + return fgMorphSmpOp(tree, mac); + } + } + +#ifndef TARGET_64BIT + if (typ == TYP_LONG) + { + /* For (long)int1 * (long)int2, we dont actually do the + casts, and just multiply the 32 bit values, which will + give us the 64 bit result in edx:eax */ + + noway_assert(op2); + if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST && + genActualType(op1->CastFromType()) == TYP_INT && genActualType(op2->CastFromType()) == TYP_INT) && + !op1->gtOverflow() && !op2->gtOverflow()) + { + // The casts have to be of the same signedness. 
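+                    // (Illustratively: one operand is a signed int->long cast and the other an
+                    // unsigned uint->long cast. Below we try to flip the signedness of a constant
+                    // cast operand whose value is below 0x80000000 so that both casts match.)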
+ if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED)) + { + // We see if we can force an int constant to change its signedness + GenTree* constOp; + if (op1->AsCast()->CastOp()->gtOper == GT_CNS_INT) + constOp = op1; + else if (op2->AsCast()->CastOp()->gtOper == GT_CNS_INT) + constOp = op2; + else + goto NO_MUL_64RSLT; + + if (((unsigned)(constOp->AsCast()->CastOp()->AsIntCon()->gtIconVal) < (unsigned)(0x80000000))) + constOp->gtFlags ^= GTF_UNSIGNED; + else + goto NO_MUL_64RSLT; + } + + // The only combination that can overflow + if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED)) + goto NO_MUL_64RSLT; + + /* Remaining combinations can never overflow during long mul. */ + + tree->gtFlags &= ~GTF_OVERFLOW; + + /* Do unsigned mul only if the casts were unsigned */ + + tree->gtFlags &= ~GTF_UNSIGNED; + tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED; + + /* Since we are committing to GTF_MUL_64RSLT, we don't want + the casts to be folded away. So morph the castees directly */ + + op1->AsOp()->gtOp1 = fgMorphTree(op1->AsOp()->gtOp1); + op2->AsOp()->gtOp1 = fgMorphTree(op2->AsOp()->gtOp1); + + // Propagate side effect flags up the tree + op1->gtFlags &= ~GTF_ALL_EFFECT; + op1->gtFlags |= (op1->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT); + op2->gtFlags &= ~GTF_ALL_EFFECT; + op2->gtFlags |= (op2->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT); + + // If the GT_MUL can be altogether folded away, we should do that. + + if ((op1->AsCast()->CastOp()->OperKind() & op2->AsCast()->CastOp()->OperKind() & GTK_CONST) && + opts.OptEnabled(CLFLG_CONSTANTFOLD)) + { + tree->AsOp()->gtOp1 = op1 = gtFoldExprConst(op1); + tree->AsOp()->gtOp2 = op2 = gtFoldExprConst(op2); + noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST); + tree = gtFoldExprConst(tree); + noway_assert(tree->OperIsConst()); + return tree; + } + + tree->gtFlags |= GTF_MUL_64RSLT; + + // If op1 and op2 are unsigned casts, we need to do an unsigned mult + tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED); + + // Insert GT_NOP nodes for the cast operands so that they do not get folded + // And propagate the new flags. We don't want to CSE the casts because + // codegen expects GTF_MUL_64RSLT muls to have a certain layout. + + if (op1->AsCast()->CastOp()->OperGet() != GT_NOP) + { + op1->AsOp()->gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->AsCast()->CastOp()); + op1->gtFlags &= ~GTF_ALL_EFFECT; + op1->gtFlags |= (op1->AsCast()->CastOp()->gtFlags & GTF_ALL_EFFECT); + } + + if (op2->AsCast()->CastOp()->OperGet() != GT_NOP) + { + op2->AsOp()->gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->AsCast()->CastOp()); + op2->gtFlags &= ~GTF_ALL_EFFECT; + op2->gtFlags |= (op2->AsCast()->CastOp()->gtFlags & GTF_ALL_EFFECT); + } + + op1->gtFlags |= GTF_DONT_CSE; + op2->gtFlags |= GTF_DONT_CSE; + + tree->gtFlags &= ~GTF_ALL_EFFECT; + tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT); + + goto DONE_MORPHING_CHILDREN; + } + else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0) + { + NO_MUL_64RSLT: + if (tree->gtOverflow()) + helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF; + else + helper = CORINFO_HELP_LMUL; + + goto USE_HELPER_FOR_ARITH; + } + else + { + /* We are seeing this node again. We have decided to use + GTF_MUL_64RSLT, so leave it alone. */ + + assert(tree->gtIsValid64RsltMul()); + } + } +#endif // !TARGET_64BIT + break; + + case GT_ARR_LENGTH: + if (op1->OperIs(GT_CNS_STR)) + { + // Optimize `ldstr + String::get_Length()` to CNS_INT + // e.g. 
"Hello".Length => 5 + GenTreeIntCon* iconNode = gtNewStringLiteralLength(op1->AsStrCon()); + if (iconNode != nullptr) + { + INDEBUG(iconNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return iconNode; + } + } + break; + + case GT_DIV: + // Replace "val / dcon" with "val * (1.0 / dcon)" if dcon is a power of two. + // Powers of two within range are always exactly represented, + // so multiplication by the reciprocal is safe in this scenario + if (fgGlobalMorph && op2->IsCnsFltOrDbl()) + { + double divisor = op2->AsDblCon()->gtDconVal; + if (((typ == TYP_DOUBLE) && FloatingPointUtils::hasPreciseReciprocal(divisor)) || + ((typ == TYP_FLOAT) && FloatingPointUtils::hasPreciseReciprocal(forceCastToFloat(divisor)))) + { + oper = GT_MUL; + tree->ChangeOper(oper); + op2->AsDblCon()->gtDconVal = 1.0 / divisor; + } + } + + // array.Length is always positive so GT_DIV can be changed to GT_UDIV + // if op2 is a positive cns + if (!optValnumCSE_phase && op1->OperIs(GT_ARR_LENGTH) && op2->IsIntegralConst() && + op2->AsIntCon()->IconValue() >= 2) // for 0 and 1 it doesn't matter if it's UDIV or DIV + { + assert(tree->OperIs(GT_DIV)); + tree->ChangeOper(GT_UDIV); + return fgMorphSmpOp(tree, mac); + } + + if (opts.OptimizationEnabled() && !optValnumCSE_phase) + { + // DIV(NEG(a), C) => DIV(a, NEG(C)) + if (op1->OperIs(GT_NEG) && !op1->gtGetOp1()->IsCnsIntOrI() && op2->IsCnsIntOrI() && + !op2->IsIconHandle()) + { + ssize_t op2Value = op2->AsIntCon()->IconValue(); + if (op2Value != 1 && op2Value != -1) // Div must throw exception for int(long).MinValue / -1. + { + tree->AsOp()->gtOp1 = op1->gtGetOp1(); + DEBUG_DESTROY_NODE(op1); + tree->AsOp()->gtOp2 = gtNewIconNode(-op2Value, op2->TypeGet()); + DEBUG_DESTROY_NODE(op2); + return fgMorphSmpOp(tree, mac); + } + } + } + +#ifndef TARGET_64BIT + if (typ == TYP_LONG) + { + helper = CORINFO_HELP_LDIV; + goto USE_HELPER_FOR_ARITH; + } + +#if USE_HELPERS_FOR_INT_DIV + if (typ == TYP_INT) + { + helper = CORINFO_HELP_DIV; + goto USE_HELPER_FOR_ARITH; + } +#endif +#endif // !TARGET_64BIT + + if (op2->gtOper == GT_CAST && op2->AsOp()->gtOp1->IsCnsIntOrI()) + { + op2 = gtFoldExprConst(op2); + } + break; + + case GT_UDIV: + +#ifndef TARGET_64BIT + if (typ == TYP_LONG) + { + helper = CORINFO_HELP_ULDIV; + goto USE_HELPER_FOR_ARITH; + } +#if USE_HELPERS_FOR_INT_DIV + if (typ == TYP_INT) + { + helper = CORINFO_HELP_UDIV; + goto USE_HELPER_FOR_ARITH; + } +#endif +#endif // TARGET_64BIT + break; + + case GT_MOD: + + if (varTypeIsFloating(typ)) + { + helper = CORINFO_HELP_DBLREM; + noway_assert(op2); + if (op1->TypeGet() == TYP_FLOAT) + { + if (op2->TypeGet() == TYP_FLOAT) + { + helper = CORINFO_HELP_FLTREM; + } + else + { + tree->AsOp()->gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE); + } + } + else if (op2->TypeGet() == TYP_FLOAT) + { + tree->AsOp()->gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE); + } + goto USE_HELPER_FOR_ARITH; + } + + // array.Length is always positive so GT_DIV can be changed to GT_UDIV + // if op2 is a positive cns + if (!optValnumCSE_phase && op1->OperIs(GT_ARR_LENGTH) && op2->IsIntegralConst() && + op2->AsIntCon()->IconValue() >= 2) // for 0 and 1 it doesn't matter if it's UMOD or MOD + { + assert(tree->OperIs(GT_MOD)); + tree->ChangeOper(GT_UMOD); + return fgMorphSmpOp(tree, mac); + } + + // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod. + // A similar optimization for signed mod will not work for a negative perfectly divisible + // HI-word. 
To make it correct, we would need to divide without the sign and then flip the + // result sign after mod. This requires 18 opcodes + flow making it not worthy to inline. + goto ASSIGN_HELPER_FOR_MOD; + + case GT_UMOD: + +#ifdef TARGET_ARMARCH +// +// Note for TARGET_ARMARCH we don't have a remainder instruction, so we don't do this optimization +// +#else // TARGET_XARCH + /* If this is an unsigned long mod with op2 which is a cast to long from a + constant int, then don't morph to a call to the helper. This can be done + faster inline using idiv. + */ + + noway_assert(op2); + if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) && + ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) && + ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED))) + { + if (op2->gtOper == GT_CAST && op2->AsCast()->CastOp()->gtOper == GT_CNS_INT && + op2->AsCast()->CastOp()->AsIntCon()->gtIconVal >= 2 && + op2->AsCast()->CastOp()->AsIntCon()->gtIconVal <= 0x3fffffff && + (tree->gtFlags & GTF_UNSIGNED) == (op2->AsCast()->CastOp()->gtFlags & GTF_UNSIGNED)) + { + tree->AsOp()->gtOp2 = op2 = fgMorphCast(op2); + noway_assert(op2->gtOper == GT_CNS_NATIVELONG); + } + + if (op2->gtOper == GT_CNS_NATIVELONG && op2->AsIntConCommon()->LngValue() >= 2 && + op2->AsIntConCommon()->LngValue() <= 0x3fffffff) + { + tree->AsOp()->gtOp1 = op1 = fgMorphTree(op1); + noway_assert(op1->TypeGet() == TYP_LONG); + + // Update flags for op1 morph + tree->gtFlags &= ~GTF_ALL_EFFECT; + + tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant + + // If op1 is a constant, then do constant folding of the division operator + if (op1->gtOper == GT_CNS_NATIVELONG) + { + tree = gtFoldExpr(tree); + } + + // We may fail to fold + if (!tree->OperIsConst()) + { + tree->AsOp()->CheckDivideByConstOptimized(this); + } + + return tree; + } + } +#endif // TARGET_XARCH + + ASSIGN_HELPER_FOR_MOD: + + // For "val % 1", return 0 if op1 doesn't have any side effects + // and we are not in the CSE phase, we cannot discard 'tree' + // because it may contain CSE expressions that we haven't yet examined. + // + if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase) + { + if (op2->IsIntegralConst(1)) + { + GenTree* zeroNode = gtNewZeroConNode(typ); +#ifdef DEBUG + zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + DEBUG_DESTROY_NODE(tree); + return zeroNode; + } + } + +#ifndef TARGET_64BIT + if (typ == TYP_LONG) + { + helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD; + goto USE_HELPER_FOR_ARITH; + } + +#if USE_HELPERS_FOR_INT_DIV + if (typ == TYP_INT) + { + if (oper == GT_UMOD) + { + helper = CORINFO_HELP_UMOD; + goto USE_HELPER_FOR_ARITH; + } + else if (oper == GT_MOD) + { + helper = CORINFO_HELP_MOD; + goto USE_HELPER_FOR_ARITH; + } + } +#endif +#endif // !TARGET_64BIT + + if (op2->gtOper == GT_CAST && op2->AsOp()->gtOp1->IsCnsIntOrI()) + { + op2 = gtFoldExprConst(op2); + } + +#ifdef TARGET_ARM64 + // For ARM64 we don't have a remainder instruction, + // The architecture manual suggests the following transformation to + // generate code for such operator: + // + // a % b = a - (a / b) * b; + // + // TODO: there are special cases where it can be done better, for example + // when the modulo operation is unsigned and the divisor is a + // integer constant power of two. 
In this case, we can make the transform: + // + // a % b = a & (b - 1); + // + // Lower supports it for all cases except when `a` is constant, but + // in Morph we can't guarantee that `a` won't be transformed into a constant, + // so can't guarantee that lower will be able to do this optimization. + { + // Do "a % b = a - (a / b) * b" morph always, see TODO before this block. + bool doMorphModToSubMulDiv = true; + + if (doMorphModToSubMulDiv) + { + assert(!optValnumCSE_phase); + + tree = fgMorphModToSubMulDiv(tree->AsOp()); + op1 = tree->AsOp()->gtOp1; + op2 = tree->AsOp()->gtOp2; + } + } +#else // !TARGET_ARM64 + // If b is not a power of 2 constant then lowering replaces a % b + // with a - (a / b) * b and applies magic division optimization to + // a / b. The code may already contain an a / b expression (e.g. + // x = a / 10; y = a % 10;) and then we end up with redundant code. + // If we convert % to / here we give CSE the opportunity to eliminate + // the redundant division. If there's no redundant division then + // nothing is lost, lowering would have done this transform anyway. + + if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())) + { + ssize_t divisorValue = op2->AsIntCon()->IconValue(); + size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast(divisorValue) + : static_cast(abs(divisorValue)); + + if (!isPow2(absDivisorValue)) + { + tree = fgMorphModToSubMulDiv(tree->AsOp()); + op1 = tree->AsOp()->gtOp1; + op2 = tree->AsOp()->gtOp2; + } + } +#endif // !TARGET_ARM64 + break; + + USE_HELPER_FOR_ARITH: + { + // TODO: this comment is wrong now, do an appropriate fix. + /* We have to morph these arithmetic operations into helper calls + before morphing the arguments (preorder), else the arguments + won't get correct values of fgPtrArgCntCur. + However, try to fold the tree first in case we end up with a + simple node which won't need a helper call at all */ + + noway_assert(tree->OperIsBinary()); + + GenTree* oldTree = tree; + + tree = gtFoldExpr(tree); + + // Were we able to fold it ? + // Note that gtFoldExpr may return a non-leaf even if successful + // e.g. for something like "expr / 1" - see also bug #290853 + if (tree->OperIsLeaf() || (oldTree != tree)) + { + return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree); + } + + // Did we fold it into a comma node with throw? + if (tree->gtOper == GT_COMMA) + { + noway_assert(fgIsCommaThrow(tree)); + return fgMorphTree(tree); + } + } + return fgMorphIntoHelperCall(tree, helper, gtNewCallArgs(op1, op2)); + + case GT_RETURN: + // normalize small integer return values + if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) && (op1->TypeGet() != TYP_VOID) && + fgCastNeeded(op1, info.compRetType)) + { + // Small-typed return values are normalized by the callee + op1 = gtNewCastNode(TYP_INT, op1, false, info.compRetType); + + // Propagate GTF_COLON_COND + op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND); + + tree->AsOp()->gtOp1 = fgMorphCast(op1); + + // Propagate side effect flags + tree->gtFlags &= ~GTF_ALL_EFFECT; + tree->gtFlags |= (tree->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT); + + return tree; + } + if (!tree->TypeIs(TYP_VOID)) + { + if (op1->OperIs(GT_OBJ, GT_BLK, GT_IND)) + { + op1 = fgMorphRetInd(tree->AsUnOp()); + } + if (op1->OperIs(GT_LCL_VAR)) + { + // With a `genReturnBB` this `RETURN(src)` tree will be replaced by a `ASG(genReturnLocal, src)` + // and `ASG` will be tranformed into field by field copy without parent local referencing if + // possible. 
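+                // Illustrative example (local numbers are hypothetical): for "struct S { double d; }",
+                // RETURN(LCL_VAR struct V02) becomes RETURN(LCL_VAR double V15), where V15 is the
+                // single promoted field of V02.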
+ GenTreeLclVar* lclVar = op1->AsLclVar(); + unsigned lclNum = lclVar->GetLclNum(); + if ((genReturnLocal == BAD_VAR_NUM) || (genReturnLocal == lclNum)) + { + LclVarDsc* varDsc = lvaGetDesc(lclVar); + if (varDsc->CanBeReplacedWithItsField(this)) + { + // We can replace the struct with its only field and allow copy propagation to replace + // return value that was written as a field. + unsigned fieldLclNum = varDsc->lvFieldLclStart; + LclVarDsc* fieldDsc = lvaGetDesc(fieldLclNum); + + if (!varTypeIsSmallInt(fieldDsc->lvType)) + { + // TODO-CQ: support that substitution for small types without creating `CAST` node. + // When a small struct is returned in a register higher bits could be left in undefined + // state. + JITDUMP("Replacing an independently promoted local var V%02u with its only field " + "V%02u for " + "the return [%06u]\n", + lclVar->GetLclNum(), fieldLclNum, dspTreeID(tree)); + lclVar->SetLclNum(fieldLclNum); + lclVar->ChangeType(fieldDsc->lvType); + } + } + } + } + } + break; + + case GT_EQ: + case GT_NE: + { + GenTree* optimizedTree = gtFoldTypeCompare(tree); + + if (optimizedTree != tree) + { + return fgMorphTree(optimizedTree); + } + } + + FALLTHROUGH; + + case GT_GT: + { + // Try and optimize nullable boxes feeding compares + GenTree* optimizedTree = gtFoldBoxNullable(tree); + + if (optimizedTree->OperGet() != tree->OperGet()) + { + return optimizedTree; + } + else + { + tree = optimizedTree; + } + + op1 = tree->AsOp()->gtOp1; + op2 = tree->gtGetOp2IfPresent(); + + break; + } + + case GT_RUNTIMELOOKUP: + return fgMorphTree(op1); + +#ifdef TARGET_ARM + case GT_INTRINSIC: + if (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) + { + switch (tree->TypeGet()) + { + case TYP_DOUBLE: + return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewCallArgs(op1)); + case TYP_FLOAT: + return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewCallArgs(op1)); + default: + unreached(); + } + } + break; +#endif + case GT_LIST: + // Special handling for the arg list. + return fgMorphArgList(tree->AsArgList(), mac); + + case GT_PUTARG_TYPE: + return fgMorphTree(tree->AsUnOp()->gtGetOp1()); + + default: + break; + } + + /*------------------------------------------------------------------------- + * Process the first operand, if any + */ + + if (op1) + { + +#if LOCAL_ASSERTION_PROP + // If we are entering the "then" part of a Qmark-Colon we must + // save the state of the current copy assignment table + // so that we can restore this state when entering the "else" part + if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + if (optAssertionCount) + { + noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea + unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); + origAssertionTab = (AssertionDsc*)ALLOCA(tabSize); + origAssertionCount = optAssertionCount; + memcpy(origAssertionTab, optAssertionTabPrivate, tabSize); + } + else + { + origAssertionCount = 0; + origAssertionTab = nullptr; + } + } +#endif // LOCAL_ASSERTION_PROP + + // We might need a new MorphAddressContext context. (These are used to convey + // parent context about how addresses being calculated will be used; see the + // specification comment for MorphAddrContext for full details.) + // Assume it's an Ind context to start. 
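+        // For example, the address computation under an indirection such as *(p + 8) is morphed
+        // in an Ind context so that constant offsets can be accumulated, the operand of a GT_ADDR
+        // gets an Addr context, and a plain value use starts a fresh context.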
+ MorphAddrContext subIndMac1(MACK_Ind); + MorphAddrContext* subMac1 = mac; + if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind) + { + switch (tree->gtOper) + { + case GT_ADDR: + // A non-null mac here implies this node is part of an address computation. + // If so, we need to pass the existing mac down to the child node. + // + // Otherwise, use a new mac. + if (subMac1 == nullptr) + { + subMac1 = &subIndMac1; + subMac1->m_kind = MACK_Addr; + } + break; + case GT_COMMA: + // In a comma, the incoming context only applies to the rightmost arg of the + // comma list. The left arg (op1) gets a fresh context. + subMac1 = nullptr; + break; + case GT_OBJ: + case GT_BLK: + case GT_DYN_BLK: + case GT_IND: + // A non-null mac here implies this node is part of an address computation (the tree parent is + // GT_ADDR). + // If so, we need to pass the existing mac down to the child node. + // + // Otherwise, use a new mac. + if (subMac1 == nullptr) + { + subMac1 = &subIndMac1; + } + break; + default: + break; + } + } + + // For additions, if we're in an IND context keep track of whether + // all offsets added to the address are constant, and their sum. + if (tree->gtOper == GT_ADD && subMac1 != nullptr) + { + assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock. + GenTree* otherOp = tree->AsOp()->gtOp2; + // Is the other operator a constant? + if (otherOp->IsCnsIntOrI()) + { + ClrSafeInt totalOffset(subMac1->m_totalOffset); + totalOffset += otherOp->AsIntConCommon()->IconValue(); + if (totalOffset.IsOverflow()) + { + // We will consider an offset so large as to overflow as "not a constant" -- + // we will do a null check. + subMac1->m_allConstantOffsets = false; + } + else + { + subMac1->m_totalOffset += otherOp->AsIntConCommon()->IconValue(); + } + } + else + { + subMac1->m_allConstantOffsets = false; + } + } + + // If op1 is a GT_FIELD or indir, we need to pass down the mac if + // its parent is GT_ADDR, since the address of op1 + // is part of an ongoing address computation. Otherwise + // op1 represents the value of the field and so any address + // calculations it does are in a new context. + if (((op1->gtOper == GT_FIELD) || op1->OperIsIndir()) && (tree->gtOper != GT_ADDR)) + { + subMac1 = nullptr; + + // The impact of op1's value to any ongoing + // address computation is handled below when looking + // at op2. 
+ } + + tree->AsOp()->gtOp1 = op1 = fgMorphTree(op1, subMac1); + +#if LOCAL_ASSERTION_PROP + // If we are exiting the "then" part of a Qmark-Colon we must + // save the state of the current copy assignment table + // so that we can merge this state with the "else" part exit + if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + if (optAssertionCount) + { + noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea + unsigned tabSize = optAssertionCount * sizeof(AssertionDsc); + thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize); + thenAssertionCount = optAssertionCount; + memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize); + } + else + { + thenAssertionCount = 0; + thenAssertionTab = nullptr; + } + } +#endif // LOCAL_ASSERTION_PROP + + /* Morphing along with folding and inlining may have changed the + * side effect flags, so we have to reset them + * + * NOTE: Don't reset the exception flags on nodes that may throw */ + + assert(tree->gtOper != GT_CALL); + + if (!tree->OperRequiresCallFlag(this)) + { + tree->gtFlags &= ~GTF_CALL; + } + + /* Propagate the new flags */ + tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); + + // &aliasedVar doesn't need GTF_GLOB_REF, though alisasedVar does + // Similarly for clsVar + if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR)) + { + tree->gtFlags &= ~GTF_GLOB_REF; + } + } // if (op1) + + /*------------------------------------------------------------------------- + * Process the second operand, if any + */ + + if (op2) + { + +#if LOCAL_ASSERTION_PROP + // If we are entering the "else" part of a Qmark-Colon we must + // reset the state of the current copy assignment table + if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + optAssertionReset(0); + if (origAssertionCount) + { + size_t tabSize = origAssertionCount * sizeof(AssertionDsc); + memcpy(optAssertionTabPrivate, origAssertionTab, tabSize); + optAssertionReset(origAssertionCount); + } + } +#endif // LOCAL_ASSERTION_PROP + + // We might need a new MorphAddressContext context to use in evaluating op2. + // (These are used to convey parent context about how addresses being calculated + // will be used; see the specification comment for MorphAddrContext for full details.) + // Assume it's an Ind context to start. + switch (tree->gtOper) + { + case GT_ADD: + if (mac != nullptr && mac->m_kind == MACK_Ind) + { + GenTree* otherOp = tree->AsOp()->gtOp1; + // Is the other operator a constant? + if (otherOp->IsCnsIntOrI()) + { + mac->m_totalOffset += otherOp->AsIntConCommon()->IconValue(); + } + else + { + mac->m_allConstantOffsets = false; + } + } + break; + default: + break; + } + + // If op2 is a GT_FIELD or indir, we must be taking its value, + // so it should evaluate its address in a new context. + if ((op2->gtOper == GT_FIELD) || op2->OperIsIndir()) + { + // The impact of op2's value to any ongoing + // address computation is handled above when looking + // at op1. + mac = nullptr; + } + + tree->AsOp()->gtOp2 = op2 = fgMorphTree(op2, mac); + + /* Propagate the side effect flags from op2 */ + + tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT); + +#if LOCAL_ASSERTION_PROP + // If we are exiting the "else" part of a Qmark-Colon we must + // merge the state of the current copy assignment table with + // that of the exit of the "then" part. 
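+        // For example, if the "then" exit recorded assertions { V01 == 3, V02 == V05 } and the
+        // current ("else") exit table holds only { V01 == 3 }, then only V01 == 3 survives the
+        // merge performed below (the merged set is the intersection of the two tables).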
+ if (isQmarkColon) + { + noway_assert(optLocalAssertionProp); + // If either exit table has zero entries then + // the merged table also has zero entries + if (optAssertionCount == 0 || thenAssertionCount == 0) + { + optAssertionReset(0); + } + else + { + size_t tabSize = optAssertionCount * sizeof(AssertionDsc); + if ((optAssertionCount != thenAssertionCount) || + (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0)) + { + // Yes they are different so we have to find the merged set + // Iterate over the copy asgn table removing any entries + // that do not have an exact match in the thenAssertionTab + AssertionIndex index = 1; + while (index <= optAssertionCount) + { + AssertionDsc* curAssertion = optGetAssertion(index); + + for (unsigned j = 0; j < thenAssertionCount; j++) + { + AssertionDsc* thenAssertion = &thenAssertionTab[j]; + + // Do the left sides match? + if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) && + (curAssertion->assertionKind == thenAssertion->assertionKind)) + { + // Do the right sides match? + if ((curAssertion->op2.kind == thenAssertion->op2.kind) && + (curAssertion->op2.lconVal == thenAssertion->op2.lconVal)) + { + goto KEEP; + } + else + { + goto REMOVE; + } + } + } + // + // If we fall out of the loop above then we didn't find + // any matching entry in the thenAssertionTab so it must + // have been killed on that path so we remove it here + // + REMOVE: + // The data at optAssertionTabPrivate[i] is to be removed + CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef DEBUG + if (verbose) + { + printf("The QMARK-COLON "); + printTreeID(tree); + printf(" removes assertion candidate #%d\n", index); + } +#endif + optAssertionRemove(index); + continue; + KEEP: + // The data at optAssertionTabPrivate[i] is to be kept + index++; + } + } + } + } +#endif // LOCAL_ASSERTION_PROP + } // if (op2) + +DONE_MORPHING_CHILDREN: + + if (tree->OperIsIndirOrArrLength()) + { + tree->SetIndirExceptionFlags(this); + } + else + { + if (tree->OperMayThrow(this)) + { + // Mark the tree node as potentially throwing an exception + tree->gtFlags |= GTF_EXCEPT; + } + else + { + if (((op1 == nullptr) || ((op1->gtFlags & GTF_EXCEPT) == 0)) && + ((op2 == nullptr) || ((op2->gtFlags & GTF_EXCEPT) == 0))) + { + tree->gtFlags &= ~GTF_EXCEPT; + } + } + } + + if (tree->OperRequiresAsgFlag()) + { + tree->gtFlags |= GTF_ASG; + } + else + { + if (((op1 == nullptr) || ((op1->gtFlags & GTF_ASG) == 0)) && + ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) + { + tree->gtFlags &= ~GTF_ASG; + } + } + + if (tree->OperRequiresCallFlag(this)) + { + tree->gtFlags |= GTF_CALL; + } + else + { + if (((op1 == nullptr) || ((op1->gtFlags & GTF_CALL) == 0)) && + ((op2 == nullptr) || ((op2->gtFlags & GTF_CALL) == 0))) + { + tree->gtFlags &= ~GTF_CALL; + } + } + /*------------------------------------------------------------------------- + * Now do POST-ORDER processing + */ + + if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet()))) + { + // The tree is really not GC but was marked as such. Now that the + // children have been unmarked, unmark the tree too. 
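+        // For example, an ADD that was typed TYP_BYREF but whose operands have both been retyped
+        // to TYP_I_IMPL is retyped to TYP_I_IMPL here as well.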
+ + // Remember that GT_COMMA inherits it's type only from op2 + if (tree->gtOper == GT_COMMA) + { + tree->gtType = genActualType(op2->TypeGet()); + } + else + { + tree->gtType = genActualType(op1->TypeGet()); + } + } + + GenTree* oldTree = tree; + + GenTree* qmarkOp1 = nullptr; + GenTree* qmarkOp2 = nullptr; + + if ((tree->OperGet() == GT_QMARK) && (tree->AsOp()->gtOp2->OperGet() == GT_COLON)) + { + qmarkOp1 = oldTree->AsOp()->gtOp2->AsOp()->gtOp1; + qmarkOp2 = oldTree->AsOp()->gtOp2->AsOp()->gtOp2; + } + + // Try to fold it, maybe we get lucky, + tree = gtFoldExpr(tree); + + if (oldTree != tree) + { + /* if gtFoldExpr returned op1 or op2 then we are done */ + if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2)) + { + return tree; + } + + /* If we created a comma-throw tree then we need to morph op1 */ + if (fgIsCommaThrow(tree)) + { + tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1); + fgMorphTreeDone(tree); + return tree; + } + + return tree; + } + else if (tree->OperKind() & GTK_CONST) + { + return tree; + } + + /* gtFoldExpr could have used setOper to change the oper */ + oper = tree->OperGet(); + typ = tree->TypeGet(); + + /* gtFoldExpr could have changed op1 and op2 */ + op1 = tree->AsOp()->gtOp1; + op2 = tree->gtGetOp2IfPresent(); + + // Do we have an integer compare operation? + // + if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet())) + { + // Are we comparing against zero? + // + if (op2->IsIntegralConst(0)) + { + // Request that the codegen for op1 sets the condition flags + // when it generates the code for op1. + // + // Codegen for op1 must set the condition flags if + // this method returns true. + // + op1->gtRequestSetFlags(); + } + } + /*------------------------------------------------------------------------- + * Perform the required oper-specific postorder morphing + */ + + GenTree* temp; + GenTree* cns1; + GenTree* cns2; + size_t ival1, ival2; + GenTree* lclVarTree; + GenTree* effectiveOp1; + FieldSeqNode* fieldSeq = nullptr; + + switch (oper) + { + case GT_ASG: + + if (op1->OperIs(GT_LCL_VAR) && ((op1->gtFlags & GTF_VAR_FOLDED_IND) != 0)) + { + op1->gtFlags &= ~GTF_VAR_FOLDED_IND; + tree = fgDoNormalizeOnStore(tree); + op2 = tree->gtGetOp2(); + } + + lclVarTree = fgIsIndirOfAddrOfLocal(op1); + if (lclVarTree != nullptr) + { + lclVarTree->gtFlags |= GTF_VAR_DEF; + } + + effectiveOp1 = op1->gtEffectiveVal(); + + if (effectiveOp1->OperIsConst()) + { + op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1); + tree->AsOp()->gtOp1 = op1; + } + + /* If we are storing a small type, we might be able to omit a cast */ + if ((effectiveOp1->gtOper == GT_IND) && varTypeIsSmall(effectiveOp1->TypeGet())) + { + if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow()) + { + var_types castType = op2->CastToType(); + + // If we are performing a narrowing cast and + // castType is larger or the same as op1's type + // then we can discard the cast. + + if (varTypeIsSmall(castType) && (genTypeSize(castType) >= genTypeSize(effectiveOp1->TypeGet()))) + { + tree->AsOp()->gtOp2 = op2 = op2->AsCast()->CastOp(); + } + } + else if (op2->OperIsCompare() && varTypeIsByte(effectiveOp1->TypeGet())) + { + /* We don't need to zero extend the setcc instruction */ + op2->gtType = TYP_BYTE; + } + } + // If we introduced a CSE we may need to undo the optimization above + // (i.e. 
" op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type) + // When we introduce the CSE we remove the GT_IND and subsitute a GT_LCL_VAR in it place. + else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR)) + { + unsigned varNum = op1->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[varNum]; + + /* We again need to zero extend the setcc instruction */ + op2->gtType = varDsc->TypeGet(); + } + fgAssignSetVarDef(tree); + + /* We can't CSE the LHS of an assignment */ + /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */ + if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT)) + { + op1->gtFlags |= GTF_DONT_CSE; + } + break; + + case GT_EQ: + case GT_NE: + + /* Make sure we're allowed to do this */ + + if (optValnumCSE_phase) + { + // It is not safe to reorder/delete CSE's + break; + } + + // Pattern-matching optimization: + // (a % c) ==/!= 0 + // for power-of-2 constant `c` + // => + // a & (c - 1) ==/!= 0 + // For integer `a`, even if negative. + if (opts.OptimizationEnabled()) + { + GenTree* op1 = tree->AsOp()->gtOp1; + GenTree* op2 = tree->AsOp()->gtOp2; + if (op1->OperIs(GT_MOD) && varTypeIsIntegral(op1->TypeGet()) && op2->IsIntegralConst(0)) + { + GenTree* op1op2 = op1->AsOp()->gtOp2; + if (op1op2->IsCnsIntOrI()) + { + ssize_t modValue = op1op2->AsIntCon()->IconValue(); + if (isPow2(modValue)) + { + op1->SetOper(GT_AND); // Change % => & + op1op2->AsIntConCommon()->SetIconValue(modValue - 1); // Change c => c - 1 + } + } + } + } + + cns2 = op2; + + /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */ + + if (cns2->gtOper == GT_CNS_INT && cns2->AsIntCon()->gtIconVal != 0) + { + op1 = tree->AsOp()->gtOp1; + + /* Since this can occur repeatedly we use a while loop */ + + while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) && (op1->AsOp()->gtOp2->gtOper == GT_CNS_INT) && + (op1->gtType == TYP_INT) && (op1->gtOverflow() == false)) + { + /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */ + + ival1 = op1->AsOp()->gtOp2->AsIntCon()->gtIconVal; + ival2 = cns2->AsIntCon()->gtIconVal; + + if (op1->gtOper == GT_ADD) + { + ival2 -= ival1; + } + else + { + ival2 += ival1; + } + cns2->AsIntCon()->gtIconVal = ival2; + +#ifdef TARGET_64BIT + // we need to properly re-sign-extend or truncate as needed. + cns2->AsIntCon()->TruncateOrSignExtend32(); +#endif // TARGET_64BIT + + op1 = tree->AsOp()->gtOp1 = op1->AsOp()->gtOp1; + } + } + + // + // Here we look for the following tree + // + // EQ/NE + // / \. + // op1 CNS 0/1 + // + ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1 + + // cast to unsigned allows test for both 0 and 1 + if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->AsIntConCommon()->IconValue()) <= 1U)) + { + ival2 = (size_t)cns2->AsIntConCommon()->IconValue(); + } + else // cast to UINT64 allows test for both 0 and 1 + if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->AsIntConCommon()->LngValue()) <= 1ULL)) + { + ival2 = (size_t)cns2->AsIntConCommon()->LngValue(); + } + + if (ival2 != INT_MAX) + { + // If we don't have a comma and relop, we can't do this optimization + // + if ((op1->gtOper == GT_COMMA) && (op1->AsOp()->gtOp2->OperIsCompare())) + { + // Here we look for the following transformation + // + // EQ/NE Possible REVERSE(RELOP) + // / \ / \. + // COMMA CNS 0/1 -> COMMA relop_op2 + // / \ / \. + // x RELOP x relop_op1 + // / \. 
+ // relop_op1 relop_op2 + // + // + // + GenTree* comma = op1; + GenTree* relop = comma->AsOp()->gtOp2; + + GenTree* relop_op1 = relop->AsOp()->gtOp1; + + bool reverse = ((ival2 == 0) == (oper == GT_EQ)); + + if (reverse) + { + gtReverseCond(relop); + } + + relop->AsOp()->gtOp1 = comma; + comma->AsOp()->gtOp2 = relop_op1; + + // Comma now has fewer nodes underneath it, so we need to regenerate its flags + comma->gtFlags &= ~GTF_ALL_EFFECT; + comma->gtFlags |= (comma->AsOp()->gtOp1->gtFlags) & GTF_ALL_EFFECT; + comma->gtFlags |= (comma->AsOp()->gtOp2->gtFlags) & GTF_ALL_EFFECT; + + noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0); + noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0); + relop->gtFlags |= + tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT); + + return relop; + } + + if (op1->gtOper == GT_COMMA) + { + // Here we look for the following tree + // and when the LCL_VAR is a temp we can fold the tree: + // + // EQ/NE EQ/NE + // / \ / \. + // COMMA CNS 0/1 -> RELOP CNS 0/1 + // / \ / \. + // ASG LCL_VAR + // / \. + // LCL_VAR RELOP + // / \. + // + + GenTree* asg = op1->AsOp()->gtOp1; + GenTree* lcl = op1->AsOp()->gtOp2; + + /* Make sure that the left side of the comma is the assignment of the LCL_VAR */ + if (asg->gtOper != GT_ASG) + { + goto SKIP; + } + + /* The right side of the comma must be a LCL_VAR temp */ + if (lcl->gtOper != GT_LCL_VAR) + { + goto SKIP; + } + + unsigned lclNum = lcl->AsLclVarCommon()->GetLclNum(); + noway_assert(lclNum < lvaCount); + + /* If the LCL_VAR is not a temp then bail, a temp has a single def */ + if (!lvaTable[lclNum].lvIsTemp) + { + goto SKIP; + } + + /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */ + // Fix 383856 X86/ARM ILGEN + if (lclNumIsCSE(lclNum)) + { + goto SKIP; + } + + /* We also must be assigning the result of a RELOP */ + if (asg->AsOp()->gtOp1->gtOper != GT_LCL_VAR) + { + goto SKIP; + } + + /* Both of the LCL_VAR must match */ + if (asg->AsOp()->gtOp1->AsLclVarCommon()->GetLclNum() != lclNum) + { + goto SKIP; + } + + /* If right side of asg is not a RELOP then skip */ + if (!asg->AsOp()->gtOp2->OperIsCompare()) + { + goto SKIP; + } + + /* Set op1 to the right side of asg, (i.e. the RELOP) */ + op1 = asg->AsOp()->gtOp2; + + DEBUG_DESTROY_NODE(asg->AsOp()->gtOp1); + DEBUG_DESTROY_NODE(lcl); + } + + if (op1->OperIsCompare()) + { + // Here we look for the following tree + // + // EQ/NE -> RELOP/!RELOP + // / \ / \. + // RELOP CNS 0/1 + // / \. + // + // Note that we will remove/destroy the EQ/NE node and move + // the RELOP up into it's location. + + /* Here we reverse the RELOP if necessary */ + + bool reverse = ((ival2 == 0) == (oper == GT_EQ)); + + if (reverse) + { + gtReverseCond(op1); + } + + /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */ + op1->gtType = tree->gtType; + + noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0); + op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE); + + DEBUG_DESTROY_NODE(tree); + return op1; + } + + // + // Now we check for a compare with the result of an '&' operator + // + // Here we look for the following transformation: + // + // EQ/NE EQ/NE + // / \ / \. + // AND CNS 0/1 -> AND CNS 0 + // / \ / \. + // RSZ/RSH CNS 1 x CNS (1 << y) + // / \. 
+ // x CNS_INT +y + + if (op1->gtOper == GT_AND) + { + GenTree* andOp = op1; + GenTree* rshiftOp = andOp->AsOp()->gtOp1; + + if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH)) + { + goto SKIP; + } + + if (!rshiftOp->AsOp()->gtOp2->IsCnsIntOrI()) + { + goto SKIP; + } + + ssize_t shiftAmount = rshiftOp->AsOp()->gtOp2->AsIntCon()->gtIconVal; + + if (shiftAmount < 0) + { + goto SKIP; + } + + if (!andOp->AsOp()->gtOp2->IsIntegralConst(1)) + { + goto SKIP; + } + + if (andOp->gtType == TYP_INT) + { + if (shiftAmount > 31) + { + goto SKIP; + } + + UINT32 newAndOperand = ((UINT32)1) << shiftAmount; + + andOp->AsOp()->gtOp2->AsIntCon()->gtIconVal = newAndOperand; + + // Reverse the cond if necessary + if (ival2 == 1) + { + gtReverseCond(tree); + cns2->AsIntCon()->gtIconVal = 0; + oper = tree->gtOper; + } + } + else if (andOp->gtType == TYP_LONG) + { + if (shiftAmount > 63) + { + goto SKIP; + } + + UINT64 newAndOperand = ((UINT64)1) << shiftAmount; + + andOp->AsOp()->gtOp2->AsIntConCommon()->SetLngValue(newAndOperand); + + // Reverse the cond if necessary + if (ival2 == 1) + { + gtReverseCond(tree); + cns2->AsIntConCommon()->SetLngValue(0); + oper = tree->gtOper; + } + } + + andOp->AsOp()->gtOp1 = rshiftOp->AsOp()->gtOp1; + + DEBUG_DESTROY_NODE(rshiftOp->AsOp()->gtOp2); + DEBUG_DESTROY_NODE(rshiftOp); + } + } // END if (ival2 != INT_MAX) + + SKIP: + /* Now check for compares with small constant longs that can be cast to int */ + + if (!cns2->OperIsConst()) + { + goto COMPARE; + } + + if (cns2->TypeGet() != TYP_LONG) + { + goto COMPARE; + } + + /* Is the constant 31 bits or smaller? */ + + if ((cns2->AsIntConCommon()->LngValue() >> 31) != 0) + { + goto COMPARE; + } + + /* Is the first comparand mask operation of type long ? */ + + if (op1->gtOper != GT_AND) + { + /* Another interesting case: cast from int */ + + if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT && + !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate + !op1->gtOverflow()) // cannot be an overflow checking cast + { + /* Simply make this into an integer comparison */ + + tree->AsOp()->gtOp1 = op1->AsCast()->CastOp(); + tree->AsOp()->gtOp2 = gtNewIconNode((int)cns2->AsIntConCommon()->LngValue(), TYP_INT); + } + + goto COMPARE; + } + + noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND); + + /* Is the result of the mask effectively an INT ? 
*/ + + GenTree* andMask; + andMask = op1->AsOp()->gtOp2; + if (andMask->gtOper != GT_CNS_NATIVELONG) + { + goto COMPARE; + } + if ((andMask->AsIntConCommon()->LngValue() >> 32) != 0) + { + goto COMPARE; + } + + /* Now we know that we can cast AsOp()->gtOp1 of AND to int */ + + op1->AsOp()->gtOp1 = gtNewCastNode(TYP_INT, op1->AsOp()->gtOp1, false, TYP_INT); + + /* now replace the mask node (AsOp()->gtOp2 of AND node) */ + + noway_assert(andMask == op1->AsOp()->gtOp2); + + ival1 = (int)andMask->AsIntConCommon()->LngValue(); + andMask->SetOper(GT_CNS_INT); + andMask->gtType = TYP_INT; + andMask->AsIntCon()->gtIconVal = ival1; + + /* now change the type of the AND node */ + + op1->gtType = TYP_INT; + + /* finally we replace the comparand */ + + ival2 = (int)cns2->AsIntConCommon()->LngValue(); + cns2->SetOper(GT_CNS_INT); + cns2->gtType = TYP_INT; + + noway_assert(cns2 == op2); + cns2->AsIntCon()->gtIconVal = ival2; + + goto COMPARE; + + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + + if (op2->gtOper == GT_CNS_INT) + { + cns2 = op2; + /* Check for "expr relop 1" */ + if (cns2->IsIntegralConst(1)) + { + /* Check for "expr >= 1" */ + if (oper == GT_GE) + { + /* Change to "expr != 0" for unsigned and "expr > 0" for signed */ + oper = (tree->IsUnsigned()) ? GT_NE : GT_GT; + goto SET_OPER; + } + /* Check for "expr < 1" */ + else if (oper == GT_LT) + { + /* Change to "expr == 0" for unsigned and "expr <= 0" for signed */ + oper = (tree->IsUnsigned()) ? GT_EQ : GT_LE; + goto SET_OPER; + } + } + /* Check for "expr relop -1" */ + else if (!tree->IsUnsigned() && cns2->IsIntegralConst(-1)) + { + /* Check for "expr <= -1" */ + if (oper == GT_LE) + { + /* Change to "expr < 0" */ + oper = GT_LT; + goto SET_OPER; + } + /* Check for "expr > -1" */ + else if (oper == GT_GT) + { + /* Change to "expr >= 0" */ + oper = GT_GE; + + SET_OPER: + // IF we get here we should be changing 'oper' + assert(tree->OperGet() != oper); + + // Keep the old ValueNumber for 'tree' as the new expr + // will still compute the same value as before + tree->SetOper(oper, GenTree::PRESERVE_VN); + cns2->AsIntCon()->gtIconVal = 0; + + // vnStore is null before the ValueNumber phase has run + if (vnStore != nullptr) + { + // Update the ValueNumber for 'cns2', as we just changed it to 0 + fgValueNumberTreeConst(cns2); + } + op2 = tree->AsOp()->gtOp2 = gtFoldExpr(op2); + } + } + else if (tree->IsUnsigned() && op2->IsIntegralConst(0)) + { + if ((oper == GT_GT) || (oper == GT_LE)) + { + // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT + // recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails + // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0) + // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The later case is rare, it sometimes + // occurs as a result of branch inversion. + oper = (oper == GT_LE) ? GT_EQ : GT_NE; + tree->SetOper(oper, GenTree::PRESERVE_VN); + tree->gtFlags &= ~GTF_UNSIGNED; + } + } + } + + COMPARE: + + noway_assert(tree->OperKind() & GTK_RELOP); + break; + + case GT_MUL: + +#ifndef TARGET_64BIT + if (typ == TYP_LONG) + { + // This must be GTF_MUL_64RSLT + assert(tree->gtIsValid64RsltMul()); + return tree; + } +#endif // TARGET_64BIT + goto CM_OVF_OP; + + case GT_SUB: + + if (tree->gtOverflow()) + { + goto CM_OVF_OP; + } + + // TODO #4104: there are a lot of other places where + // this condition is not checked before transformations. 
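+ // For example, "x - 3" is rewritten below as "x + (-3)" so that the commutative
+ // GT_ADD morphing (constant re-association, etc.) can apply to subtractions too.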
+ if (fgGlobalMorph) + { + /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */ + + noway_assert(op2); + if (op2->IsCnsIntOrI() && !op2->IsIconHandle()) + { + // Negate the constant and change the node to be "+", + // except when `op2` is a const byref. + + op2->AsIntConCommon()->SetIconValue(-op2->AsIntConCommon()->IconValue()); + op2->AsIntConRef().gtFieldSeq = FieldSeqStore::NotAField(); + oper = GT_ADD; + tree->ChangeOper(oper); + goto CM_ADD_OP; + } + + /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */ + + noway_assert(op1); + if (op1->IsCnsIntOrI()) + { + noway_assert(varTypeIsIntOrI(tree)); + + // The type of the new GT_NEG node cannot just be op2->TypeGet(). + // Otherwise we may sign-extend incorrectly in cases where the GT_NEG + // node ends up feeding directly into a cast, for example in + // GT_CAST(GT_SUB(0, s_1.ubyte)) + tree->AsOp()->gtOp2 = op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2); + fgMorphTreeDone(op2); + + oper = GT_ADD; + tree->ChangeOper(oper); + goto CM_ADD_OP; + } + + /* No match - exit */ + } + + // Skip optimization if non-NEG operand is constant. + // Both op1 and op2 are not constant because it was already checked above. + if (opts.OptimizationEnabled() && fgGlobalMorph && + (((op1->gtFlags & GTF_EXCEPT) == 0) || ((op2->gtFlags & GTF_EXCEPT) == 0))) + { + // a - -b = > a + b + // SUB(a, (NEG(b)) => ADD(a, b) + + if (!op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG)) + { + // tree: SUB + // op1: a + // op2: NEG + // op2Child: b + + GenTree* op2Child = op2->AsOp()->gtOp1; // b + oper = GT_ADD; + tree->SetOper(oper, GenTree::PRESERVE_VN); + tree->AsOp()->gtOp2 = op2Child; + + DEBUG_DESTROY_NODE(op2); + + op2 = op2Child; + } + + // -a - -b = > b - a + // SUB(NEG(a), (NEG(b)) => SUB(b, a) + + if (op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG)) + { + // tree: SUB + // op1: NEG + // op1Child: a + // op2: NEG + // op2Child: b + + GenTree* op1Child = op1->AsOp()->gtOp1; // a + GenTree* op2Child = op2->AsOp()->gtOp1; // b + tree->AsOp()->gtOp1 = op2Child; + tree->AsOp()->gtOp2 = op1Child; + + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(op2); + + op1 = op2Child; + op2 = op1Child; + } + } + + break; + +#ifdef TARGET_ARM64 + case GT_DIV: + if (!varTypeIsFloating(tree->gtType)) + { + // Codegen for this instruction needs to be able to throw two exceptions: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); + } + break; + case GT_UDIV: + // Codegen for this instruction needs to be able to throw one exception: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); + break; +#endif + + case GT_ADD: + + CM_OVF_OP: + if (tree->gtOverflow()) + { + tree->gtRequestSetFlags(); + + // Add the excptn-throwing basic block to jump to on overflow + + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); + + // We can't do any commutative morphing for overflow instructions + + break; + } + + CM_ADD_OP: + + case GT_OR: + case GT_XOR: + case GT_AND: + if (op2->OperIs(GT_NEG) && op1->IsLocal() && varTypeIsIntOrI(op1->TypeGet())) + { + if (tree->Compare(op1, op2->gtGetOp1())) + { + tree->SetOper(GT_CNS_INT); + tree->AsIntCon()->gtIconVal = 1; + return tree; + } + } + + /* Commute any non-REF constants to the right */ + + noway_assert(op1); + if (op1->OperIsConst() && (op1->gtType != TYP_REF)) + { + // TODO-Review: We used to assert here that + // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD)); + // With modifications to 
AddrTaken==>AddrExposed, we did more assertion propagation, + // and would sometimes hit this assertion. This may indicate a missed "remorph". + // Task is to re-enable this assertion and investigate. + + /* Swap the operands */ + tree->AsOp()->gtOp1 = op2; + tree->AsOp()->gtOp2 = op1; + + op1 = op2; + op2 = tree->AsOp()->gtOp2; + } + + // See if we can fold floating point operations (can regress minopts mode) + if (opts.OptimizationEnabled() && varTypeIsFloating(tree->TypeGet()) && !optValnumCSE_phase) + { + if ((oper == GT_MUL) && !op1->IsCnsFltOrDbl() && op2->IsCnsFltOrDbl()) + { + if (op2->AsDblCon()->gtDconVal == 2.0) + { + bool needsComma = !op1->OperIsLeaf() && !op1->IsLocal(); + // if op1 is not a leaf/local we have to introduce a temp via GT_COMMA. + // Unfortunately, it's not optHoistLoopCode-friendly yet so let's do it later. + if (!needsComma || (fgOrder == FGOrderLinear)) + { + // Fold "x*2.0" to "x+x" + op2 = fgMakeMultiUse(&tree->AsOp()->gtOp1); + op1 = tree->AsOp()->gtOp1; + oper = GT_ADD; + tree = gtNewOperNode(oper, tree->TypeGet(), op1, op2); + INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + } + } + else if (op2->AsDblCon()->gtDconVal == 1.0) + { + // Fold "x*1.0" to "x" + DEBUG_DESTROY_NODE(op2); + DEBUG_DESTROY_NODE(tree); + return op1; + } + } + } + + /* See if we can fold GT_ADD nodes. */ + + if (oper == GT_ADD) + { + /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */ + + if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) && + op1->AsOp()->gtOp2->gtOper == GT_CNS_INT && op2->AsOp()->gtOp2->gtOper == GT_CNS_INT && + !op1->gtOverflow() && !op2->gtOverflow()) + { + // Don't create a byref pointer that may point outside of the ref object. + // If a GC happens, the byref won't get updated. This can happen if one + // of the int components is negative. It also requires the address generation + // be in a fully-interruptible code region. + if (!varTypeIsGC(op1->AsOp()->gtOp1->TypeGet()) && !varTypeIsGC(op2->AsOp()->gtOp1->TypeGet())) + { + cns1 = op1->AsOp()->gtOp2; + cns2 = op2->AsOp()->gtOp2; + cns1->AsIntCon()->gtIconVal += cns2->AsIntCon()->gtIconVal; +#ifdef TARGET_64BIT + if (cns1->TypeGet() == TYP_INT) + { + // we need to properly re-sign-extend or truncate after adding two int constants above + cns1->AsIntCon()->TruncateOrSignExtend32(); + } +#endif // TARGET_64BIT + + tree->AsOp()->gtOp2 = cns1; + DEBUG_DESTROY_NODE(cns2); + + op1->AsOp()->gtOp2 = op2->AsOp()->gtOp1; + op1->gtFlags |= (op1->AsOp()->gtOp2->gtFlags & GTF_ALL_EFFECT); + DEBUG_DESTROY_NODE(op2); + op2 = tree->AsOp()->gtOp2; + } + } + + if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ)) + { + CLANG_FORMAT_COMMENT_ANCHOR; + + // Fold (x + 0). + + if ((op2->AsIntConCommon()->IconValue() == 0) && !gtIsActiveCSE_Candidate(tree)) + { + + // If this addition is adding an offset to a null pointer, + // avoid the work and yield the null pointer immediately. + // Dereferencing the pointer in either case will have the + // same effect. + + if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) && + ((op1->gtFlags & GTF_ALL_EFFECT) == 0)) + { + op2->gtType = tree->gtType; + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(tree); + return op2; + } + + // Remove the addition iff it won't change the tree type + // to TYP_REF. 
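+ // (That is, don't return a TYP_REF op1 in place of a non-REF tree,
+ // e.g. for a TYP_BYREF "ref + 0".)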
+ + if (!gtIsActiveCSE_Candidate(op2) && + ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF))) + { + if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) && + (op2->AsIntCon()->gtFieldSeq != nullptr) && + (op2->AsIntCon()->gtFieldSeq != FieldSeqStore::NotAField())) + { + fgAddFieldSeqForZeroOffset(op1, op2->AsIntCon()->gtFieldSeq); + } + + DEBUG_DESTROY_NODE(op2); + DEBUG_DESTROY_NODE(tree); + + return op1; + } + } + } + + if (opts.OptimizationEnabled() && fgGlobalMorph && + (((op1->gtFlags & GTF_EXCEPT) == 0) || ((op2->gtFlags & GTF_EXCEPT) == 0))) + { + // - a + b = > b - a + // ADD((NEG(a), b) => SUB(b, a) + + // Skip optimization if non-NEG operand is constant. + if (op1->OperIs(GT_NEG) && !op2->OperIs(GT_NEG) && + !(op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))) + { + // tree: ADD + // op1: NEG + // op2: b + // op1Child: a + + GenTree* op1Child = op1->AsOp()->gtOp1; // a + oper = GT_SUB; + tree->SetOper(oper, GenTree::PRESERVE_VN); + tree->AsOp()->gtOp1 = op2; + tree->AsOp()->gtOp2 = op1Child; + + DEBUG_DESTROY_NODE(op1); + + op1 = op2; + op2 = op1Child; + } + + // a + -b = > a - b + // ADD(a, (NEG(b)) => SUB(a, b) + + if (!op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG)) + { + // a is non cosntant because it was already canonicalized to have + // variable on the left and constant on the right. + + // tree: ADD + // op1: a + // op2: NEG + // op2Child: b + + GenTree* op2Child = op2->AsOp()->gtOp1; // a + oper = GT_SUB; + tree->SetOper(oper, GenTree::PRESERVE_VN); + tree->AsOp()->gtOp2 = op2Child; + + DEBUG_DESTROY_NODE(op2); + + op2 = op2Child; + } + } + } + /* See if we can fold GT_MUL by const nodes */ + else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase) + { +#ifndef TARGET_64BIT + noway_assert(typ <= TYP_UINT); +#endif // TARGET_64BIT + noway_assert(!tree->gtOverflow()); + + ssize_t mult = op2->AsIntConCommon()->IconValue(); + bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->AsIntCon()->gtFieldSeq != nullptr && + op2->AsIntCon()->gtFieldSeq->IsConstantIndexFieldSeq(); + + assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr); + + if (mult == 0) + { + // We may be able to throw away op1 (unless it has side-effects) + + if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0) + { + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(tree); + return op2; // Just return the "0" node + } + + // We need to keep op1 for the side-effects. Hang it off + // a GT_COMMA node + + tree->ChangeOper(GT_COMMA); + return tree; + } + + size_t abs_mult = (mult >= 0) ? mult : -mult; + size_t lowestBit = genFindLowestBit(abs_mult); + bool changeToShift = false; + + // is it a power of two? (positive or negative) + if (abs_mult == lowestBit) + { + // if negative negate (min-int does not need negation) + if (mult < 0 && mult != SSIZE_T_MIN) + { + // The type of the new GT_NEG node cannot just be op1->TypeGet(). + // Otherwise we may sign-extend incorrectly in cases where the GT_NEG + // node ends up feeding directly a cast, for example in + // GT_CAST(GT_MUL(-1, s_1.ubyte)) + tree->AsOp()->gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); + fgMorphTreeDone(op1); + } + + // If "op2" is a constant array index, the other multiplicand must be a constant. + // Transfer the annotation to the other one. 
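+ // When the multiply becomes a shift below, op2 turns into the shift count,
+ // so the field sequence has to live on the other (constant) operand instead.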
+ if (op2->OperGet() == GT_CNS_INT && op2->AsIntCon()->gtFieldSeq != nullptr && + op2->AsIntCon()->gtFieldSeq->IsConstantIndexFieldSeq()) + { + assert(op2->AsIntCon()->gtFieldSeq->m_next == nullptr); + GenTree* otherOp = op1; + if (otherOp->OperGet() == GT_NEG) + { + otherOp = otherOp->AsOp()->gtOp1; + } + assert(otherOp->OperGet() == GT_CNS_INT); + assert(otherOp->AsIntCon()->gtFieldSeq == FieldSeqStore::NotAField()); + otherOp->AsIntCon()->gtFieldSeq = op2->AsIntCon()->gtFieldSeq; + } + + if (abs_mult == 1) + { + DEBUG_DESTROY_NODE(op2); + DEBUG_DESTROY_NODE(tree); + return op1; + } + + /* Change the multiplication into a shift by log2(val) bits */ + op2->AsIntConCommon()->SetIconValue(genLog2(abs_mult)); + changeToShift = true; + } +#if LEA_AVAILABLE + else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult()) + { + int shift = genLog2(lowestBit); + ssize_t factor = abs_mult >> shift; + + if (factor == 3 || factor == 5 || factor == 9) + { + // if negative negate (min-int does not need negation) + if (mult < 0 && mult != SSIZE_T_MIN) + { + tree->AsOp()->gtOp1 = op1 = gtNewOperNode(GT_NEG, genActualType(op1->TypeGet()), op1); + fgMorphTreeDone(op1); + } + + GenTree* factorIcon = gtNewIconNode(factor, TYP_I_IMPL); + if (op2IsConstIndex) + { + factorIcon->AsIntCon()->gtFieldSeq = + GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField); + } + + // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift + tree->AsOp()->gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon); + fgMorphTreeDone(op1); + + op2->AsIntConCommon()->SetIconValue(shift); + changeToShift = true; + } + } +#endif // LEA_AVAILABLE + if (changeToShift) + { + // vnStore is null before the ValueNumber phase has run + if (vnStore != nullptr) + { + // Update the ValueNumber for 'op2', as we just changed the constant + fgValueNumberTreeConst(op2); + } + oper = GT_LSH; + // Keep the old ValueNumber for 'tree' as the new expr + // will still compute the same value as before + tree->ChangeOper(oper, GenTree::PRESERVE_VN); + + goto DONE_MORPHING_CHILDREN; + } + } + else if (fgOperIsBitwiseRotationRoot(oper)) + { + tree = fgRecognizeAndMorphBitwiseRotation(tree); + + // fgRecognizeAndMorphBitwiseRotation may return a new tree + oper = tree->OperGet(); + typ = tree->TypeGet(); + op1 = tree->AsOp()->gtOp1; + op2 = tree->AsOp()->gtOp2; + } + + if (varTypeIsIntegralOrI(tree->TypeGet()) && tree->OperIs(GT_ADD, GT_MUL, GT_AND, GT_OR, GT_XOR)) + { + GenTree* foldedTree = fgMorphCommutative(tree->AsOp()); + if (foldedTree != nullptr) + { + tree = foldedTree; + op1 = tree->gtGetOp1(); + op2 = tree->gtGetOp2(); + if (!tree->OperIs(oper)) + { + return tree; + } + } + } + + break; + + case GT_NOT: + case GT_NEG: + // Remove double negation/not. + // Note: this is not a safe tranformation if "tree" is a CSE candidate. + // Consider for example the following expression: NEG(NEG(OP)), where the top-level + // NEG is a CSE candidate. Were we to morph this to just OP, CSE would fail to find + // the original NEG in the statement. 
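+ // When it is safe, NEG(NEG(x)) and NOT(NOT(x)) simply become "x".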
+ if (op1->OperIs(oper) && opts.OptimizationEnabled() && !gtIsActiveCSE_Candidate(tree)) + { + GenTree* child = op1->AsOp()->gtGetOp1(); + return child; + } + + // Distribute negation over simple multiplication/division expressions + if (opts.OptimizationEnabled() && !optValnumCSE_phase && tree->OperIs(GT_NEG) && + op1->OperIs(GT_MUL, GT_DIV)) + { + GenTreeOp* mulOrDiv = op1->AsOp(); + GenTree* op1op1 = mulOrDiv->gtGetOp1(); + GenTree* op1op2 = mulOrDiv->gtGetOp2(); + + if (!op1op1->IsCnsIntOrI() && op1op2->IsCnsIntOrI() && !op1op2->IsIconHandle()) + { + // NEG(MUL(a, C)) => MUL(a, -C) + // NEG(DIV(a, C)) => DIV(a, -C), except when C = {-1, 1} + ssize_t constVal = op1op2->AsIntCon()->IconValue(); + if ((mulOrDiv->OperIs(GT_DIV) && (constVal != -1) && (constVal != 1)) || + (mulOrDiv->OperIs(GT_MUL) && !mulOrDiv->gtOverflow())) + { + GenTree* newOp1 = op1op1; // a + GenTree* newOp2 = gtNewIconNode(-constVal, op1op2->TypeGet()); // -C + mulOrDiv->gtOp1 = newOp1; + mulOrDiv->gtOp2 = newOp2; + + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op1op2); + + return mulOrDiv; + } + } + } + + /* Any constant cases should have been folded earlier */ + noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase); + break; + + case GT_CKFINITE: + + noway_assert(varTypeIsFloating(op1->TypeGet())); + + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN); + break; + + case GT_OBJ: + // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on + // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X + // is a local or clsVar, even if it has been address-exposed. + if (op1->OperGet() == GT_ADDR) + { + GenTreeUnOp* addr = op1->AsUnOp(); + GenTree* addrOp = addr->gtGetOp1(); + tree->gtFlags |= (addrOp->gtFlags & GTF_GLOB_REF); + } + break; + + case GT_IND: + { + // Can not remove a GT_IND if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(tree)) + { + break; + } + + bool foldAndReturnTemp = false; + temp = nullptr; + ival1 = 0; + + // Don't remove a volatile GT_IND, even if the address points to a local variable. + if ((tree->gtFlags & GTF_IND_VOLATILE) == 0) + { + /* Try to Fold *(&X) into X */ + if (op1->gtOper == GT_ADDR) + { + // Can not remove a GT_ADDR if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(op1)) + { + break; + } + + temp = op1->AsOp()->gtOp1; // X + + // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that + // they are the *same* struct type. In fact, they almost certainly aren't. If the + // address has an associated field sequence, that identifies this case; go through + // the "lcl_fld" path rather than this one. + FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below. 
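+ // If the indirection type matches X exactly and the address carries no
+ // zero-offset field annotation, we can return X itself.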
+ if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq)) + { + foldAndReturnTemp = true; + } + else if (temp->OperIsLocal()) + { + unsigned lclNum = temp->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[lclNum]; + + // We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset + if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0)) + { + noway_assert(varTypeIsStruct(varDsc)); + + // We will try to optimize when we have a single field struct that is being struct promoted + if (varDsc->lvFieldCnt == 1) + { + unsigned lclNumFld = varDsc->lvFieldLclStart; + // just grab the promoted field + LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld]; + + // Also make sure that the tree type matches the fieldVarType and that it's lvFldOffset + // is zero + if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0)) + { + // We can just use the existing promoted field LclNum + temp->AsLclVarCommon()->SetLclNum(lclNumFld); + temp->gtType = fieldVarDsc->TypeGet(); + + foldAndReturnTemp = true; + } + } + } + // If the type of the IND (typ) is a "small int", and the type of the local has the + // same width, then we can reduce to just the local variable -- it will be + // correctly normalized. + // + // The below transformation cannot be applied if the local var needs to be normalized on load. + else if (varTypeIsSmall(typ) && (genTypeSize(varDsc) == genTypeSize(typ)) && + !lvaTable[lclNum].lvNormalizeOnLoad()) + { + const bool definitelyLoad = (tree->gtFlags & GTF_DONT_CSE) == 0; + const bool possiblyStore = !definitelyLoad; + + if (possiblyStore || (varTypeIsUnsigned(varDsc) == varTypeIsUnsigned(typ))) + { + typ = temp->TypeGet(); + tree->gtType = typ; + foldAndReturnTemp = true; + + if (possiblyStore) + { + // This node can be on the left-hand-side of an assignment node. + // Mark this node with GTF_VAR_FOLDED_IND to make sure that fgDoNormalizeOnStore() + // is called on its parent in post-order morph. + temp->gtFlags |= GTF_VAR_FOLDED_IND; + } + } + } + // For matching types we can fold + else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) && + !lvaTable[lclNum].lvNormalizeOnLoad()) + { + tree->gtType = typ = temp->TypeGet(); + foldAndReturnTemp = true; + } + else + { + // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e. + // nullptr) + assert(fieldSeq == nullptr); + bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq); + assert(b || fieldSeq == nullptr); + + if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD)) + { + // Append the field sequence, change the type. + temp->AsLclFld()->SetFieldSeq( + GetFieldSeqStore()->Append(temp->AsLclFld()->GetFieldSeq(), fieldSeq)); + temp->gtType = typ; + + foldAndReturnTemp = true; + } + } + // Otherwise will will fold this into a GT_LCL_FLD below + // where we check (temp != nullptr) + } + else // !temp->OperIsLocal() + { + // We don't try to fold away the GT_IND/GT_ADDR for this case + temp = nullptr; + } + } + else if (op1->OperGet() == GT_ADD) + { +#ifdef TARGET_ARM + // Check for a misalignment floating point indirection. 
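+ // A float/double load whose byte offset is not a multiple of the 4-byte
+ // float size is flagged as GTF_IND_UNALIGNED below.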
+ if (varTypeIsFloating(typ)) + { + GenTree* addOp2 = op1->AsOp()->gtGetOp2(); + if (addOp2->IsCnsIntOrI()) + { + ssize_t offset = addOp2->AsIntCon()->gtIconVal; + if ((offset % emitTypeSize(TYP_FLOAT)) != 0) + { + tree->gtFlags |= GTF_IND_UNALIGNED; + } + } + } +#endif // TARGET_ARM + + /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */ + + if (op1->AsOp()->gtOp1->OperGet() == GT_ADDR && op1->AsOp()->gtOp2->OperGet() == GT_CNS_INT && + opts.OptimizationEnabled()) + { + // No overflow arithmetic with pointers + noway_assert(!op1->gtOverflow()); + + temp = op1->AsOp()->gtOp1->AsOp()->gtOp1; + if (!temp->OperIsLocal()) + { + temp = nullptr; + break; + } + + // Can not remove the GT_ADDR if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(op1->AsOp()->gtOp1)) + { + break; + } + + ival1 = op1->AsOp()->gtOp2->AsIntCon()->gtIconVal; + fieldSeq = op1->AsOp()->gtOp2->AsIntCon()->gtFieldSeq; + + // Does the address have an associated zero-offset field sequence? + FieldSeqNode* addrFieldSeq = nullptr; + if (GetZeroOffsetFieldMap()->Lookup(op1->AsOp()->gtOp1, &addrFieldSeq)) + { + fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq); + } + + if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT) + { + noway_assert(!varTypeIsGC(temp->TypeGet())); + foldAndReturnTemp = true; + } + else + { + // The emitter can't handle large offsets + if (ival1 != (unsigned short)ival1) + { + break; + } + + // The emitter can get confused by invalid offsets + if (ival1 >= Compiler::lvaLclSize(temp->AsLclVarCommon()->GetLclNum())) + { + break; + } + } + // Now we can fold this into a GT_LCL_FLD below + // where we check (temp != nullptr) + } + } + } + + // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging: + // - We may have a load of a local where the load has a different type than the local + // - We may have a load of a local plus an offset + // + // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and + // offset if doing so is legal. The only cases in which this transformation is illegal are if the load + // begins before the local or if the load extends beyond the end of the local (i.e. if the load is + // out-of-bounds w.r.t. the local). + if ((temp != nullptr) && !foldAndReturnTemp) + { + assert(temp->OperIsLocal()); + + const unsigned lclNum = temp->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const varDsc = &lvaTable[lclNum]; + + const var_types tempTyp = temp->TypeGet(); + const bool useExactSize = varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK); + const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp); + + // Make sure we do not enregister this lclVar. + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); + + // If the size of the load is greater than the size of the lclVar, we cannot fold this access into + // a lclFld: the access represented by an lclFld node must begin at or after the start of the + // lclVar and must not extend beyond the end of the lclVar. + if ((ival1 >= 0) && ((ival1 + genTypeSize(typ)) <= varSize)) + { + GenTreeLclFld* lclFld; + + // We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival' + // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival' + // Then we change the type of the GT_LCL_FLD to match the orginal GT_IND type. 
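+ // For example, reading an int at offset 8 of a struct local V02 via
+ // IND(int)(ADD(ADDR(LCL_VAR V02), 8)) becomes LCL_FLD int V02 [+8].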
+ // + if (temp->OperGet() == GT_LCL_FLD) + { + lclFld = temp->AsLclFld(); + lclFld->SetLclOffs(lclFld->GetLclOffs() + static_cast(ival1)); + lclFld->SetFieldSeq(GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeq)); + } + else // we have a GT_LCL_VAR + { + assert(temp->OperGet() == GT_LCL_VAR); + temp->ChangeOper(GT_LCL_FLD); // Note that this typically makes the gtFieldSeq "NotAField", + // unless there is a zero filed offset associated with 'temp'. + lclFld = temp->AsLclFld(); + lclFld->SetLclOffs(static_cast(ival1)); + + if (lclFld->GetFieldSeq() == FieldSeqStore::NotAField()) + { + if (fieldSeq != nullptr) + { + // If it does represent a field, note that. + lclFld->SetFieldSeq(fieldSeq); + } + } + else + { + // Append 'fieldSeq' to the existing one + lclFld->SetFieldSeq(GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeq)); + } + } + temp->gtType = tree->gtType; + foldAndReturnTemp = true; + } + } + + if (foldAndReturnTemp) + { + assert(temp != nullptr); + assert(temp->TypeGet() == typ); + assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR)); + + // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for + // 'temp' because a GT_ADDR always marks it for its operand. + temp->gtFlags &= ~GTF_DONT_CSE; + temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE); + + if (op1->OperGet() == GT_ADD) + { + DEBUG_DESTROY_NODE(op1->AsOp()->gtOp1); // GT_ADDR + DEBUG_DESTROY_NODE(op1->AsOp()->gtOp2); // GT_CNS_INT + } + DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR + DEBUG_DESTROY_NODE(tree); // GT_IND + + // If the result of the fold is a local var, we may need to perform further adjustments e.g. for + // normalization. + if (temp->OperIs(GT_LCL_VAR)) + { +#ifdef DEBUG + // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear + // and the node in question must have this bit set (as it has already been morphed). + temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + const bool forceRemorph = true; + temp = fgMorphLocalVar(temp, forceRemorph); +#ifdef DEBUG + // We then set this flag on `temp` because `fgMorhpLocalVar` may not set it itself, and the + // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function + // returns. + temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + } + + return temp; + } + + // Only do this optimization when we are in the global optimizer. Doing this after value numbering + // could result in an invalid value number for the newly generated GT_IND node. + if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph) + { + // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)). + // TBD: this transformation is currently necessary for correctness -- it might + // be good to analyze the failures that result if we don't do this, and fix them + // in other ways. Ideally, this should be optional. + GenTree* commaNode = op1; + unsigned treeFlags = tree->gtFlags; + commaNode->gtType = typ; + commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is + // dangerous, clear the GTF_REVERSE_OPS at + // least. +#ifdef DEBUG + commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + while (commaNode->AsOp()->gtOp2->gtOper == GT_COMMA) + { + commaNode = commaNode->AsOp()->gtOp2; + commaNode->gtType = typ; + commaNode->gtFlags = + (treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG & ~GTF_CALL); // Bashing the GT_COMMA flags here is + // dangerous, clear the GTF_REVERSE_OPS, GT_ASG, and GT_CALL at + // least. 
+ commaNode->gtFlags |= ((commaNode->AsOp()->gtOp1->gtFlags | commaNode->AsOp()->gtOp2->gtFlags) & + (GTF_ASG | GTF_CALL)); +#ifdef DEBUG + commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + } + bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0; + ArrayInfo arrInfo; + if (wasArrIndex) + { + bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo); + assert(b); + GetArrayInfoMap()->Remove(tree); + } + tree = op1; + GenTree* addr = commaNode->AsOp()->gtOp2; + op1 = gtNewIndir(typ, addr); + // This is very conservative + op1->gtFlags |= treeFlags & ~GTF_ALL_EFFECT & ~GTF_IND_NONFAULTING; + op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT); + + if (wasArrIndex) + { + GetArrayInfoMap()->Set(op1, arrInfo); + } +#ifdef DEBUG + op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + commaNode->AsOp()->gtOp2 = op1; + commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); + return tree; + } + + break; + } + + case GT_ADDR: + + // Can not remove op1 if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(op1)) + { + break; + } + + if (op1->OperGet() == GT_IND) + { + if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0) + { + // Can not remove a GT_ADDR if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(tree)) + { + break; + } + + // Perform the transform ADDR(IND(...)) == (...). + GenTree* addr = op1->AsOp()->gtOp1; + + // If tree has a zero field sequence annotation, update the annotation + // on addr node. + FieldSeqNode* zeroFieldSeq = nullptr; + if (GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq)) + { + fgAddFieldSeqForZeroOffset(addr, zeroFieldSeq); + } + + noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); + + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(tree); + + return addr; + } + } + else if (op1->OperGet() == GT_OBJ) + { + // Can not remove a GT_ADDR if it is currently a CSE candidate. + if (gtIsActiveCSE_Candidate(tree)) + { + break; + } + + // Perform the transform ADDR(OBJ(...)) == (...). + GenTree* addr = op1->AsObj()->Addr(); + + noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL); + + DEBUG_DESTROY_NODE(op1); + DEBUG_DESTROY_NODE(tree); + + return addr; + } + else if (op1->gtOper == GT_CAST) + { + GenTree* casting = op1->AsCast()->CastOp(); + if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR) + { + DEBUG_DESTROY_NODE(op1); + tree->AsOp()->gtOp1 = op1 = casting; + } + } + else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase) + { + // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)). + // (Be sure to mark "z" as an l-value...) + + GenTreePtrStack commas(getAllocator(CMK_ArrayStack)); + for (GenTree* comma = op1; comma != nullptr && comma->gtOper == GT_COMMA; comma = comma->gtGetOp2()) + { + commas.Push(comma); + } + GenTree* commaNode = commas.Top(); + + // The top-level addr might be annotated with a zeroOffset field. + FieldSeqNode* zeroFieldSeq = nullptr; + bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq); + tree = op1; + commaNode->AsOp()->gtOp2->gtFlags |= GTF_DONT_CSE; + + // If the node we're about to put under a GT_ADDR is an indirection, it + // doesn't need to be materialized, since we only want the addressing mode. Because + // of this, this GT_IND is not a faulting indirection and we don't have to extract it + // as a side effect. 
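+ // Accordingly, the code below marks it GTF_IND_NONFAULTING and recomputes its
+ // GTF_EXCEPT bit from the address operand alone.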
+ GenTree* commaOp2 = commaNode->AsOp()->gtOp2; + if (commaOp2->OperIsBlk()) + { + commaOp2->SetOper(GT_IND); + } + if (commaOp2->gtOper == GT_IND) + { + commaOp2->gtFlags |= GTF_IND_NONFAULTING; + commaOp2->gtFlags &= ~GTF_EXCEPT; + commaOp2->gtFlags |= (commaOp2->AsOp()->gtOp1->gtFlags & GTF_EXCEPT); + } + + op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2); + + if (isZeroOffset) + { + // Transfer the annotation to the new GT_ADDR node. + fgAddFieldSeqForZeroOffset(op1, zeroFieldSeq); + } + commaNode->AsOp()->gtOp2 = op1; + // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform + // might give op1 a type different from byref (like, say, native int). So now go back and give + // all the comma nodes the type of op1. + // TODO: the comma flag update below is conservative and can be improved. + // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to + // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF). + + while (!commas.Empty()) + { + GenTree* comma = commas.Pop(); + comma->gtType = op1->gtType; + comma->gtFlags |= op1->gtFlags; +#ifdef DEBUG + comma->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + gtUpdateNodeSideEffects(comma); + } + + return tree; + } + break; + + case GT_COLON: + if (fgGlobalMorph) + { + /* Mark the nodes that are conditionally executed */ + fgWalkTreePre(&tree, gtMarkColonCond); + } + /* Since we're doing this postorder we clear this if it got set by a child */ + fgRemoveRestOfBlock = false; + break; + + case GT_COMMA: + + /* Special case: trees that don't produce a value */ + if (op2->OperIs(GT_ASG) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2)) + { + typ = tree->gtType = TYP_VOID; + } + + // If we are in the Valuenum CSE phase then don't morph away anything as these + // nodes may have CSE defs/uses in them. + // + if (!optValnumCSE_phase) + { + // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this + // is all we need. + + GenTree* op1SideEffects = nullptr; + // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example) + // hoisted expressions in loops. + gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE)); + if (op1SideEffects) + { + // Replace the left hand side with the side effect list. + tree->AsOp()->gtOp1 = op1SideEffects; + gtUpdateNodeSideEffects(tree); + } + else + { + op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op1); + return op2; + } + + /* If the right operand is just a void nop node, throw it away */ + if (op2->IsNothingNode() && op1->gtType == TYP_VOID) + { + op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG)); + DEBUG_DESTROY_NODE(tree); + DEBUG_DESTROY_NODE(op2); + return op1; + } + } + + break; + + case GT_JTRUE: + + /* Special case if fgRemoveRestOfBlock is set to true */ + if (fgRemoveRestOfBlock) + { + if (fgIsCommaThrow(op1, true)) + { + GenTree* throwNode = op1->AsOp()->gtOp1; + + JITDUMP("Removing [%06d] GT_JTRUE as the block now unconditionally throws an exception.\n", + dspTreeID(tree)); + DEBUG_DESTROY_NODE(tree); + + return throwNode; + } + + noway_assert(op1->OperKind() & GTK_RELOP); + noway_assert(op1->gtFlags & GTF_EXCEPT); + + // We need to keep op1 for the side-effects. 
Hang it off + // a GT_COMMA node + + JITDUMP("Keeping side-effects by bashing [%06d] GT_JTRUE into a GT_COMMA.\n", dspTreeID(tree)); + + tree->ChangeOper(GT_COMMA); + tree->AsOp()->gtOp2 = op2 = gtNewNothingNode(); + + // Additionally since we're eliminating the JTRUE + // codegen won't like it if op1 is a RELOP of longs, floats or doubles. + // So we change it into a GT_COMMA as well. + JITDUMP("Also bashing [%06d] (a relop) into a GT_COMMA.\n", dspTreeID(op1)); + op1->ChangeOper(GT_COMMA); + op1->gtFlags &= ~GTF_UNSIGNED; // Clear the unsigned flag if it was set on the relop + op1->gtType = op1->AsOp()->gtOp1->gtType; + + return tree; + } + break; + + default: + break; + } + + assert(oper == tree->gtOper); + + // If we are in the Valuenum CSE phase then don't morph away anything as these + // nodes may have CSE defs/uses in them. + // + if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList()) + { + /* Check for op1 as a GT_COMMA with a unconditional throw node */ + if (op1 && fgIsCommaThrow(op1, true)) + { + if ((op1->gtFlags & GTF_COLON_COND) == 0) + { + /* We can safely throw out the rest of the statements */ + fgRemoveRestOfBlock = true; + } + + GenTree* throwNode = op1->AsOp()->gtOp1; + + if (oper == GT_COMMA) + { + /* Both tree and op1 are GT_COMMA nodes */ + /* Change the tree's op1 to the throw node: op1->AsOp()->gtOp1 */ + tree->AsOp()->gtOp1 = throwNode; + + // Possibly reset the assignment flag + if (((throwNode->gtFlags & GTF_ASG) == 0) && ((op2 == nullptr) || ((op2->gtFlags & GTF_ASG) == 0))) + { + tree->gtFlags &= ~GTF_ASG; + } + + return tree; + } + else if (oper != GT_NOP) + { + if (genActualType(typ) == genActualType(op1->gtType)) + { + /* The types match so, return the comma throw node as the new tree */ + return op1; + } + else + { + if (typ == TYP_VOID) + { + // Return the throw node + return throwNode; + } + else + { + GenTree* commaOp2 = op1->AsOp()->gtOp2; + + // need type of oper to be same as tree + if (typ == TYP_LONG) + { + commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); + commaOp2->AsIntConCommon()->SetLngValue(0); + /* Change the types of oper and commaOp2 to TYP_LONG */ + op1->gtType = commaOp2->gtType = TYP_LONG; + } + else if (varTypeIsFloating(typ)) + { + commaOp2->ChangeOperConst(GT_CNS_DBL); + commaOp2->AsDblCon()->gtDconVal = 0.0; + /* Change the types of oper and commaOp2 to TYP_DOUBLE */ + op1->gtType = commaOp2->gtType = TYP_DOUBLE; + } + else + { + commaOp2->ChangeOperConst(GT_CNS_INT); + commaOp2->AsIntConCommon()->SetIconValue(0); + /* Change the types of oper and commaOp2 to TYP_INT */ + op1->gtType = commaOp2->gtType = TYP_INT; + } + + /* Return the GT_COMMA node as the new tree */ + return op1; + } + } + } + } + + /* Check for op2 as a GT_COMMA with a unconditional throw */ + + if (op2 && fgIsCommaThrow(op2, true)) + { + if ((op2->gtFlags & GTF_COLON_COND) == 0) + { + /* We can safely throw out the rest of the statements */ + fgRemoveRestOfBlock = true; + } + + // If op1 has no side-effects + if ((op1->gtFlags & GTF_ALL_EFFECT) == 0) + { + // If tree is an asg node + if (tree->OperIs(GT_ASG)) + { + /* Return the throw node as the new tree */ + return op2->AsOp()->gtOp1; + } + + if (tree->OperGet() == GT_ARR_BOUNDS_CHECK) + { + /* Return the throw node as the new tree */ + return op2->AsOp()->gtOp1; + } + + // If tree is a comma node + if (tree->OperGet() == GT_COMMA) + { + /* Return the throw node as the new tree */ + return op2->AsOp()->gtOp1; + } + + /* for the shift nodes the type of op2 can differ from the 
tree type */ + if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT)) + { + noway_assert(GenTree::OperIsShiftOrRotate(oper)); + + GenTree* commaOp2 = op2->AsOp()->gtOp2; + + commaOp2->ChangeOperConst(GT_CNS_NATIVELONG); + commaOp2->AsIntConCommon()->SetLngValue(0); + + /* Change the types of oper and commaOp2 to TYP_LONG */ + op2->gtType = commaOp2->gtType = TYP_LONG; + } + + if ((genActualType(typ) == TYP_INT) && + (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet()))) + { + // An example case is comparison (say GT_GT) of two longs or floating point values. + + GenTree* commaOp2 = op2->AsOp()->gtOp2; + + commaOp2->ChangeOperConst(GT_CNS_INT); + commaOp2->AsIntCon()->gtIconVal = 0; + /* Change the types of oper and commaOp2 to TYP_INT */ + op2->gtType = commaOp2->gtType = TYP_INT; + } + + if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL)) + { + noway_assert(tree->OperGet() == GT_ADD); + + GenTree* commaOp2 = op2->AsOp()->gtOp2; + + commaOp2->ChangeOperConst(GT_CNS_INT); + commaOp2->AsIntCon()->gtIconVal = 0; + /* Change the types of oper and commaOp2 to TYP_BYREF */ + op2->gtType = commaOp2->gtType = TYP_BYREF; + } + + /* types should now match */ + noway_assert((genActualType(typ) == genActualType(op2->gtType))); + + /* Return the GT_COMMA node as the new tree */ + return op2; + } + } + } + + /*------------------------------------------------------------------------- + * Optional morphing is done if tree transformations is permitted + */ + + if ((opts.compFlags & CLFLG_TREETRANS) == 0) + { + return tree; + } + + tree = fgMorphSmpOpOptional(tree->AsOp()); + + return tree; +} + +//---------------------------------------------------------------------------------------------- +// fgMorphRetInd: Try to get rid of extra IND(ADDR()) pairs in a return tree. +// +// Arguments: +// node - The return node that uses an indirection. +// +// Return Value: +// the original op1 of the ret if there was no optimization or an optimized new op1. +// +GenTree* Compiler::fgMorphRetInd(GenTreeUnOp* ret) +{ + assert(ret->OperIs(GT_RETURN)); + assert(ret->gtGetOp1()->OperIs(GT_IND, GT_BLK, GT_OBJ)); + GenTreeIndir* ind = ret->gtGetOp1()->AsIndir(); + GenTree* addr = ind->Addr(); + + if (addr->OperIs(GT_ADDR) && addr->gtGetOp1()->OperIs(GT_LCL_VAR)) + { + // If `return` retypes LCL_VAR as a smaller struct it should not set `doNotEnregister` on that + // LclVar. + // Example: in `Vector128:AsVector2` we have RETURN SIMD8(OBJ SIMD8(ADDR byref(LCL_VAR SIMD16))). + GenTreeLclVar* lclVar = addr->gtGetOp1()->AsLclVar(); + if (!lvaIsImplicitByRefLocal(lclVar->GetLclNum())) + { + assert(!gtIsActiveCSE_Candidate(addr) && !gtIsActiveCSE_Candidate(ind)); + unsigned indSize; + if (ind->OperIs(GT_IND)) + { + indSize = genTypeSize(ind); + } + else + { + indSize = ind->AsBlk()->GetLayout()->GetSize(); + } + + LclVarDsc* varDsc = lvaGetDesc(lclVar); + + unsigned lclVarSize; + if (!lclVar->TypeIs(TYP_STRUCT)) + + { + lclVarSize = genTypeSize(varDsc->TypeGet()); + } + else + { + lclVarSize = varDsc->lvExactSize; + } + // TODO: change conditions in `canFold` to `indSize <= lclVarSize`, but currently do not support `BITCAST + // int<-SIMD16` etc. + assert((indSize <= lclVarSize) || varDsc->lvDoNotEnregister); + +#if defined(TARGET_64BIT) + bool canFold = (indSize == lclVarSize); +#else // !TARGET_64BIT + // TODO: improve 32 bit targets handling for LONG returns if necessary, nowadays we do not support `BITCAST + // long<->double` there. 
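+ // On 32 bit targets we additionally require the value to fit in a single register.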
+ bool canFold = (indSize == lclVarSize) && (lclVarSize <= REGSIZE_BYTES); +#endif + // TODO: support `genReturnBB != nullptr`, it requires #11413 to avoid `Incompatible types for + // gtNewTempAssign`. + if (canFold && (genReturnBB == nullptr)) + { + // Fold (TYPE1)*(&(TYPE2)x) even if types do not match, lowering will handle it. + // Getting rid of this IND(ADDR()) pair allows to keep lclVar as not address taken + // and enregister it. + DEBUG_DESTROY_NODE(ind); + DEBUG_DESTROY_NODE(addr); + ret->gtOp1 = lclVar; + return ret->gtGetOp1(); + } + else if (!varDsc->lvDoNotEnregister) + { + lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(Compiler::DNER_BlockOp)); + } + } + } + return ind; +} + +#ifdef _PREFAST_ +#pragma warning(pop) +#endif + +GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) +{ + genTreeOps oper = tree->gtOper; + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types typ = tree->TypeGet(); + + if (fgGlobalMorph && GenTree::OperIsCommutative(oper)) + { + /* Swap the operands so that the more expensive one is 'op1' */ + + if (tree->gtFlags & GTF_REVERSE_OPS) + { + tree->gtOp1 = op2; + tree->gtOp2 = op1; + + op2 = op1; + op1 = tree->gtOp1; + + tree->gtFlags &= ~GTF_REVERSE_OPS; + } + + if (oper == op2->gtOper) + { + /* Reorder nested operators at the same precedence level to be + left-recursive. For example, change "(a+(b+c))" to the + equivalent expression "((a+b)+c)". + */ + + /* Things are handled differently for floating-point operators */ + + if (!varTypeIsFloating(tree->TypeGet())) + { + fgMoveOpsLeft(tree); + op1 = tree->gtOp1; + op2 = tree->gtOp2; + } + } + } + +#if REARRANGE_ADDS + + /* Change "((x+icon)+y)" to "((x+y)+icon)" + Don't reorder floating-point operations */ + + if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() && + varTypeIsIntegralOrI(typ)) + { + GenTree* ad1 = op1->AsOp()->gtOp1; + GenTree* ad2 = op1->AsOp()->gtOp2; + + if (!op2->OperIsConst() && ad2->OperIsConst()) + { + // This takes + // + (tree) + // / \. + // / \. + // / \. + // + (op1) op2 + // / \. + // / \. + // ad1 ad2 + // + // and it swaps ad2 and op2. + + // Don't create a byref pointer that may point outside of the ref object. + // If a GC happens, the byref won't get updated. This can happen if one + // of the int components is negative. It also requires the address generation + // be in a fully-interruptible code region. + if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) + { + tree->gtOp2 = ad2; + + op1->AsOp()->gtOp2 = op2; + op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT; + + op2 = tree->gtOp2; + } + } + } + +#endif + + /*------------------------------------------------------------------------- + * Perform optional oper-specific postorder morphing + */ + + switch (oper) + { + case GT_ASG: + // Make sure we're allowed to do this. + if (optValnumCSE_phase) + { + // It is not safe to reorder/delete CSE's + break; + } + + if (varTypeIsStruct(typ) && !tree->IsPhiDefn()) + { + if (tree->OperIsCopyBlkOp()) + { + return fgMorphCopyBlock(tree); + } + else + { + return fgMorphInitBlock(tree); + } + } + + if (typ == TYP_LONG) + { + break; + } + + if (op2->gtFlags & GTF_ASG) + { + break; + } + + if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT)) + { + break; + } + + /* Special case: a cast that can be thrown away */ + + // TODO-Cleanup: fgMorphSmp does a similar optimization. 
However, it removes only + // one cast and sometimes there is another one after it that gets removed by this + // code. fgMorphSmp should be improved to remove all redundant casts so this code + // can be removed. + + if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow()) + { + var_types srct; + var_types cast; + var_types dstt; + + srct = op2->AsCast()->CastOp()->TypeGet(); + cast = (var_types)op2->CastToType(); + dstt = op1->TypeGet(); + + /* Make sure these are all ints and precision is not lost */ + + if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT) + { + op2 = tree->gtOp2 = op2->AsCast()->CastOp(); + } + } + + break; + + case GT_MUL: + + /* Check for the case "(val + icon) * icon" */ + + if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD) + { + GenTree* add = op1->AsOp()->gtOp2; + + if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0)) + { + if (tree->gtOverflow() || op1->gtOverflow()) + { + break; + } + + ssize_t imul = op2->AsIntCon()->gtIconVal; + ssize_t iadd = add->AsIntCon()->gtIconVal; + + /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */ + + oper = GT_ADD; + tree->ChangeOper(oper); + + op2->AsIntCon()->gtIconVal = iadd * imul; + + op1->ChangeOper(GT_MUL); + + add->AsIntCon()->gtIconVal = imul; +#ifdef TARGET_64BIT + if (add->gtType == TYP_INT) + { + // we need to properly re-sign-extend or truncate after multiplying two int constants above + add->AsIntCon()->TruncateOrSignExtend32(); + } +#endif // TARGET_64BIT + } + } + + break; + + case GT_DIV: + + /* For "val / 1", just return "val" */ + + if (op2->IsIntegralConst(1)) + { + DEBUG_DESTROY_NODE(tree); + return op1; + } + break; + + case GT_UDIV: + case GT_UMOD: + tree->CheckDivideByConstOptimized(this); + break; + + case GT_LSH: + + /* Check for the case "(val + icon) << icon" */ + + if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow()) + { + GenTree* cns = op1->AsOp()->gtOp2; + + if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0)) + { + ssize_t ishf = op2->AsIntConCommon()->IconValue(); + ssize_t iadd = cns->AsIntConCommon()->IconValue(); + + // printf("Changing '(val+icon1)<ChangeOper(GT_ADD); + ssize_t result = iadd << ishf; + op2->AsIntConCommon()->SetIconValue(result); +#ifdef TARGET_64BIT + if (op1->gtType == TYP_INT) + { + op2->AsIntCon()->TruncateOrSignExtend32(); + } +#endif // TARGET_64BIT + + // we are reusing the shift amount node here, but the type we want is that of the shift result + op2->gtType = op1->gtType; + + if (cns->gtOper == GT_CNS_INT && cns->AsIntCon()->gtFieldSeq != nullptr && + cns->AsIntCon()->gtFieldSeq->IsConstantIndexFieldSeq()) + { + assert(cns->AsIntCon()->gtFieldSeq->m_next == nullptr); + op2->AsIntCon()->gtFieldSeq = cns->AsIntCon()->gtFieldSeq; + } + + op1->ChangeOper(GT_LSH); + + cns->AsIntConCommon()->SetIconValue(ishf); + } + } + + break; + + case GT_XOR: + + if (!optValnumCSE_phase) + { + /* "x ^ -1" is "~x" */ + + if (op2->IsIntegralConst(-1)) + { + tree->ChangeOper(GT_NOT); + tree->gtOp2 = nullptr; + DEBUG_DESTROY_NODE(op2); + } + else if (op2->IsIntegralConst(1) && op1->OperIsCompare()) + { + /* "binaryVal ^ 1" is "!binaryVal" */ + gtReverseCond(op1); + DEBUG_DESTROY_NODE(op2); + DEBUG_DESTROY_NODE(tree); + return op1; + } + } + + break; + + case GT_INIT_VAL: + // Initialization values for initBlk have special semantics - their lower + // byte is used to fill the struct. 
However, we allow 0 as a "bare" value, + // which enables them to get a VNForZero, and be propagated. + if (op1->IsIntegralConst(0)) + { + return op1; + } + break; + + default: + break; + } + return tree; +} + +//------------------------------------------------------------------------ +// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b +// (see ECMA III 3.55 and III.3.56). +// +// Arguments: +// tree - The GT_MOD/GT_UMOD tree to morph +// +// Returns: +// The morphed tree +// +// Notes: +// For ARM64 we don't have a remainder instruction so this transform is +// always done. For XARCH this transform is done if we know that magic +// division will be used, in that case this transform allows CSE to +// eliminate the redundant div from code like "x = a / 3; y = a % 3;". +// +// This method will produce the above expression in 'a' and 'b' are +// leaf nodes, otherwise, if any of them is not a leaf it will spill +// its value into a temporary variable, an example: +// (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) ) +// +GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) +{ + if (tree->OperGet() == GT_MOD) + { + tree->SetOper(GT_DIV); + } + else if (tree->OperGet() == GT_UMOD) + { + tree->SetOper(GT_UDIV); + } + else + { + noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv"); + } + + var_types type = tree->gtType; + GenTree* denominator = tree->gtOp2; + GenTree* numerator = tree->gtOp1; + + if (!numerator->OperIsLeaf()) + { + numerator = fgMakeMultiUse(&tree->gtOp1); + } + + if (!denominator->OperIsLeaf()) + { + denominator = fgMakeMultiUse(&tree->gtOp2); + } + + // The numerator and denominator may have been assigned to temps, in which case + // their defining assignments are in the current tree. Therefore, we need to + // set the execuction order accordingly on the nodes we create. + // That is, the "mul" will be evaluated in "normal" order, and the "sub" must + // be set to be evaluated in reverse order. + // + GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator)); + assert(!mul->IsReverseOp()); + GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul); + sub->gtFlags |= GTF_REVERSE_OPS; + +#ifdef DEBUG + sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif + + tree->CheckDivideByConstOptimized(this); + + return sub; +} + +//------------------------------------------------------------------------------ +// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree. +// +// +// Arguments: +// oper - Operation to check +// +// Return Value: +// True if the operation can be a root of a bitwise rotation tree; false otherwise. + +bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper) +{ + return (oper == GT_OR) || (oper == GT_XOR); +} + +//------------------------------------------------------------------------------ +// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return +// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree. +// +// Arguments: +// tree - tree to check for a rotation pattern +// +// Return Value: +// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise. +// +// Assumption: +// The input is a GT_OR or a GT_XOR tree. + +GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree) +{ + // + // Check for a rotation pattern, e.g., + // + // OR ROL + // / \ / \. + // LSH RSZ -> x y + // / \ / \. 
+ // x AND x AND + // / \ / \. + // y 31 ADD 31 + // / \. + // NEG 32 + // | + // y + // The patterns recognized: + // (x << (y & M)) op (x >>> ((-y + N) & M)) + // (x >>> ((-y + N) & M)) op (x << (y & M)) + // + // (x << y) op (x >>> (-y + N)) + // (x >> > (-y + N)) op (x << y) + // + // (x >>> (y & M)) op (x << ((-y + N) & M)) + // (x << ((-y + N) & M)) op (x >>> (y & M)) + // + // (x >>> y) op (x << (-y + N)) + // (x << (-y + N)) op (x >>> y) + // + // (x << c1) op (x >>> c2) + // (x >>> c1) op (x << c2) + // + // where + // c1 and c2 are const + // c1 + c2 == bitsize(x) + // N == bitsize(x) + // M is const + // M & (N - 1) == N - 1 + // op is either | or ^ + + if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0)) + { + // We can't do anything if the tree has assignments, calls, or volatile + // reads. Note that we allow GTF_EXCEPT side effect since any exceptions + // thrown by the original tree will be thrown by the transformed tree as well. + return tree; + } + + genTreeOps oper = tree->OperGet(); + assert(fgOperIsBitwiseRotationRoot(oper)); + + // Check if we have an LSH on one side of the OR and an RSZ on the other side. + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + GenTree* leftShiftTree = nullptr; + GenTree* rightShiftTree = nullptr; + if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ)) + { + leftShiftTree = op1; + rightShiftTree = op2; + } + else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH)) + { + leftShiftTree = op2; + rightShiftTree = op1; + } + else + { + return tree; + } + + // Check if the trees representing the value to shift are identical. + // We already checked that there are no side effects above. + if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1())) + { + GenTree* rotatedValue = leftShiftTree->gtGetOp1(); + var_types rotatedValueActualType = genActualType(rotatedValue->gtType); + ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8; + noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64)); + GenTree* leftShiftIndex = leftShiftTree->gtGetOp2(); + GenTree* rightShiftIndex = rightShiftTree->gtGetOp2(); + + // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits + // shouldn't be masked for the transformation to be valid. If additional + // higher bits are not masked, the transformation is still valid since the result + // of MSIL shift instructions is unspecified if the shift amount is greater or equal + // than the width of the value being shifted. + ssize_t minimalMask = rotatedValueBitSize - 1; + ssize_t leftShiftMask = -1; + ssize_t rightShiftMask = -1; + + if ((leftShiftIndex->OperGet() == GT_AND)) + { + if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI()) + { + leftShiftMask = leftShiftIndex->gtGetOp2()->AsIntCon()->gtIconVal; + leftShiftIndex = leftShiftIndex->gtGetOp1(); + } + else + { + return tree; + } + } + + if ((rightShiftIndex->OperGet() == GT_AND)) + { + if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI()) + { + rightShiftMask = rightShiftIndex->gtGetOp2()->AsIntCon()->gtIconVal; + rightShiftIndex = rightShiftIndex->gtGetOp1(); + } + else + { + return tree; + } + } + + if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask)) + { + // The shift index is overmasked, e.g., we have + // something like (x << y & 15) or + // (x >> (32 - y) & 15 with 32 bit x. + // The transformation is not valid. 
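For readers of this patch, an editorial illustration (not part of the diff): the source idiom the recognizer above targets is the classic masked rotate. The helper name below is hypothetical and the snippet only sketches the 32-bit unsigned shape.

    #include <cstdint>

    // Illustrative sketch only. A rotate-left written in the
    // "(x << (y & M)) | (x >> ((-y + N) & M))" shape with N == 32 and M == 31,
    // which is the pattern fgRecognizeAndMorphBitwiseRotation folds to GT_ROL.
    static inline std::uint32_t RotateLeft32(std::uint32_t x, std::uint32_t y)
    {
        // Both shift amounts stay in [0, 31], so there is no undefined behavior.
        return (x << (y & 31)) | (x >> ((32 - y) & 31));
    }

Because both masks keep the lower five bits intact (M & (N - 1) == N - 1), the overmasking check above does not reject this form.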
+ return tree; + } + + GenTree* shiftIndexWithAdd = nullptr; + GenTree* shiftIndexWithoutAdd = nullptr; + genTreeOps rotateOp = GT_NONE; + GenTree* rotateIndex = nullptr; + + if (leftShiftIndex->OperGet() == GT_ADD) + { + shiftIndexWithAdd = leftShiftIndex; + shiftIndexWithoutAdd = rightShiftIndex; + rotateOp = GT_ROR; + } + else if (rightShiftIndex->OperGet() == GT_ADD) + { + shiftIndexWithAdd = rightShiftIndex; + shiftIndexWithoutAdd = leftShiftIndex; + rotateOp = GT_ROL; + } + + if (shiftIndexWithAdd != nullptr) + { + if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI()) + { + if (shiftIndexWithAdd->gtGetOp2()->AsIntCon()->gtIconVal == rotatedValueBitSize) + { + if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG) + { + if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd)) + { + // We found one of these patterns: + // (x << (y & M)) | (x >>> ((-y + N) & M)) + // (x << y) | (x >>> (-y + N)) + // (x >>> (y & M)) | (x << ((-y + N) & M)) + // (x >>> y) | (x << (-y + N)) + // where N == bitsize(x), M is const, and + // M & (N - 1) == N - 1 + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifndef TARGET_64BIT + if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64)) + { + // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86. + // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need + // to add helpers for GT_ROL and GT_ROR. + return tree; + } +#endif + + rotateIndex = shiftIndexWithoutAdd; + } + } + } + } + } + else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI())) + { + if (leftShiftIndex->AsIntCon()->gtIconVal + rightShiftIndex->AsIntCon()->gtIconVal == rotatedValueBitSize) + { + // We found this pattern: + // (x << c1) | (x >>> c2) + // where c1 and c2 are const and c1 + c2 == bitsize(x) + rotateOp = GT_ROL; + rotateIndex = leftShiftIndex; + } + } + + if (rotateIndex != nullptr) + { + noway_assert(GenTree::OperIsRotate(rotateOp)); + + unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT; + + // We can use the same tree only during global morph; reusing the tree in a later morph + // may invalidate value numbers. + if (fgGlobalMorph) + { + tree->AsOp()->gtOp1 = rotatedValue; + tree->AsOp()->gtOp2 = rotateIndex; + tree->ChangeOper(rotateOp); + + unsigned childFlags = 0; + for (GenTree* op : tree->Operands()) + { + childFlags |= (op->gtFlags & GTF_ALL_EFFECT); + } + + // The parent's flags should be a superset of its operands' flags + noway_assert((inputTreeEffects & childFlags) == childFlags); + } + else + { + tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex); + noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT)); + } + + return tree; + } + } + return tree; +} + +/***************************************************************************** + * + * Transform the given tree for code generation and return an equivalent tree. + */ + +GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac) +{ + assert(tree); + +#ifdef DEBUG + if (verbose) + { + if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID) + { + noway_assert(!"JitBreakMorphTree hit"); + } + } +#endif + +#ifdef DEBUG + int thisMorphNum = 0; + if (verbose && treesBeforeAfterMorph) + { + thisMorphNum = morphNum++; + printf("\nfgMorphTree (before %d):\n", thisMorphNum); + gtDispTree(tree); + } +#endif + + if (fgGlobalMorph) + { + // Apply any rewrites for implicit byref arguments before morphing the + // tree. 
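Editorial context, not part of the diff: an implicit byref argument is a struct the ABI passes through a hidden pointer, so the callee's uses of the parameter become indirections; the rewrite applied here updates those uses in the tree. The type and function below are hypothetical.

    // Illustrative sketch only. On win-x64, a struct whose size is not 1, 2, 4 or
    // 8 bytes is passed by reference to a caller-made copy, so this 32-byte value
    // reaches the callee as a hidden pointer and field reads become indirections.
    struct BigValue
    {
        long long a, b, c, d;
    };

    static long long Sum(BigValue v) // by-value in source, implicit byref in the ABI
    {
        return v.a + v.d;            // loads through the hidden pointer after morph
    }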
+ + if (fgMorphImplicitByRefArgs(tree)) + { +#ifdef DEBUG + if (verbose && treesBeforeAfterMorph) + { + printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum); + gtDispTree(tree); + } +#endif + } + } + +/*------------------------------------------------------------------------- + * fgMorphTree() can potentially replace a tree with another, and the + * caller has to store the return value correctly. + * Turn this on to always make copy of "tree" here to shake out + * hidden/unupdated references. + */ + +#ifdef DEBUG + + if (compStressCompile(STRESS_GENERIC_CHECK, 0)) + { + GenTree* copy; + + if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL) + { + copy = gtNewLargeOperNode(GT_ADD, TYP_INT); + } + else + { + copy = new (this, GT_CALL) GenTreeCall(TYP_INT); + } + + copy->ReplaceWith(tree, this); + +#if defined(LATE_DISASM) + // GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields + if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle()) + { + copy->AsIntCon()->gtCompileTimeHandle = tree->AsIntCon()->gtCompileTimeHandle; + } +#endif + + DEBUG_DESTROY_NODE(tree); + tree = copy; + } +#endif // DEBUG + + if (fgGlobalMorph) + { + /* Ensure that we haven't morphed this node already */ + assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!"); + +#if LOCAL_ASSERTION_PROP + /* Before morphing the tree, we try to propagate any active assertions */ + if (optLocalAssertionProp) + { + /* Do we have any active assertions? */ + + if (optAssertionCount > 0) + { + GenTree* newTree = tree; + while (newTree != nullptr) + { + tree = newTree; + /* newTree is non-Null if we propagated an assertion */ + newTree = optAssertionProp(apFull, tree, nullptr, nullptr); + } + assert(tree != nullptr); + } + } + PREFAST_ASSUME(tree != nullptr); +#endif + } + + /* Save the original un-morphed tree for fgMorphTreeDone */ + + GenTree* oldTree = tree; + + /* Figure out what kind of a node we have */ + + unsigned kind = tree->OperKind(); + + /* Is this a constant node? */ + + if (kind & GTK_CONST) + { + tree = fgMorphConst(tree); + goto DONE; + } + + /* Is this a leaf node? */ + + if (kind & GTK_LEAF) + { + tree = fgMorphLeaf(tree); + goto DONE; + } + + /* Is it a 'simple' unary/binary operator? */ + + if (kind & GTK_SMPOP) + { + tree = fgMorphSmpOp(tree, mac); + goto DONE; + } + + /* See what kind of a special operator we have here */ + + switch (tree->OperGet()) + { + case GT_FIELD: + tree = fgMorphField(tree, mac); + break; + + case GT_CALL: + if (tree->OperMayThrow(this)) + { + tree->gtFlags |= GTF_EXCEPT; + } + else + { + tree->gtFlags &= ~GTF_EXCEPT; + } + tree = fgMorphCall(tree->AsCall()); + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + { + fgSetRngChkTarget(tree); + + GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); + bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex); + bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen); + // If the index is a comma(throw, x), just return that. + if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex)) + { + tree = bndsChk->gtIndex; + } + + bndsChk->gtFlags &= ~GTF_CALL; + + // Propagate effects flags upwards + bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT); + bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT); + + // Otherwise, we don't change the tree. 
+ } + break; + + case GT_ARR_ELEM: + tree->AsArrElem()->gtArrObj = fgMorphTree(tree->AsArrElem()->gtArrObj); + + unsigned dim; + for (dim = 0; dim < tree->AsArrElem()->gtArrRank; dim++) + { + tree->AsArrElem()->gtArrInds[dim] = fgMorphTree(tree->AsArrElem()->gtArrInds[dim]); + } + + tree->gtFlags &= ~GTF_CALL; + + tree->gtFlags |= tree->AsArrElem()->gtArrObj->gtFlags & GTF_ALL_EFFECT; + + for (dim = 0; dim < tree->AsArrElem()->gtArrRank; dim++) + { + tree->gtFlags |= tree->AsArrElem()->gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT; + } + + if (fgGlobalMorph) + { + fgSetRngChkTarget(tree, false); + } + break; + + case GT_ARR_OFFSET: + tree->AsArrOffs()->gtOffset = fgMorphTree(tree->AsArrOffs()->gtOffset); + tree->AsArrOffs()->gtIndex = fgMorphTree(tree->AsArrOffs()->gtIndex); + tree->AsArrOffs()->gtArrObj = fgMorphTree(tree->AsArrOffs()->gtArrObj); + + tree->gtFlags &= ~GTF_CALL; + tree->gtFlags |= tree->AsArrOffs()->gtOffset->gtFlags & GTF_ALL_EFFECT; + tree->gtFlags |= tree->AsArrOffs()->gtIndex->gtFlags & GTF_ALL_EFFECT; + tree->gtFlags |= tree->AsArrOffs()->gtArrObj->gtFlags & GTF_ALL_EFFECT; + if (fgGlobalMorph) + { + fgSetRngChkTarget(tree, false); + } + break; + + case GT_PHI: + tree->gtFlags &= ~GTF_ALL_EFFECT; + for (GenTreePhi::Use& use : tree->AsPhi()->Uses()) + { + use.SetNode(fgMorphTree(use.GetNode())); + tree->gtFlags |= use.GetNode()->gtFlags & GTF_ALL_EFFECT; + } + break; + + case GT_FIELD_LIST: + tree->gtFlags &= ~GTF_ALL_EFFECT; + for (GenTreeFieldList::Use& use : tree->AsFieldList()->Uses()) + { + use.SetNode(fgMorphTree(use.GetNode())); + tree->gtFlags |= (use.GetNode()->gtFlags & GTF_ALL_EFFECT); + } + break; + + case GT_CMPXCHG: + tree->AsCmpXchg()->gtOpLocation = fgMorphTree(tree->AsCmpXchg()->gtOpLocation); + tree->AsCmpXchg()->gtOpValue = fgMorphTree(tree->AsCmpXchg()->gtOpValue); + tree->AsCmpXchg()->gtOpComparand = fgMorphTree(tree->AsCmpXchg()->gtOpComparand); + + tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL); + + tree->gtFlags |= tree->AsCmpXchg()->gtOpLocation->gtFlags & GTF_ALL_EFFECT; + tree->gtFlags |= tree->AsCmpXchg()->gtOpValue->gtFlags & GTF_ALL_EFFECT; + tree->gtFlags |= tree->AsCmpXchg()->gtOpComparand->gtFlags & GTF_ALL_EFFECT; + break; + + case GT_STORE_DYN_BLK: + case GT_DYN_BLK: + if (tree->OperGet() == GT_STORE_DYN_BLK) + { + tree->AsDynBlk()->Data() = fgMorphTree(tree->AsDynBlk()->Data()); + } + tree->AsDynBlk()->Addr() = fgMorphTree(tree->AsDynBlk()->Addr()); + tree->AsDynBlk()->gtDynamicSize = fgMorphTree(tree->AsDynBlk()->gtDynamicSize); + + tree->gtFlags &= ~GTF_CALL; + tree->SetIndirExceptionFlags(this); + + if (tree->OperGet() == GT_STORE_DYN_BLK) + { + tree->gtFlags |= tree->AsDynBlk()->Data()->gtFlags & GTF_ALL_EFFECT; + } + tree->gtFlags |= tree->AsDynBlk()->Addr()->gtFlags & GTF_ALL_EFFECT; + tree->gtFlags |= tree->AsDynBlk()->gtDynamicSize->gtFlags & GTF_ALL_EFFECT; + break; + + case GT_INDEX_ADDR: + GenTreeIndexAddr* indexAddr; + indexAddr = tree->AsIndexAddr(); + indexAddr->Index() = fgMorphTree(indexAddr->Index()); + indexAddr->Arr() = fgMorphTree(indexAddr->Arr()); + + tree->gtFlags &= ~GTF_CALL; + + tree->gtFlags |= indexAddr->Index()->gtFlags & GTF_ALL_EFFECT; + tree->gtFlags |= indexAddr->Arr()->gtFlags & GTF_ALL_EFFECT; + break; + + default: +#ifdef DEBUG + gtDispTree(tree); +#endif + noway_assert(!"unexpected operator"); + } +DONE: + + fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum)); + + return tree; +} + +#if LOCAL_ASSERTION_PROP +//------------------------------------------------------------------------ +// 
fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum +// +// Arguments: +// lclNum - The varNum of the lclVar for which we're killing assertions. +// tree - (DEBUG only) the tree responsible for killing its assertions. +// +void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree)) +{ + /* All dependent assertions are killed here */ + + ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum)); + + if (killed) + { + AssertionIndex index = optAssertionCount; + while (killed && (index > 0)) + { + if (BitVecOps::IsMember(apTraits, killed, index - 1)) + { +#ifdef DEBUG + AssertionDsc* curAssertion = optGetAssertion(index); + noway_assert((curAssertion->op1.lcl.lclNum == lclNum) || + ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum))); + if (verbose) + { + printf("\nThe assignment "); + printTreeID(tree); + printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum); + optPrintAssertion(curAssertion); + } +#endif + // Remove this bit from the killed mask + BitVecOps::RemoveElemD(apTraits, killed, index - 1); + + optAssertionRemove(index); + } + + index--; + } + + // killed mask should now be zero + noway_assert(BitVecOps::IsEmpty(apTraits, killed)); + } +} +//------------------------------------------------------------------------ +// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum. +// +// Arguments: +// lclNum - The varNum of the lclVar for which we're killing assertions. +// tree - (DEBUG only) the tree responsible for killing its assertions. +// +// Notes: +// For structs and struct fields, it will invalidate the children and parent +// respectively. +// Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar. +// +void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree)) +{ + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (varDsc->lvPromoted) + { + noway_assert(varTypeIsStruct(varDsc)); + + // Kill the field locals. + for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) + { + fgKillDependentAssertionsSingle(i DEBUGARG(tree)); + } + + // Kill the struct local itself. + fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); + } + else if (varDsc->lvIsStructField) + { + // Kill the field local. + fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); + + // Kill the parent struct. + fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree)); + } + else + { + fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); + } +} +#endif // LOCAL_ASSERTION_PROP + +/***************************************************************************** + * + * This function is called to complete the morphing of a tree node + * It should only be called once for each node. + * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated, + * to enforce the invariant that each node is only morphed once. + * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced + * by an equivalent tree. 
+ * + */ + +void Compiler::fgMorphTreeDone(GenTree* tree, + GenTree* oldTree /* == NULL */ + DEBUGARG(int morphNum)) +{ +#ifdef DEBUG + if (verbose && treesBeforeAfterMorph) + { + printf("\nfgMorphTree (after %d):\n", morphNum); + gtDispTree(tree); + printf(""); // in our logic this causes a flush + } +#endif + + if (!fgGlobalMorph) + { + return; + } + + if ((oldTree != nullptr) && (oldTree != tree)) + { + /* Ensure that we have morphed this node */ + assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!"); + +#ifdef DEBUG + TransferTestDataToNode(oldTree, tree); +#endif + } + else + { + // Ensure that we haven't morphed this node already + assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!"); + } + + if (tree->OperKind() & GTK_CONST) + { + goto DONE; + } + +#if LOCAL_ASSERTION_PROP + + if (!optLocalAssertionProp) + { + goto DONE; + } + + /* Do we have any active assertions? */ + + if (optAssertionCount > 0) + { + /* Is this an assignment to a local variable */ + GenTreeLclVarCommon* lclVarTree = nullptr; + + // The check below will miss LIR-style assignments. + // + // But we shouldn't be running local assertion prop on these, + // as local prop gets disabled when we run global prop. + assert(!tree->OperIs(GT_STORE_LCL_VAR, GT_STORE_LCL_FLD)); + + // DefinesLocal can return true for some BLK op uses, so + // check what gets assigned only when we're at an assignment. + if (tree->OperIs(GT_ASG) && tree->DefinesLocal(this, &lclVarTree)) + { + unsigned lclNum = lclVarTree->GetLclNum(); + noway_assert(lclNum < lvaCount); + fgKillDependentAssertions(lclNum DEBUGARG(tree)); + } + } + + /* If this tree makes a new assertion - make it available */ + optAssertionGen(tree); + +#endif // LOCAL_ASSERTION_PROP + +DONE:; + +#ifdef DEBUG + /* Mark this node as being morphed */ + tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif +} + +/***************************************************************************** + * + * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants + * Returns true if we modified the flow graph + */ + +bool Compiler::fgFoldConditional(BasicBlock* block) +{ + bool result = false; + + // We don't want to make any code unreachable + if (opts.OptimizationDisabled()) + { + return false; + } + + if (block->bbJumpKind == BBJ_COND) + { + noway_assert(block->bbStmtList != nullptr && block->bbStmtList->GetPrevStmt() != nullptr); + + Statement* lastStmt = block->lastStmt(); + + noway_assert(lastStmt->GetNextStmt() == nullptr); + + if (lastStmt->GetRootNode()->gtOper == GT_CALL) + { + noway_assert(fgRemoveRestOfBlock); + + /* Unconditional throw - transform the basic block into a BBJ_THROW */ + fgConvertBBToThrowBB(block); + +#ifdef DEBUG + if (verbose) + { + printf("\nConditional folded at " FMT_BB "\n", block->bbNum); + printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum); + } +#endif + goto DONE_COND; + } + + noway_assert(lastStmt->GetRootNode()->gtOper == GT_JTRUE); + + /* Did we fold the conditional */ + + noway_assert(lastStmt->GetRootNode()->AsOp()->gtOp1); + GenTree* condTree; + condTree = lastStmt->GetRootNode()->AsOp()->gtOp1; + GenTree* cond; + cond = condTree->gtEffectiveVal(true); + + if (cond->OperKind() & GTK_CONST) + { + /* Yupee - we folded the conditional! 
+ * Remove the conditional statement */ + + noway_assert(cond->gtOper == GT_CNS_INT); + noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0)); + + if (condTree != cond) + { + // Preserve any side effects + assert(condTree->OperIs(GT_COMMA)); + lastStmt->SetRootNode(condTree); + } + else + { + // no side effects, remove the jump entirely + fgRemoveStmt(block, lastStmt); + } + // block is a BBJ_COND that we are folding the conditional for. + // bTaken is the path that will always be taken from block. + // bNotTaken is the path that will never be taken from block. + // + BasicBlock* bTaken; + BasicBlock* bNotTaken; + + if (cond->AsIntCon()->gtIconVal != 0) + { + /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */ + block->bbJumpKind = BBJ_ALWAYS; + bTaken = block->bbJumpDest; + bNotTaken = block->bbNext; + } + else + { + /* Unmark the loop if we are removing a backwards branch */ + /* dest block must also be marked as a loop head and */ + /* We must be able to reach the backedge block */ + if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && + fgReachable(block->bbJumpDest, block)) + { + optUnmarkLoopBlocks(block->bbJumpDest, block); + } + + /* JTRUE 0 - transform the basic block into a BBJ_NONE */ + block->bbJumpKind = BBJ_NONE; + bTaken = block->bbNext; + bNotTaken = block->bbJumpDest; + } + + if (fgHaveValidEdgeWeights) + { + // We are removing an edge from block to bNotTaken + // and we have already computed the edge weights, so + // we will try to adjust some of the weights + // + flowList* edgeTaken = fgGetPredForBlock(bTaken, block); + BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block + + // We examine the taken edge (block -> bTaken) + // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight + // else if bTaken has valid profile weight and block does not we try to adjust block's weight + // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken + // + if (block->hasProfileWeight()) + { + // The edge weights for (block -> bTaken) are 100% of block's weight + + edgeTaken->setEdgeWeights(block->bbWeight, block->bbWeight, bTaken); + + if (!bTaken->hasProfileWeight()) + { + if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight)) + { + // Update the weight of bTaken + bTaken->inheritWeight(block); + bUpdated = bTaken; + } + } + } + else if (bTaken->hasProfileWeight()) + { + if (bTaken->countOfInEdges() == 1) + { + // There is only one in edge to bTaken + edgeTaken->setEdgeWeights(bTaken->bbWeight, bTaken->bbWeight, bTaken); + + // Update the weight of block + block->inheritWeight(bTaken); + bUpdated = block; + } + } + + if (bUpdated != nullptr) + { + BasicBlock::weight_t newMinWeight; + BasicBlock::weight_t newMaxWeight; + + flowList* edge; + // Now fix the weights of the edges out of 'bUpdated' + switch (bUpdated->bbJumpKind) + { + case BBJ_NONE: + edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); + newMaxWeight = bUpdated->bbWeight; + newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); + break; + + case BBJ_COND: + edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated); + newMaxWeight = bUpdated->bbWeight; + newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); + FALLTHROUGH; + + case BBJ_ALWAYS: + edge = 
fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated); + newMaxWeight = bUpdated->bbWeight; + newMinWeight = min(edge->edgeWeightMin(), newMaxWeight); + edge->setEdgeWeights(newMinWeight, newMaxWeight, bUpdated->bbNext); + break; + + default: + // We don't handle BBJ_SWITCH + break; + } + } + } + + /* modify the flow graph */ + + /* Remove 'block' from the predecessor list of 'bNotTaken' */ + fgRemoveRefPred(bNotTaken, block); + +#ifdef DEBUG + if (verbose) + { + printf("\nConditional folded at " FMT_BB "\n", block->bbNum); + printf(FMT_BB " becomes a %s", block->bbNum, + block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE"); + if (block->bbJumpKind == BBJ_ALWAYS) + { + printf(" to " FMT_BB, block->bbJumpDest->bbNum); + } + printf("\n"); + } +#endif + + /* if the block was a loop condition we may have to modify + * the loop table */ + + for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++) + { + /* Some loops may have been already removed by + * loop unrolling or conditional folding */ + + if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED) + { + continue; + } + + /* We are only interested in the loop bottom */ + + if (optLoopTable[loopNum].lpBottom == block) + { + if (cond->AsIntCon()->gtIconVal == 0) + { + /* This was a bogus loop (condition always false) + * Remove the loop from the table */ + + optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED; +#if FEATURE_LOOP_ALIGN + optLoopTable[loopNum].lpFirst->bbFlags &= ~BBF_LOOP_ALIGN; + JITDUMP("Removing LOOP_ALIGN flag from bogus loop in " FMT_BB "\n", + optLoopTable[loopNum].lpFirst->bbNum); +#endif + +#ifdef DEBUG + if (verbose) + { + printf("Removing loop " FMT_LP " (from " FMT_BB " to " FMT_BB ")\n\n", loopNum, + optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum); + } +#endif + } + } + } + DONE_COND: + result = true; + } + } + else if (block->bbJumpKind == BBJ_SWITCH) + { + noway_assert(block->bbStmtList != nullptr && block->bbStmtList->GetPrevStmt() != nullptr); + + Statement* lastStmt = block->lastStmt(); + + noway_assert(lastStmt->GetNextStmt() == nullptr); + + if (lastStmt->GetRootNode()->gtOper == GT_CALL) + { + noway_assert(fgRemoveRestOfBlock); + + /* Unconditional throw - transform the basic block into a BBJ_THROW */ + fgConvertBBToThrowBB(block); + +#ifdef DEBUG + if (verbose) + { + printf("\nConditional folded at " FMT_BB "\n", block->bbNum); + printf(FMT_BB " becomes a BBJ_THROW\n", block->bbNum); + } +#endif + goto DONE_SWITCH; + } + + noway_assert(lastStmt->GetRootNode()->gtOper == GT_SWITCH); + + /* Did we fold the conditional */ + + noway_assert(lastStmt->GetRootNode()->AsOp()->gtOp1); + GenTree* condTree; + condTree = lastStmt->GetRootNode()->AsOp()->gtOp1; + GenTree* cond; + cond = condTree->gtEffectiveVal(true); + + if (cond->OperKind() & GTK_CONST) + { + /* Yupee - we folded the conditional! 
+ * Remove the conditional statement */ + + noway_assert(cond->gtOper == GT_CNS_INT); + + if (condTree != cond) + { + // Preserve any side effects + assert(condTree->OperIs(GT_COMMA)); + lastStmt->SetRootNode(condTree); + } + else + { + // no side effects, remove the switch entirely + fgRemoveStmt(block, lastStmt); + } + + /* modify the flow graph */ + + /* Find the actual jump target */ + unsigned switchVal; + switchVal = (unsigned)cond->AsIntCon()->gtIconVal; + unsigned jumpCnt; + jumpCnt = block->bbJumpSwt->bbsCount; + BasicBlock** jumpTab; + jumpTab = block->bbJumpSwt->bbsDstTab; + bool foundVal; + foundVal = false; + + for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) + { + BasicBlock* curJump = *jumpTab; + + assert(curJump->countOfInEdges() > 0); + + // If val matches switchVal or we are at the last entry and + // we never found the switch value then set the new jump dest + + if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1))) + { + if (curJump != block->bbNext) + { + /* transform the basic block into a BBJ_ALWAYS */ + block->bbJumpKind = BBJ_ALWAYS; + block->bbJumpDest = curJump; + } + else + { + /* transform the basic block into a BBJ_NONE */ + block->bbJumpKind = BBJ_NONE; + } + foundVal = true; + } + else + { + /* Remove 'block' from the predecessor list of 'curJump' */ + fgRemoveRefPred(curJump, block); + } + } +#ifdef DEBUG + if (verbose) + { + printf("\nConditional folded at " FMT_BB "\n", block->bbNum); + printf(FMT_BB " becomes a %s", block->bbNum, + block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE"); + if (block->bbJumpKind == BBJ_ALWAYS) + { + printf(" to " FMT_BB, block->bbJumpDest->bbNum); + } + printf("\n"); + } +#endif + DONE_SWITCH: + result = true; + } + } + return result; +} + +//------------------------------------------------------------------------ +// fgMorphBlockStmt: morph a single statement in a block. +// +// Arguments: +// block - block containing the statement +// stmt - statement to morph +// msg - string to identify caller in a dump +// +// Returns: +// true if 'stmt' was removed from the block. +// s false if 'stmt' is still in the block (even if other statements were removed). +// +// Notes: +// Can be called anytime, unlike fgMorphStmts() which should only be called once. +// +bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg)) +{ + assert(block != nullptr); + assert(stmt != nullptr); + + // Reset some ambient state + fgRemoveRestOfBlock = false; + compCurBB = block; + compCurStmt = stmt; + + GenTree* morph = fgMorphTree(stmt->GetRootNode()); + + // Bug 1106830 - During the CSE phase we can't just remove + // morph->AsOp()->gtOp2 as it could contain CSE expressions. + // This leads to a noway_assert in OptCSE.cpp when + // searching for the removed CSE ref. 
(using gtFindLink) + // + if (!optValnumCSE_phase) + { + // Check for morph as a GT_COMMA with an unconditional throw + if (fgIsCommaThrow(morph, true)) + { +#ifdef DEBUG + if (verbose) + { + printf("Folding a top-level fgIsCommaThrow stmt\n"); + printf("Removing op2 as unreachable:\n"); + gtDispTree(morph->AsOp()->gtOp2); + printf("\n"); + } +#endif + // Use the call as the new stmt + morph = morph->AsOp()->gtOp1; + noway_assert(morph->gtOper == GT_CALL); + } + + // we can get a throw as a statement root + if (fgIsThrow(morph)) + { +#ifdef DEBUG + if (verbose) + { + printf("We have a top-level fgIsThrow stmt\n"); + printf("Removing the rest of block as unreachable:\n"); + } +#endif + noway_assert((morph->gtFlags & GTF_COLON_COND) == 0); + fgRemoveRestOfBlock = true; + } + } + + stmt->SetRootNode(morph); + + // Can the entire tree be removed? + bool removedStmt = false; + + // Defer removing statements during CSE so we don't inadvertently remove any CSE defs. + if (!optValnumCSE_phase) + { + removedStmt = fgCheckRemoveStmt(block, stmt); + } + + // Or this is the last statement of a conditional branch that was just folded? + if (!removedStmt && (stmt->GetNextStmt() == nullptr) && !fgRemoveRestOfBlock) + { + if (fgFoldConditional(block)) + { + if (block->bbJumpKind != BBJ_THROW) + { + removedStmt = true; + } + } + } + + if (!removedStmt) + { + // Have to re-do the evaluation order since for example some later code does not expect constants as op1 + gtSetStmtInfo(stmt); + + // Have to re-link the nodes for this statement + fgSetStmtSeq(stmt); + } + +#ifdef DEBUG + if (verbose) + { + printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed")); + gtDispTree(morph); + printf("\n"); + } +#endif + + if (fgRemoveRestOfBlock) + { + // Remove the rest of the stmts in the block + for (Statement* removeStmt : StatementList(stmt->GetNextStmt())) + { + fgRemoveStmt(block, removeStmt); + } + + // The rest of block has been removed and we will always throw an exception. + // + // For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_NONE. + // We should not convert it to a ThrowBB. + if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0)) + { + // Convert block to a throw bb + fgConvertBBToThrowBB(block); + } + +#ifdef DEBUG + if (verbose) + { + printf("\n%s Block " FMT_BB " becomes a throw block.\n", msg, block->bbNum); + } +#endif + fgRemoveRestOfBlock = false; + } + + return removedStmt; +} + +/***************************************************************************** + * + * Morph the statements of the given block. + * This function should be called just once for a block. Use fgMorphBlockStmt() + * for reentrant calls. + */ + +void Compiler::fgMorphStmts(BasicBlock* block, bool* lnot, bool* loadw) +{ + fgRemoveRestOfBlock = false; + + *lnot = *loadw = false; + + fgCurrentlyInUseArgTemps = hashBv::Create(this); + + for (Statement* stmt : block->Statements()) + { + if (fgRemoveRestOfBlock) + { + fgRemoveStmt(block, stmt); + continue; + } +#ifdef FEATURE_SIMD + if (opts.OptimizationEnabled() && stmt->GetRootNode()->TypeGet() == TYP_FLOAT && + stmt->GetRootNode()->OperGet() == GT_ASG) + { + fgMorphCombineSIMDFieldAssignments(block, stmt); + } +#endif + + fgMorphStmt = stmt; + compCurStmt = stmt; + GenTree* oldTree = stmt->GetRootNode(); + +#ifdef DEBUG + + unsigned oldHash = verbose ? 
gtHashValue(oldTree) : DUMMY_INIT(~0); + + if (verbose) + { + printf("\nfgMorphTree " FMT_BB ", " FMT_STMT " (before)\n", block->bbNum, stmt->GetID()); + gtDispTree(oldTree); + } +#endif + + /* Morph this statement tree */ + + GenTree* morphedTree = fgMorphTree(oldTree); + + // mark any outgoing arg temps as free so we can reuse them in the next statement. + + fgCurrentlyInUseArgTemps->ZeroAll(); + + // Has fgMorphStmt been sneakily changed ? + + if ((stmt->GetRootNode() != oldTree) || (block != compCurBB)) + { + if (stmt->GetRootNode() != oldTree) + { + /* This must be tailcall. Ignore 'morphedTree' and carry on with + the tail-call node */ + + morphedTree = stmt->GetRootNode(); + } + else + { + /* This must be a tailcall that caused a GCPoll to get + injected. We haven't actually morphed the call yet + but the flag still got set, clear it here... */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + morphedTree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; +#endif + } + + noway_assert(compTailCallUsed); + noway_assert(morphedTree->gtOper == GT_CALL); + GenTreeCall* call = morphedTree->AsCall(); + // Could be + // - a fast call made as jmp in which case block will be ending with + // BBJ_RETURN (as we need epilog) and marked as containing a jmp. + // - a tailcall dispatched via JIT helper, on x86, in which case + // block will be ending with BBJ_THROW. + // - a tail call dispatched via runtime help (IL stubs), in which + // case there will not be any tailcall and the block will be ending + // with BBJ_RETURN (as normal control flow) + noway_assert((call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) && + ((compCurBB->bbFlags & BBF_HAS_JMP)) != 0) || + (call->IsTailCallViaJitHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) || + (!call->IsTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN))); + } + +#ifdef DEBUG + if (compStressCompile(STRESS_CLONE_EXPR, 30)) + { + // Clone all the trees to stress gtCloneExpr() + + if (verbose) + { + printf("\nfgMorphTree (stressClone from):\n"); + gtDispTree(morphedTree); + } + + morphedTree = gtCloneExpr(morphedTree); + noway_assert(morphedTree != nullptr); + + if (verbose) + { + printf("\nfgMorphTree (stressClone to):\n"); + gtDispTree(morphedTree); + } + } + + /* If the hash value changes. 
we modified the tree during morphing */ + if (verbose) + { + unsigned newHash = gtHashValue(morphedTree); + if (newHash != oldHash) + { + printf("\nfgMorphTree " FMT_BB ", " FMT_STMT " (after)\n", block->bbNum, stmt->GetID()); + gtDispTree(morphedTree); + } + } +#endif + + /* Check for morphedTree as a GT_COMMA with an unconditional throw */ + if (!gtIsActiveCSE_Candidate(morphedTree) && fgIsCommaThrow(morphedTree, true)) + { + /* Use the call as the new stmt */ + morphedTree = morphedTree->AsOp()->gtOp1; + noway_assert(morphedTree->gtOper == GT_CALL); + noway_assert((morphedTree->gtFlags & GTF_COLON_COND) == 0); + + fgRemoveRestOfBlock = true; + } + + stmt->SetRootNode(morphedTree); + + if (fgRemoveRestOfBlock) + { + continue; + } + + /* Has the statement been optimized away */ + + if (fgCheckRemoveStmt(block, stmt)) + { + continue; + } + + /* Check if this block ends with a conditional branch that can be folded */ + + if (fgFoldConditional(block)) + { + continue; + } + + if (ehBlockHasExnFlowDsc(block)) + { + continue; + } + } + + if (fgRemoveRestOfBlock) + { + if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH)) + { + Statement* first = block->firstStmt(); + noway_assert(first); + Statement* lastStmt = block->lastStmt(); + noway_assert(lastStmt && lastStmt->GetNextStmt() == nullptr); + GenTree* last = lastStmt->GetRootNode(); + + if (((block->bbJumpKind == BBJ_COND) && (last->gtOper == GT_JTRUE)) || + ((block->bbJumpKind == BBJ_SWITCH) && (last->gtOper == GT_SWITCH))) + { + GenTree* op1 = last->AsOp()->gtOp1; + + if (op1->OperKind() & GTK_RELOP) + { + /* Unmark the comparison node with GTF_RELOP_JMP_USED */ + op1->gtFlags &= ~GTF_RELOP_JMP_USED; + } + + lastStmt->SetRootNode(fgMorphTree(op1)); + } + } + + /* Mark block as a BBJ_THROW block */ + fgConvertBBToThrowBB(block); + } + +#if FEATURE_FASTTAILCALL + GenTree* recursiveTailCall = nullptr; + if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall)) + { + fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall()); + } +#endif + + // Reset this back so that it doesn't leak out impacting other blocks + fgRemoveRestOfBlock = false; +} + +/***************************************************************************** + * + * Morph the blocks of the method. + * Returns true if the basic block list is modified. + * This function should be called just once. 
+ */ + +void Compiler::fgMorphBlocks() +{ +#ifdef DEBUG + if (verbose) + { + printf("\n*************** In fgMorphBlocks()\n"); + } +#endif + + /* Since fgMorphTree can be called after various optimizations to re-arrange + * the nodes we need a global flag to signal if we are during the one-pass + * global morphing */ + + fgGlobalMorph = true; + +#if LOCAL_ASSERTION_PROP + // + // Local assertion prop is enabled if we are optimized + // + optLocalAssertionProp = opts.OptimizationEnabled(); + + if (optLocalAssertionProp) + { + // + // Initialize for local assertion prop + // + optAssertionInit(true); + } +#elif ASSERTION_PROP + // + // If LOCAL_ASSERTION_PROP is not set + // and we have global assertion prop + // then local assertion prop is always off + // + optLocalAssertionProp = false; + +#endif + + /*------------------------------------------------------------------------- + * Process all basic blocks in the function + */ + + BasicBlock* block = fgFirstBB; + noway_assert(block); + + do + { +#if OPT_BOOL_OPS + bool lnot = false; +#endif + + bool loadw = false; + +#ifdef DEBUG + if (verbose) + { + printf("\nMorphing " FMT_BB " of '%s'\n", block->bbNum, info.compFullName); + } +#endif + +#if LOCAL_ASSERTION_PROP + if (optLocalAssertionProp) + { + // + // Clear out any currently recorded assertion candidates + // before processing each basic block, + // also we must handle QMARK-COLON specially + // + optAssertionReset(0); + } +#endif + // Make the current basic block address available globally. + compCurBB = block; + + // Process all statement trees in the basic block. + fgMorphStmts(block, &lnot, &loadw); + + // Do we need to merge the result of this block into a single return block? + if ((block->bbJumpKind == BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)) + { + if ((genReturnBB != nullptr) && (genReturnBB != block)) + { + fgMergeBlockReturn(block); + } + } + + block = block->bbNext; + } while (block != nullptr); + + // We are done with the global morphing phase + fgGlobalMorph = false; + compCurBB = nullptr; + + // Under OSR, we no longer need to specially protect the original method entry + // + if (opts.IsOSR() && (fgEntryBB != nullptr) && (fgEntryBB->bbFlags & BBF_IMPORTED)) + { + JITDUMP("OSR: un-protecting original method entry " FMT_BB "\n", fgEntryBB->bbNum); + assert(fgEntryBB->bbRefs > 0); + fgEntryBB->bbRefs--; + // We don't need to remember this block anymore. + fgEntryBB = nullptr; + } + +#ifdef DEBUG + if (verboseTrees) + { + fgDispBasicBlocks(true); + } +#endif +} + +//------------------------------------------------------------------------ +// fgMergeBlockReturn: assign the block return value (if any) into the single return temp +// and branch to the single return block. +// +// Arguments: +// block - the block to process. +// +// Notes: +// A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN. +// For example a method returning void could have an empty block with jump kind BBJ_RETURN. +// Such blocks do materialize as part of in-lining. +// +// A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN. +// It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC. +// For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal +// is BAD_VAR_NUM. 
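As an editorial aside (not part of the diff), the effect of merging a return block can be pictured at source level as below; retVal stands in for genReturnLocal and the label for genReturnBB, both names here being illustrative.

    // Illustrative sketch only: each BBJ_RETURN block stops returning directly,
    // stores its value into the shared return temp, and jumps to the common
    // return block, which performs the single real return.
    static int AfterReturnMerge(bool cond, int a, int b)
    {
        int retVal;              // plays the role of genReturnLocal
        if (cond)
        {
            retVal = a;          // GT_ASG into the return temp ...
            goto MergedReturn;   // ... then branch to genReturnBB
        }
        retVal = b;
    MergedReturn:
        return retVal;           // the single merged GT_RETURN
    }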
+// +void Compiler::fgMergeBlockReturn(BasicBlock* block) +{ + assert((block->bbJumpKind == BBJ_RETURN) && ((block->bbFlags & BBF_HAS_JMP) == 0)); + assert((genReturnBB != nullptr) && (genReturnBB != block)); + + // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN. + + Statement* lastStmt = block->lastStmt(); + GenTree* ret = (lastStmt != nullptr) ? lastStmt->GetRootNode() : nullptr; + + if ((ret != nullptr) && (ret->OperGet() == GT_RETURN) && ((ret->gtFlags & GTF_RET_MERGED) != 0)) + { + // This return was generated during epilog merging, so leave it alone + } + else + { + // We'll jump to the genReturnBB. + CLANG_FORMAT_COMMENT_ANCHOR; + +#if !defined(TARGET_X86) + if (info.compFlags & CORINFO_FLG_SYNCH) + { + fgConvertSyncReturnToLeave(block); + } + else +#endif // !TARGET_X86 + { + block->bbJumpKind = BBJ_ALWAYS; + block->bbJumpDest = genReturnBB; + fgAddRefPred(genReturnBB, block); + fgReturnCount--; + } + if (genReturnLocal != BAD_VAR_NUM) + { + // replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal. + + // Method must be returning a value other than TYP_VOID. + noway_assert(compMethodHasRetVal()); + + // This block must be ending with a GT_RETURN + noway_assert(lastStmt != nullptr); + noway_assert(lastStmt->GetNextStmt() == nullptr); + noway_assert(ret != nullptr); + + // GT_RETURN must have non-null operand as the method is returning the value assigned to + // genReturnLocal + noway_assert(ret->OperGet() == GT_RETURN); + noway_assert(ret->gtGetOp1() != nullptr); + + Statement* pAfterStatement = lastStmt; + IL_OFFSETX offset = lastStmt->GetILOffsetX(); + GenTree* tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1(), &pAfterStatement, offset, block); + if (tree->OperIsCopyBlkOp()) + { + tree = fgMorphCopyBlock(tree); + } + + if (pAfterStatement == lastStmt) + { + lastStmt->SetRootNode(tree); + } + else + { + // gtNewTempAssign inserted additional statements after last + fgRemoveStmt(block, lastStmt); + Statement* newStmt = gtNewStmt(tree, offset); + fgInsertStmtAfter(block, pAfterStatement, newStmt); + lastStmt = newStmt; + } + } + else if (ret != nullptr && ret->OperGet() == GT_RETURN) + { + // This block ends with a GT_RETURN + noway_assert(lastStmt != nullptr); + noway_assert(lastStmt->GetNextStmt() == nullptr); + + // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn + // block + noway_assert(ret->TypeGet() == TYP_VOID); + noway_assert(ret->gtGetOp1() == nullptr); + + fgRemoveStmt(block, lastStmt); + } + + JITDUMP("\nUpdate " FMT_BB " to jump to common return block.\n", block->bbNum); + DISPBLOCK(block); + + if (block->hasProfileWeight()) + { + BasicBlock::weight_t const oldWeight = + genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newWeight = oldWeight + block->bbWeight; + + JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight, + block->bbNum, genReturnBB->bbNum); + + genReturnBB->setBBProfileWeight(newWeight); + DISPBLOCK(genReturnBB); + } + } +} + +/***************************************************************************** + * + * Make some decisions about the kind of code to generate. + */ + +void Compiler::fgSetOptions() +{ +#ifdef DEBUG + /* Should we force fully interruptible code ? 
*/ + if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30)) + { + noway_assert(!codeGen->isGCTypeFixed()); + SetInterruptible(true); + } +#endif + + if (opts.compDbgCode) + { + assert(!codeGen->isGCTypeFixed()); + SetInterruptible(true); // debugging is easier this way ... + } + + /* Assume we won't need an explicit stack frame if this is allowed */ + + if (compLocallocUsed) + { + codeGen->setFramePointerRequired(true); + } + +#ifdef TARGET_X86 + + if (compTailCallUsed) + codeGen->setFramePointerRequired(true); + +#endif // TARGET_X86 + + if (!opts.genFPopt) + { + codeGen->setFramePointerRequired(true); + } + + // Assert that the EH table has been initialized by now. Note that + // compHndBBtabAllocCount never decreases; it is a high-water mark + // of table allocation. In contrast, compHndBBtabCount does shrink + // if we delete a dead EH region, and if it shrinks to zero, the + // table pointer compHndBBtab is unreliable. + assert(compHndBBtabAllocCount >= info.compXcptnsCount); + +#ifdef TARGET_X86 + + // Note: this case, and the !X86 case below, should both use the + // !X86 path. This would require a few more changes for X86 to use + // compHndBBtabCount (the current number of EH clauses) instead of + // info.compXcptnsCount (the number of EH clauses in IL), such as + // in ehNeedsShadowSPslots(). This is because sometimes the IL has + // an EH clause that we delete as statically dead code before we + // get here, leaving no EH clauses left, and thus no requirement + // to use a frame pointer because of EH. But until all the code uses + // the same test, leave info.compXcptnsCount here. + if (info.compXcptnsCount > 0) + { + codeGen->setFramePointerRequiredEH(true); + } + +#else // !TARGET_X86 + + if (compHndBBtabCount > 0) + { + codeGen->setFramePointerRequiredEH(true); + } + +#endif // TARGET_X86 + +#ifdef UNIX_X86_ABI + if (info.compXcptnsCount > 0) + { + assert(!codeGen->isGCTypeFixed()); + // Enforce fully interruptible codegen for funclet unwinding + SetInterruptible(true); + } +#endif // UNIX_X86_ABI + + if (compMethodRequiresPInvokeFrame()) + { + codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame + } + + if (info.compPublishStubParam) + { + codeGen->setFramePointerRequiredGCInfo(true); + } + + if (compIsProfilerHookNeeded()) + { + codeGen->setFramePointerRequired(true); + } + + if (info.compIsVarArgs) + { + // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative. + codeGen->setFramePointerRequiredGCInfo(true); + } + + if (lvaReportParamTypeArg()) + { + codeGen->setFramePointerRequiredGCInfo(true); + } + + // printf("method will %s be fully interruptible\n", GetInterruptible() ? " " : "not"); +} + +/*****************************************************************************/ + +GenTree* Compiler::fgInitThisClass() +{ + noway_assert(!compIsForInlining()); + + CORINFO_LOOKUP_KIND kind; + info.compCompHnd->getLocationOfThisType(info.compMethodHnd, &kind); + + if (!kind.needsRuntimeLookup) + { + return fgGetSharedCCtor(info.compClassHnd); + } + else + { +#ifdef FEATURE_READYTORUN_COMPILER + // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR. + if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI)) + { + CORINFO_RESOLVED_TOKEN resolvedToken; + memset(&resolvedToken, 0, sizeof(resolvedToken)); + + // We are in a shared method body, but maybe we don't need a runtime lookup after all. 
+ // This covers the case of a generic method on a non-generic type. + if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST)) + { + resolvedToken.hClass = info.compClassHnd; + return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF); + } + + // We need a runtime lookup. + GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind); + + // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static + // base of the class that owns the method being compiled". If we're in this method, it means we're not + // inlining and there's no ambiguity. + return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF, + gtNewCallArgs(ctxTree), &kind); + } +#endif + + // Collectible types requires that for shared generic code, if we use the generic context paramter + // that we report it. (This is a conservative approach, we could detect some cases particularly when the + // context parameter is this that we don't need the eager reporting logic.) + lvaGenericsContextInUse = true; + + switch (kind.runtimeLookupKind) + { + case CORINFO_LOOKUP_THISOBJ: + { + // This code takes a this pointer; but we need to pass the static method desc to get the right point in + // the hierarchy + GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF); + vtTree->gtFlags |= GTF_VAR_CONTEXT; + // Vtable pointer of this object + vtTree = gtNewMethodTableLookup(vtTree); + GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd); + + return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewCallArgs(vtTree, methodHnd)); + } + + case CORINFO_LOOKUP_CLASSPARAM: + { + GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); + vtTree->gtFlags |= GTF_VAR_CONTEXT; + return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, gtNewCallArgs(vtTree)); + } + + case CORINFO_LOOKUP_METHODPARAM: + { + GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); + methHndTree->gtFlags |= GTF_VAR_CONTEXT; + return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, + gtNewCallArgs(gtNewIconNode(0), methHndTree)); + } + + default: + noway_assert(!"Unknown LOOKUP_KIND"); + UNREACHABLE(); + } + } +} + +#ifdef DEBUG +/***************************************************************************** + * + * Tree walk callback to make sure no GT_QMARK nodes are present in the tree, + * except for the allowed ? 1 : 0; pattern. + */ +Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTree** tree, fgWalkData* data) +{ + if ((*tree)->OperGet() == GT_QMARK) + { + fgCheckQmarkAllowedForm(*tree); + } + return WALK_CONTINUE; +} + +void Compiler::fgCheckQmarkAllowedForm(GenTree* tree) +{ + assert(tree->OperGet() == GT_QMARK); + assert(!"Qmarks beyond morph disallowed."); +} + +/***************************************************************************** + * + * Verify that the importer has created GT_QMARK nodes in a way we can + * process them. The following is allowed: + * + * 1. A top level qmark. Top level qmark is of the form: + * a) (bool) ? (void) : (void) OR + * b) V0N = (bool) ? (type) : (type) + * + * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child + * of either op1 of colon or op2 of colon but not a child of any other + * operator. + */ +void Compiler::fgPreExpandQmarkChecks(GenTree* expr) +{ + GenTree* topQmark = fgGetTopLevelQmark(expr); + + // If the top level Qmark is null, then scan the tree to make sure + // there are no qmarks within it. 
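Editorial aside, not part of the diff: form 1b in the comment above corresponds to the familiar source shape where a conditional expression is assigned straight to a local. The function below is a hypothetical example of that shape.

    // Illustrative sketch only: the source form behind "V0N = (bool) ? (type) : (type)".
    static int TopLevelQmark(bool cond, int x, int y)
    {
        int v0 = cond ? x : y; // imported as a GT_QMARK whose value is stored to a local
        return v0;
    }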
+ if (topQmark == nullptr) + { + fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); + } + else + { + // We could probably expand the cond node also, but don't think the extra effort is necessary, + // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks. + fgWalkTreePre(&topQmark->AsOp()->gtOp1, Compiler::fgAssertNoQmark, nullptr); + + fgPreExpandQmarkChecks(topQmark->AsOp()->gtOp2->AsOp()->gtOp1); + fgPreExpandQmarkChecks(topQmark->AsOp()->gtOp2->AsOp()->gtOp2); + } +} +#endif // DEBUG + +/***************************************************************************** + * + * Get the top level GT_QMARK node in a given "expr", return NULL if such a + * node is not present. If the top level GT_QMARK node is assigned to a + * GT_LCL_VAR, then return the lcl node in ppDst. + * + */ +GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */) +{ + if (ppDst != nullptr) + { + *ppDst = nullptr; + } + + GenTree* topQmark = nullptr; + if (expr->gtOper == GT_QMARK) + { + topQmark = expr; + } + else if (expr->gtOper == GT_ASG && expr->AsOp()->gtOp2->gtOper == GT_QMARK && + expr->AsOp()->gtOp1->gtOper == GT_LCL_VAR) + { + topQmark = expr->AsOp()->gtOp2; + if (ppDst != nullptr) + { + *ppDst = expr->AsOp()->gtOp1; + } + } + return topQmark; +} + +/********************************************************************************* + * + * For a castclass helper call, + * Importer creates the following tree: + * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper()); + * + * This method splits the qmark expression created by the importer into the + * following blocks: (block, asg, cond1, cond2, helper, remainder) + * Notice that op1 is the result for both the conditions. So we coalesce these + * assignments into a single block instead of two blocks resulting a nested diamond. + * + * +---------->-----------+ + * | | | + * ^ ^ v + * | | | + * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder + * + * We expect to achieve the following codegen: + * mov rsi, rdx tmp = op1 // asgBlock + * test rsi, rsi goto skip if tmp == null ? // cond1Block + * je SKIP + * mov rcx, 0x76543210 cns = op2 // cond2Block + * cmp qword ptr [rsi], rcx goto skip if *tmp == op2 + * je SKIP + * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock + * mov rsi, rax + * SKIP: // remainderBlock + * tmp has the result. + * + */ +void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt) +{ +#ifdef DEBUG + if (verbose) + { + printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n", block->bbNum); + fgDispBasicBlocks(block, block, true); + } +#endif // DEBUG + + GenTree* expr = stmt->GetRootNode(); + + GenTree* dst = nullptr; + GenTree* qmark = fgGetTopLevelQmark(expr, &dst); + noway_assert(dst != nullptr); + + assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF); + + // Get cond, true, false exprs for the qmark. + GenTree* condExpr = qmark->gtGetOp1(); + GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); + GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); + + // Get cond, true, false exprs for the nested qmark. 
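+    // (In the castclass shape from the header comment, the outer qmark's false arm is
+    //  itself a qmark: the null check selects op1 directly, otherwise the nested qmark
+    //  either selects op1 on an exact type match or falls back to the helper call.)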
+ GenTree* nestedQmark = falseExpr; + GenTree* cond2Expr; + GenTree* true2Expr; + GenTree* false2Expr; + + if (nestedQmark->gtOper == GT_QMARK) + { + cond2Expr = nestedQmark->gtGetOp1(); + true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode(); + false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode(); + + assert(cond2Expr->gtFlags & GTF_RELOP_QMARK); + cond2Expr->gtFlags &= ~GTF_RELOP_QMARK; + } + else + { + // This is a rare case that arises when we are doing minopts and encounter isinst of null + // gtFoldExpr was still is able to optimize away part of the tree (but not all). + // That means it does not match our pattern. + + // Rather than write code to handle this case, just fake up some nodes to make it match the common + // case. Synthesize a comparison that is always true, and for the result-on-true, use the + // entire subtree we expected to be the nested question op. + + cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL)); + true2Expr = nestedQmark; + false2Expr = gtNewIconNode(0, TYP_I_IMPL); + } + assert(false2Expr->OperGet() == trueExpr->OperGet()); + + // Clear flags as they are now going to be part of JTRUE. + assert(condExpr->gtFlags & GTF_RELOP_QMARK); + condExpr->gtFlags &= ~GTF_RELOP_QMARK; + + // Create the chain of blocks. See method header comment. + // The order of blocks after this is the following: + // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock + // + // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', + // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only + // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely + // remainderBlock will still be GC safe. + unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; + BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); + fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. + + BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true); + BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true); + BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true); + BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true); + + remainderBlock->bbFlags |= propagateFlags; + + // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). + // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. + if ((block->bbFlags & BBF_INTERNAL) == 0) + { + helperBlock->bbFlags &= ~BBF_INTERNAL; + cond2Block->bbFlags &= ~BBF_INTERNAL; + cond1Block->bbFlags &= ~BBF_INTERNAL; + asgBlock->bbFlags &= ~BBF_INTERNAL; + helperBlock->bbFlags |= BBF_IMPORTED; + cond2Block->bbFlags |= BBF_IMPORTED; + cond1Block->bbFlags |= BBF_IMPORTED; + asgBlock->bbFlags |= BBF_IMPORTED; + } + + // Chain the flow correctly. + fgAddRefPred(asgBlock, block); + fgAddRefPred(cond1Block, asgBlock); + fgAddRefPred(cond2Block, cond1Block); + fgAddRefPred(helperBlock, cond2Block); + fgAddRefPred(remainderBlock, helperBlock); + fgAddRefPred(remainderBlock, cond1Block); + fgAddRefPred(remainderBlock, cond2Block); + + cond1Block->bbJumpDest = remainderBlock; + cond2Block->bbJumpDest = remainderBlock; + + // Set the weights; some are guesses. 
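+    // For example (illustrative numbers): if 'block' has weight 100, asgBlock and
+    // cond1Block inherit 100, cond2Block gets 50 and helperBlock 25 -- i.e. we guess
+    // that the null check and the method-table check each filter out half the cases.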
+ asgBlock->inheritWeight(block); + cond1Block->inheritWeight(block); + cond2Block->inheritWeightPercentage(cond1Block, 50); + helperBlock->inheritWeightPercentage(cond2Block, 50); + + // Append cond1 as JTRUE to cond1Block + GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr); + Statement* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetILOffsetX()); + fgInsertStmtAtEnd(cond1Block, jmpStmt); + + // Append cond2 as JTRUE to cond2Block + jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr); + jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetILOffsetX()); + fgInsertStmtAtEnd(cond2Block, jmpStmt); + + // AsgBlock should get tmp = op1 assignment. + trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr); + Statement* trueStmt = fgNewStmtFromTree(trueExpr, stmt->GetILOffsetX()); + fgInsertStmtAtEnd(asgBlock, trueStmt); + + // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper. + gtReverseCond(cond2Expr); + GenTree* helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr); + Statement* helperStmt = fgNewStmtFromTree(helperExpr, stmt->GetILOffsetX()); + fgInsertStmtAtEnd(helperBlock, helperStmt); + + // Finally remove the nested qmark stmt. + fgRemoveStmt(block, stmt); + + if (true2Expr->OperIs(GT_CALL) && (true2Expr->AsCall()->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN)) + { + fgConvertBBToThrowBB(helperBlock); + } + +#ifdef DEBUG + if (verbose) + { + printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n", block->bbNum); + fgDispBasicBlocks(block, remainderBlock, true); + } +#endif // DEBUG +} + +/***************************************************************************** + * + * Expand a statement with a top level qmark node. There are three cases, based + * on whether the qmark has both "true" and "false" arms, or just one of them. + * + * S0; + * C ? T : F; + * S1; + * + * Generates ===> + * + * bbj_always + * +---->------+ + * false | | + * S0 -->-- ~C -->-- T F -->-- S1 + * | | + * +--->--------+ + * bbj_cond(true) + * + * ----------------------------------------- + * + * S0; + * C ? T : NOP; + * S1; + * + * Generates ===> + * + * false + * S0 -->-- ~C -->-- T -->-- S1 + * | | + * +-->-------------+ + * bbj_cond(true) + * + * ----------------------------------------- + * + * S0; + * C ? NOP : F; + * S1; + * + * Generates ===> + * + * false + * S0 -->-- C -->-- F -->-- S1 + * | | + * +-->------------+ + * bbj_cond(true) + * + * If the qmark assigns to a variable, then create tmps for "then" + * and "else" results and assign the temp to the variable as a writeback step. + */ +void Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) +{ + GenTree* expr = stmt->GetRootNode(); + + // Retrieve the Qmark node to be expanded. + GenTree* dst = nullptr; + GenTree* qmark = fgGetTopLevelQmark(expr, &dst); + if (qmark == nullptr) + { + return; + } + + if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF) + { + fgExpandQmarkForCastInstOf(block, stmt); + return; + } + +#ifdef DEBUG + if (verbose) + { + printf("\nExpanding top-level qmark in " FMT_BB " (before)\n", block->bbNum); + fgDispBasicBlocks(block, block, true); + } +#endif // DEBUG + + // Retrieve the operands. 
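+    // (A GT_QMARK's op1 is the condition and its op2 is always a GT_COLON holding the
+    //  "then" and "else" trees; e.g. for `x = c ? t : f` the importer produces
+    //  ASG(LCL_VAR x, QMARK(c, COLON(t, f))) -- a sketch, the names are illustrative.)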
+ GenTree* condExpr = qmark->gtGetOp1(); + GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); + GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); + + assert(condExpr->gtFlags & GTF_RELOP_QMARK); + condExpr->gtFlags &= ~GTF_RELOP_QMARK; + + assert(!varTypeIsFloating(condExpr->TypeGet())); + + bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP); + bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP); + assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark! + + // Create remainder, cond and "else" blocks. After this, the blocks are in this order: + // block ... condBlock ... elseBlock ... remainderBlock + // + // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', + // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only + // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely + // remainderBlock will still be GC safe. + unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT; + BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); + fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. + + BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true); + BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true); + + // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). + // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. + if ((block->bbFlags & BBF_INTERNAL) == 0) + { + condBlock->bbFlags &= ~BBF_INTERNAL; + elseBlock->bbFlags &= ~BBF_INTERNAL; + condBlock->bbFlags |= BBF_IMPORTED; + elseBlock->bbFlags |= BBF_IMPORTED; + } + + remainderBlock->bbFlags |= propagateFlags; + + condBlock->inheritWeight(block); + + fgAddRefPred(condBlock, block); + fgAddRefPred(elseBlock, condBlock); + fgAddRefPred(remainderBlock, elseBlock); + + BasicBlock* thenBlock = nullptr; + if (hasTrueExpr && hasFalseExpr) + { + // bbj_always + // +---->------+ + // false | | + // S0 -->-- ~C -->-- T F -->-- S1 + // | | + // +--->--------+ + // bbj_cond(true) + // + gtReverseCond(condExpr); + condBlock->bbJumpDest = elseBlock; + + thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true); + thenBlock->bbJumpDest = remainderBlock; + if ((block->bbFlags & BBF_INTERNAL) == 0) + { + thenBlock->bbFlags &= ~BBF_INTERNAL; + thenBlock->bbFlags |= BBF_IMPORTED; + } + + fgAddRefPred(thenBlock, condBlock); + fgAddRefPred(remainderBlock, thenBlock); + + thenBlock->inheritWeightPercentage(condBlock, 50); + elseBlock->inheritWeightPercentage(condBlock, 50); + } + else if (hasTrueExpr) + { + // false + // S0 -->-- ~C -->-- T -->-- S1 + // | | + // +-->-------------+ + // bbj_cond(true) + // + gtReverseCond(condExpr); + condBlock->bbJumpDest = remainderBlock; + fgAddRefPred(remainderBlock, condBlock); + // Since we have no false expr, use the one we'd already created. 
+ thenBlock = elseBlock; + elseBlock = nullptr; + + thenBlock->inheritWeightPercentage(condBlock, 50); + } + else if (hasFalseExpr) + { + // false + // S0 -->-- C -->-- F -->-- S1 + // | | + // +-->------------+ + // bbj_cond(true) + // + condBlock->bbJumpDest = remainderBlock; + fgAddRefPred(remainderBlock, condBlock); + + elseBlock->inheritWeightPercentage(condBlock, 50); + } + + GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1()); + Statement* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetILOffsetX()); + fgInsertStmtAtEnd(condBlock, jmpStmt); + + // Remove the original qmark statement. + fgRemoveStmt(block, stmt); + + // Since we have top level qmarks, we either have a dst for it in which case + // we need to create tmps for true and falseExprs, else just don't bother + // assigning. + unsigned lclNum = BAD_VAR_NUM; + if (dst != nullptr) + { + assert(dst->gtOper == GT_LCL_VAR); + lclNum = dst->AsLclVar()->GetLclNum(); + } + else + { + assert(qmark->TypeGet() == TYP_VOID); + } + + if (hasTrueExpr) + { + if (dst != nullptr) + { + trueExpr = gtNewTempAssign(lclNum, trueExpr); + } + Statement* trueStmt = fgNewStmtFromTree(trueExpr, stmt->GetILOffsetX()); + fgInsertStmtAtEnd(thenBlock, trueStmt); + } + + // Assign the falseExpr into the dst or tmp, insert in elseBlock + if (hasFalseExpr) + { + if (dst != nullptr) + { + falseExpr = gtNewTempAssign(lclNum, falseExpr); + } + Statement* falseStmt = fgNewStmtFromTree(falseExpr, stmt->GetILOffsetX()); + fgInsertStmtAtEnd(elseBlock, falseStmt); + } + +#ifdef DEBUG + if (verbose) + { + printf("\nExpanding top-level qmark in " FMT_BB " (after)\n", block->bbNum); + fgDispBasicBlocks(block, remainderBlock, true); + } +#endif // DEBUG +} + +/***************************************************************************** + * + * Expand GT_QMARK nodes from the flow graph into basic blocks. + * + */ + +void Compiler::fgExpandQmarkNodes() +{ + if (compQmarkUsed) + { + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + for (Statement* stmt : block->Statements()) + { + GenTree* expr = stmt->GetRootNode(); +#ifdef DEBUG + fgPreExpandQmarkChecks(expr); +#endif + fgExpandQmarkStmt(block, stmt); + } + } +#ifdef DEBUG + fgPostExpandQmarkChecks(); +#endif + } + compQmarkRationalized = true; +} + +#ifdef DEBUG +/***************************************************************************** + * + * Make sure we don't have any more GT_QMARK nodes. + * + */ +void Compiler::fgPostExpandQmarkChecks() +{ + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + for (Statement* stmt : block->Statements()) + { + GenTree* expr = stmt->GetRootNode(); + fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr); + } + } +} +#endif + +/***************************************************************************** + * + * Promoting struct locals + */ +void Compiler::fgPromoteStructs() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In fgPromoteStructs()\n"); + } +#endif // DEBUG + + if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE)) + { + JITDUMP(" promotion opt flag not enabled\n"); + return; + } + + if (fgNoStructPromotion) + { + JITDUMP(" promotion disabled by JitNoStructPromotion\n"); + return; + } + +#if 0 + // The code in this #if has been useful in debugging struct promotion issues, by + // enabling selective enablement of the struct promotion optimization according to + // method hash. 
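+    // Typical usage (hypothetical hash values): set the environment variables
+    //     structpromohashlo=3a00   structpromohashhi=3aff
+    // to limit struct promotion to methods whose hash falls in [0x3a00, 0x3aff];
+    // methods outside that range simply return without promoting.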
+#ifdef DEBUG + unsigned methHash = info.compMethodHash(); + char* lostr = getenv("structpromohashlo"); + unsigned methHashLo = 0; + if (lostr != NULL) + { + sscanf_s(lostr, "%x", &methHashLo); + } + char* histr = getenv("structpromohashhi"); + unsigned methHashHi = UINT32_MAX; + if (histr != NULL) + { + sscanf_s(histr, "%x", &methHashHi); + } + if (methHash < methHashLo || methHash > methHashHi) + { + return; + } + else + { + printf("Promoting structs for method %s, hash = 0x%x.\n", + info.compFullName, info.compMethodHash()); + printf(""); // in our logic this causes a flush + } +#endif // DEBUG +#endif // 0 + + if (info.compIsVarArgs) + { + JITDUMP(" promotion disabled because of varargs\n"); + return; + } + +#ifdef DEBUG + if (verbose) + { + printf("\nlvaTable before fgPromoteStructs\n"); + lvaTableDump(); + } +#endif // DEBUG + + // The lvaTable might grow as we grab temps. Make a local copy here. + unsigned startLvaCount = lvaCount; + + // + // Loop through the original lvaTable. Looking for struct locals to be promoted. + // + lvaStructPromotionInfo structPromotionInfo; + bool tooManyLocalsReported = false; + + // Clear the structPromotionHelper, since it is used during inlining, at which point it + // may be conservative about looking up SIMD info. + // We don't want to preserve those conservative decisions for the actual struct promotion. + structPromotionHelper->Clear(); + + for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++) + { + // Whether this var got promoted + bool promotedVar = false; + LclVarDsc* varDsc = &lvaTable[lclNum]; + + // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote + // its fields. Instead, we will attempt to enregister the entire struct. + if (varDsc->lvIsSIMDType() && (varDsc->lvIsUsedInSIMDIntrinsic() || isOpaqueSIMDLclVar(varDsc))) + { + varDsc->lvRegStruct = true; + } + // Don't promote if we have reached the tracking limit. + else if (lvaHaveManyLocals()) + { + // Print the message first time when we detected this condition + if (!tooManyLocalsReported) + { + JITDUMP("Stopped promoting struct fields, due to too many locals.\n"); + } + tooManyLocalsReported = true; + } + else if (varTypeIsStruct(varDsc)) + { + assert(structPromotionHelper != nullptr); + promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum); + } + + if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed) + { + // Even if we have not used this in a SIMD intrinsic, if it is not being promoted, + // we will treat it as a reg struct. + varDsc->lvRegStruct = true; + } + } + +#ifdef TARGET_ARM + if (structPromotionHelper->GetRequiresScratchVar()) + { + // Ensure that the scratch variable is allocated, in case we + // pass a promoted struct as an argument. + if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM) + { + lvaPromotedStructAssemblyScratchVar = + lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var.")); + lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL; + } + } +#endif // TARGET_ARM + +#ifdef DEBUG + if (verbose) + { + printf("\nlvaTable after fgPromoteStructs\n"); + lvaTableDump(); + } +#endif // DEBUG +} + +void Compiler::fgMorphStructField(GenTree* tree, GenTree* parent) +{ + noway_assert(tree->OperGet() == GT_FIELD); + + GenTreeField* field = tree->AsField(); + GenTree* objRef = field->gtFldObj; + GenTree* obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? 
objRef->AsOp()->gtOp1 : nullptr; + noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))); + + /* Is this an instance data member? */ + + if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)) + { + unsigned lclNum = obj->AsLclVarCommon()->GetLclNum(); + const LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (varTypeIsStruct(obj)) + { + if (varDsc->lvPromoted) + { + // Promoted struct + unsigned fldOffset = field->gtFldOffset; + unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); + + if (fieldLclIndex == BAD_VAR_NUM) + { + // Access a promoted struct's field with an offset that doesn't correspond to any field. + // It can happen if the struct was cast to another struct with different offsets. + return; + } + + const LclVarDsc* fieldDsc = &lvaTable[fieldLclIndex]; + var_types fieldType = fieldDsc->TypeGet(); + + assert(fieldType != TYP_STRUCT); // promoted LCL_VAR can't have a struct type. + if (tree->TypeGet() != fieldType) + { + if (tree->TypeGet() != TYP_STRUCT) + { + // This is going to be an incorrect instruction promotion. + // For example when we try to read int as long. + return; + } + + if (field->gtFldHnd != fieldDsc->lvFieldHnd) + { + CORINFO_CLASS_HANDLE fieldTreeClass = nullptr, fieldDscClass = nullptr; + + CorInfoType fieldTreeType = info.compCompHnd->getFieldType(field->gtFldHnd, &fieldTreeClass); + CorInfoType fieldDscType = info.compCompHnd->getFieldType(fieldDsc->lvFieldHnd, &fieldDscClass); + if (fieldTreeType != fieldDscType || fieldTreeClass != fieldDscClass) + { + // Access the promoted field with a different class handle, can't check that types match. + return; + } + // Access the promoted field as a field of a non-promoted struct with the same class handle. + } + else + { + // As we already checked this above, we must have a tree with a TYP_STRUCT type + // + assert(tree->TypeGet() == TYP_STRUCT); + + // The field tree accesses it as a struct, but the promoted LCL_VAR field + // says that it has another type. This happens when struct promotion unwraps + // a single field struct to get to its ultimate type. + // + // Note that currently, we cannot have a promoted LCL_VAR field with a struct type. + // + // This mismatch in types can lead to problems for some parent node type like GT_RETURN. + // So we check the parent node and only allow this optimization when we have + // a GT_ADDR or a GT_ASG. + // + // Note that for a GT_ASG we have to do some additional work, + // see below after the SetOper(GT_LCL_VAR) + // + if (!parent->OperIs(GT_ADDR, GT_ASG)) + { + // Don't transform other operations such as GT_RETURN + // + return; + } +#ifdef DEBUG + // This is an additional DEBUG-only sanity check + // + assert(structPromotionHelper != nullptr); + structPromotionHelper->CheckRetypedAsScalar(field->gtFldHnd, fieldType); +#endif // DEBUG + } + } + + tree->SetOper(GT_LCL_VAR); + tree->AsLclVarCommon()->SetLclNum(fieldLclIndex); + tree->gtType = fieldType; + tree->gtFlags &= GTF_NODE_MASK; // Note: that clears all flags except `GTF_COLON_COND`. 
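+                    // Net effect (a sketch with made-up numbers): a GT_FIELD reading
+                    // field 'y' at offset 4 of promoted struct local V02 becomes a
+                    // direct GT_LCL_VAR of the matching field local, say V07, with the
+                    // field's own type -- no field access or indirection remains.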
+ + if (parent->gtOper == GT_ASG) + { + // If we are changing the left side of an assignment, we need to set + // these two flags: + // + if (parent->AsOp()->gtOp1 == tree) + { + tree->gtFlags |= GTF_VAR_DEF; + tree->gtFlags |= GTF_DONT_CSE; + } + + // Promotion of struct containing struct fields where the field + // is a struct with a single pointer sized scalar type field: in + // this case struct promotion uses the type of the underlying + // scalar field as the type of struct field instead of recursively + // promoting. This can lead to a case where we have a block-asgn + // with its RHS replaced with a scalar type. Mark RHS value as + // DONT_CSE so that assertion prop will not do const propagation. + // The reason this is required is that if RHS of a block-asg is a + // constant, then it is interpreted as init-block incorrectly. + // + // TODO - This can also be avoided if we implement recursive struct + // promotion, tracked by #10019. + if (varTypeIsStruct(parent) && parent->AsOp()->gtOp2 == tree && !varTypeIsStruct(tree)) + { + tree->gtFlags |= GTF_DONT_CSE; + } + } +#ifdef DEBUG + if (verbose) + { + printf("Replacing the field in promoted struct with local var V%02u\n", fieldLclIndex); + } +#endif // DEBUG + } + } + else + { + // Normed struct + // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if + // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 + // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However, + // there is one extremely rare case where that won't be true. An enum type is a special value type + // that contains exactly one element of a primitive integer type (that, for CLS programs is named + // "value__"). The VM tells us that a local var of that enum type is the primitive type of the + // enum's single field. It turns out that it is legal for IL to access this field using ldflda or + // ldfld. For example: + // + // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum + // { + // .field public specialname rtspecialname int16 value__ + // .field public static literal valuetype mynamespace.e_t one = int16(0x0000) + // } + // .method public hidebysig static void Main() cil managed + // { + // .locals init (valuetype mynamespace.e_t V_0) + // ... + // ldloca.s V_0 + // ldflda int16 mynamespace.e_t::value__ + // ... + // } + // + // Normally, compilers will not generate the ldflda, since it is superfluous. + // + // In the example, the lclVar is short, but the JIT promotes all trees using this local to the + // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type + // mismatch like this, don't do this morphing. The local var may end up getting marked as + // address taken, and the appropriate SHORT load will be done from memory in that case. 
+ + if (tree->TypeGet() == obj->TypeGet()) + { + tree->ChangeOper(GT_LCL_VAR); + tree->AsLclVarCommon()->SetLclNum(lclNum); + tree->gtFlags &= GTF_NODE_MASK; + + if ((parent->gtOper == GT_ASG) && (parent->AsOp()->gtOp1 == tree)) + { + tree->gtFlags |= GTF_VAR_DEF; + tree->gtFlags |= GTF_DONT_CSE; + } +#ifdef DEBUG + if (verbose) + { + printf("Replacing the field in normed struct with local var V%02u\n", lclNum); + } +#endif // DEBUG + } + } + } +} + +void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) +{ + noway_assert(tree->OperGet() == GT_LCL_FLD); + + unsigned lclNum = tree->AsLclFld()->GetLclNum(); + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (varTypeIsStruct(varDsc)) + { + if (varDsc->lvPromoted) + { + // Promoted struct + unsigned fldOffset = tree->AsLclFld()->GetLclOffs(); + unsigned fieldLclIndex = 0; + LclVarDsc* fldVarDsc = nullptr; + + if (fldOffset != BAD_VAR_NUM) + { + fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); + noway_assert(fieldLclIndex != BAD_VAR_NUM); + fldVarDsc = &lvaTable[fieldLclIndex]; + } + + var_types treeType = tree->TypeGet(); + var_types fieldType = fldVarDsc->TypeGet(); + if (fldOffset != BAD_VAR_NUM && + ((genTypeSize(fieldType) == genTypeSize(treeType)) || (varDsc->lvFieldCnt == 1))) + { + // There is an existing sub-field we can use. + tree->AsLclFld()->SetLclNum(fieldLclIndex); + + // The field must be an enregisterable type; otherwise it would not be a promoted field. + // The tree type may not match, e.g. for return types that have been morphed, but both + // must be enregisterable types. + assert(varTypeIsEnregisterable(treeType) && varTypeIsEnregisterable(fieldType)); + + tree->ChangeOper(GT_LCL_VAR); + assert(tree->AsLclVarCommon()->GetLclNum() == fieldLclIndex); + tree->gtType = fldVarDsc->TypeGet(); + + if ((parent->gtOper == GT_ASG) && (parent->AsOp()->gtOp1 == tree)) + { + tree->gtFlags |= GTF_VAR_DEF; + tree->gtFlags |= GTF_DONT_CSE; + } + JITDUMP("Replacing the GT_LCL_FLD in promoted struct with local var V%02u\n", fieldLclIndex); + } + else + { + // There is no existing field that has all the parts that we need + // So we must ensure that the struct lives in memory. + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); + +#ifdef DEBUG + // We can't convert this guy to a float because he really does have his + // address taken.. + varDsc->lvKeepType = 1; +#endif // DEBUG + } + } + else if (varTypeIsSIMD(varDsc) && (genTypeSize(tree->TypeGet()) == genTypeSize(varDsc))) + { + assert(tree->AsLclFld()->GetLclOffs() == 0); + tree->gtType = varDsc->TypeGet(); + tree->ChangeOper(GT_LCL_VAR); + JITDUMP("Replacing GT_LCL_FLD of struct with local var V%02u\n", lclNum); + } + } +} + +//------------------------------------------------------------------------ +// fgResetImplicitByRefRefCount: Clear the ref count field of all implicit byrefs + +void Compiler::fgResetImplicitByRefRefCount() +{ +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#ifdef DEBUG + if (verbose) + { + printf("\n*************** In fgResetImplicitByRefRefCount()\n"); + } +#endif // DEBUG + + for (unsigned lclNum = 0; lclNum < info.compArgsCount; ++lclNum) + { + LclVarDsc* varDsc = lvaGetDesc(lclNum); + + if (varDsc->lvIsImplicitByRef) + { + // Clear the ref count field; fgMarkAddressTakenLocals will increment it per + // appearance of implicit-by-ref param so that call arg morphing can do an + // optimization for single-use implicit-by-ref params whose single use is as + // an outgoing call argument. 
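+            // (Stated loosely: an "implicit byref" is a struct parameter that is by-value
+            //  in the IL but that the win-x64/arm64 ABI passes via a hidden pointer; when
+            //  its only appearance is as an outgoing call argument, morph can forward the
+            //  incoming pointer rather than copying the struct.)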
+ varDsc->setLvRefCnt(0, RCS_EARLY); + } + } + +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +} + +//------------------------------------------------------------------------ +// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from +// struct to pointer). Also choose (based on address-exposed analysis) +// which struct promotions of implicit byrefs to keep or discard. +// For those which are kept, insert the appropriate initialization code. +// For those which are to be discarded, annotate the promoted field locals +// so that fgMorphImplicitByRefArgs will know to rewrite their appearances +// using indirections off the pointer parameters. + +void Compiler::fgRetypeImplicitByRefArgs() +{ +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#ifdef DEBUG + if (verbose) + { + printf("\n*************** In fgRetypeImplicitByRefArgs()\n"); + } +#endif // DEBUG + + for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) + { + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (lvaIsImplicitByRefLocal(lclNum)) + { + unsigned size; + + if (varDsc->lvSize() > REGSIZE_BYTES) + { + size = varDsc->lvSize(); + } + else + { + CORINFO_CLASS_HANDLE typeHnd = varDsc->GetStructHnd(); + size = info.compCompHnd->getClassSize(typeHnd); + } + + if (varDsc->lvPromoted) + { + // This implicit-by-ref was promoted; create a new temp to represent the + // promoted struct before rewriting this parameter as a pointer. + unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref")); + lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true); + if (info.compIsVarArgs) + { + lvaSetStructUsedAsVarArg(newLclNum); + } + + // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array. + varDsc = &lvaTable[lclNum]; + + // Copy the struct promotion annotations to the new temp. + LclVarDsc* newVarDsc = &lvaTable[newLclNum]; + newVarDsc->lvPromoted = true; + newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart; + newVarDsc->lvFieldCnt = varDsc->lvFieldCnt; + newVarDsc->lvContainsHoles = varDsc->lvContainsHoles; + newVarDsc->lvCustomLayout = varDsc->lvCustomLayout; +#ifdef DEBUG + newVarDsc->lvKeepType = true; +#endif // DEBUG + + // Propagate address-taken-ness and do-not-enregister-ness. + newVarDsc->lvAddrExposed = varDsc->lvAddrExposed; + newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister; +#ifdef DEBUG + newVarDsc->lvLclBlockOpAddr = varDsc->lvLclBlockOpAddr; + newVarDsc->lvLclFieldExpr = varDsc->lvLclFieldExpr; + newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr; + newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr; + newVarDsc->lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall; +#endif // DEBUG + + // If the promotion is dependent, the promoted temp would just be committed + // to memory anyway, so we'll rewrite its appearances to be indirections + // through the pointer parameter, the same as we'd do for this + // parameter if it weren't promoted at all (otherwise the initialization + // of the new temp would just be a needless memcpy at method entry). + // + // Otherwise, see how many appearances there are. We keep two early ref counts: total + // number of references to the struct or some field, and how many of these are + // arguments to calls. We undo promotion unless we see enough non-call uses. 
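+                // For example (illustrative counts, assuming an independent promotion):
+                // a two-field struct parameter with 5 total appearances, 4 of them call
+                // arguments, has only 1 non-call use; 1 <= lvFieldCnt (2), so promotion
+                // is undone, whereas 3 or more non-call uses would keep it.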
+ // + const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY); + const unsigned callAppearances = (unsigned)varDsc->lvRefCntWtd(RCS_EARLY); + assert(totalAppearances >= callAppearances); + const unsigned nonCallAppearances = totalAppearances - callAppearances; + + bool undoPromotion = ((lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || + (nonCallAppearances <= varDsc->lvFieldCnt)); + +#ifdef DEBUG + // Above is a profitability heurisic; either value of + // undoPromotion should lead to correct code. So, + // under stress, make different decisions at times. + if (compStressCompile(STRESS_BYREF_PROMOTION, 25)) + { + undoPromotion = !undoPromotion; + JITDUMP("Stress -- changing byref undo promotion for V%02u to %s undo\n", lclNum, + undoPromotion ? "" : "NOT"); + } +#endif // DEBUG + + JITDUMP("%s promotion of implicit by-ref V%02u: %s total: %u non-call: %u fields: %u\n", + undoPromotion ? "Undoing" : "Keeping", lclNum, + (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ? "dependent;" : "", + totalAppearances, nonCallAppearances, varDsc->lvFieldCnt); + + if (!undoPromotion) + { + // Insert IR that initializes the temp from the parameter. + // LHS is a simple reference to the temp. + fgEnsureFirstBBisScratch(); + GenTree* lhs = gtNewLclvNode(newLclNum, varDsc->lvType); + // RHS is an indirection (using GT_OBJ) off the parameter. + GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF); + GenTree* rhs = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, typGetBlkLayout(size)); + GenTree* assign = gtNewAssignNode(lhs, rhs); + fgNewStmtAtBeg(fgFirstBB, assign); + } + + // Update the locals corresponding to the promoted fields. + unsigned fieldLclStart = varDsc->lvFieldLclStart; + unsigned fieldCount = varDsc->lvFieldCnt; + unsigned fieldLclStop = fieldLclStart + fieldCount; + + for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) + { + LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; + + if (undoPromotion) + { + // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs + // will know to rewrite appearances of this local. + assert(fieldVarDsc->lvParentLcl == lclNum); + } + else + { + // Set the new parent. + fieldVarDsc->lvParentLcl = newLclNum; + // Clear the ref count field; it is used to communicate the number of references + // to the implicit byref parameter when morphing calls that pass the implicit byref + // out as an outgoing argument value, but that doesn't pertain to this field local + // which is now a field of a non-arg local. + fieldVarDsc->setLvRefCnt(0, RCS_EARLY); + } + + fieldVarDsc->lvIsParam = false; + // The fields shouldn't inherit any register preferences from + // the parameter which is really a pointer to the struct. + fieldVarDsc->lvIsRegArg = false; + fieldVarDsc->lvIsMultiRegArg = false; + fieldVarDsc->SetArgReg(REG_NA); +#if FEATURE_MULTIREG_ARGS + fieldVarDsc->SetOtherArgReg(REG_NA); +#endif + } + + // Hijack lvFieldLclStart to record the new temp number. + // It will get fixed up in fgMarkDemotedImplicitByRefArgs. + varDsc->lvFieldLclStart = newLclNum; + // Go ahead and clear lvFieldCnt -- either we're promoting + // a replacement temp or we're not promoting this arg, and + // in either case the parameter is now a pointer that doesn't + // have these fields. 
+ varDsc->lvFieldCnt = 0; + + // Hijack lvPromoted to communicate to fgMorphImplicitByRefArgs + // whether references to the struct should be rewritten as + // indirections off the pointer (not promoted) or references + // to the new struct local (promoted). + varDsc->lvPromoted = !undoPromotion; + } + else + { + // The "undo promotion" path above clears lvPromoted for args that struct + // promotion wanted to promote but that aren't considered profitable to + // rewrite. It hijacks lvFieldLclStart to communicate to + // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left + // on such args for fgMorphImplicitByRefArgs to consult in the interim. + // Here we have an arg that was simply never promoted, so make sure it doesn't + // have nonzero lvFieldLclStart, since that would confuse fgMorphImplicitByRefArgs + // and fgMarkDemotedImplicitByRefArgs. + assert(varDsc->lvFieldLclStart == 0); + } + + // Since the parameter in this position is really a pointer, its type is TYP_BYREF. + varDsc->lvType = TYP_BYREF; + + // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF + // make sure that the following flag is not set as these will force SSA to + // exclude tracking/enregistering these LclVars. (see SsaBuilder::IncludeInSsa) + // + varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it. + + // The struct parameter may have had its address taken, but the pointer parameter + // cannot -- any uses of the struct parameter's address are uses of the pointer + // parameter's value, and there's no way for the MSIL to reference the pointer + // parameter's address. So clear the address-taken bit for the parameter. + varDsc->lvAddrExposed = 0; + varDsc->lvDoNotEnregister = 0; + +#ifdef DEBUG + // This should not be converted to a double in stress mode, + // because it is really a pointer + varDsc->lvKeepType = 1; + + if (verbose) + { + printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum); + } +#endif // DEBUG + } + } + +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +} + +//------------------------------------------------------------------------ +// fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion +// asked to promote. Appearances of these have now been rewritten +// (by fgMorphImplicitByRefArgs) using indirections from the pointer +// parameter or references to the promotion temp, as appropriate. + +void Compiler::fgMarkDemotedImplicitByRefArgs() +{ +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) + + for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) + { + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (lvaIsImplicitByRefLocal(lclNum)) + { + if (varDsc->lvPromoted) + { + // The parameter is simply a pointer now, so clear lvPromoted. It was left set + // by fgRetypeImplicitByRefArgs to communicate to fgMorphImplicitByRefArgs that + // appearances of this arg needed to be rewritten to a new promoted struct local. + varDsc->lvPromoted = false; + + // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs + // to tell fgMorphImplicitByRefArgs which local is the new promoted struct one. + varDsc->lvFieldLclStart = 0; + } + else if (varDsc->lvFieldLclStart != 0) + { + // We created new temps to represent a promoted struct corresponding to this + // parameter, but decided not to go through with the promotion and have + // rewritten all uses as indirections off the pointer parameter. 
+ // We stashed the pointer to the new struct temp in lvFieldLclStart; make + // note of that and clear the annotation. + unsigned structLclNum = varDsc->lvFieldLclStart; + varDsc->lvFieldLclStart = 0; + + // Clear the arg's ref count; this was set during address-taken analysis so that + // call morphing could identify single-use implicit byrefs; we're done with + // that, and want it to be in its default state of zero when we go to set + // real ref counts for all variables. + varDsc->setLvRefCnt(0, RCS_EARLY); + + // The temp struct is now unused; set flags appropriately so that we + // won't allocate space for it on the stack. + LclVarDsc* structVarDsc = &lvaTable[structLclNum]; + structVarDsc->setLvRefCnt(0, RCS_EARLY); + structVarDsc->lvAddrExposed = false; +#ifdef DEBUG + structVarDsc->lvUnusedStruct = true; +#endif // DEBUG + + unsigned fieldLclStart = structVarDsc->lvFieldLclStart; + unsigned fieldCount = structVarDsc->lvFieldCnt; + unsigned fieldLclStop = fieldLclStart + fieldCount; + + for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum) + { + // Fix the pointer to the parent local. + LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; + assert(fieldVarDsc->lvParentLcl == lclNum); + fieldVarDsc->lvParentLcl = structLclNum; + + // The field local is now unused; set flags appropriately so that + // we won't allocate stack space for it. + fieldVarDsc->setLvRefCnt(0, RCS_EARLY); + fieldVarDsc->lvAddrExposed = false; + } + } + } + } + +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +} + +/***************************************************************************** + * + * Morph irregular parameters + * for x64 and ARM64 this means turning them into byrefs, adding extra indirs. + */ +bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) +{ +#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) + + return false; + +#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 + + bool changed = false; + + // Implicit byref morphing needs to know if the reference to the parameter is a + // child of GT_ADDR or not, so this method looks one level down and does the + // rewrite whenever a child is a reference to an implicit byref parameter. + if (tree->gtOper == GT_ADDR) + { + if (tree->AsOp()->gtOp1->gtOper == GT_LCL_VAR) + { + GenTree* morphedTree = fgMorphImplicitByRefArgs(tree, true); + changed = (morphedTree != nullptr); + assert(!changed || (morphedTree == tree)); + } + } + else + { + for (GenTree** pTree : tree->UseEdges()) + { + GenTree** pTreeCopy = pTree; + GenTree* childTree = *pTree; + if (childTree->gtOper == GT_LCL_VAR) + { + GenTree* newChildTree = fgMorphImplicitByRefArgs(childTree, false); + if (newChildTree != nullptr) + { + changed = true; + *pTreeCopy = newChildTree; + } + } + } + } + + return changed; +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +} + +GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) +{ + assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->AsOp()->gtOp1->gtOper == GT_LCL_VAR))); + assert(isAddr == (tree->gtOper == GT_ADDR)); + + GenTree* lclVarTree = isAddr ? 
tree->AsOp()->gtOp1 : tree; + unsigned lclNum = lclVarTree->AsLclVarCommon()->GetLclNum(); + LclVarDsc* lclVarDsc = &lvaTable[lclNum]; + + CORINFO_FIELD_HANDLE fieldHnd; + unsigned fieldOffset = 0; + var_types fieldRefType = TYP_UNKNOWN; + + if (lvaIsImplicitByRefLocal(lclNum)) + { + // The SIMD transformation to coalesce contiguous references to SIMD vector fields will + // re-invoke the traversal to mark address-taken locals. + // So, we may encounter a tree that has already been transformed to TYP_BYREF. + // If we do, leave it as-is. + if (!varTypeIsStruct(lclVarTree)) + { + assert(lclVarTree->TypeGet() == TYP_BYREF); + + return nullptr; + } + else if (lclVarDsc->lvPromoted) + { + // fgRetypeImplicitByRefArgs created a new promoted struct local to represent this + // arg. Rewrite this to refer to the new local. + assert(lclVarDsc->lvFieldLclStart != 0); + lclVarTree->AsLclVarCommon()->SetLclNum(lclVarDsc->lvFieldLclStart); + return tree; + } + + fieldHnd = nullptr; + } + else if (lclVarDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclVarDsc->lvParentLcl)) + { + // This was a field reference to an implicit-by-reference struct parameter that was + // dependently promoted; update it to a field reference off the pointer. + // Grab the field handle from the struct field lclVar. + fieldHnd = lclVarDsc->lvFieldHnd; + fieldOffset = lclVarDsc->lvFldOffset; + assert(fieldHnd != nullptr); + // Update lclNum/lclVarDsc to refer to the parameter + lclNum = lclVarDsc->lvParentLcl; + lclVarDsc = &lvaTable[lclNum]; + fieldRefType = lclVarTree->TypeGet(); + } + else + { + // We only need to tranform the 'marked' implicit by ref parameters + return nullptr; + } + + // This is no longer a def of the lclVar, even if it WAS a def of the struct. + lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK); + + if (isAddr) + { + if (fieldHnd == nullptr) + { + // change &X into just plain X + tree->ReplaceWith(lclVarTree, this); + tree->gtType = TYP_BYREF; + } + else + { + // change &(X.f) [i.e. GT_ADDR of local for promoted arg field] + // into &(X, f) [i.e. 
GT_ADDR of GT_FIELD off ptr param] + lclVarTree->AsLclVarCommon()->SetLclNum(lclNum); + lclVarTree->gtType = TYP_BYREF; + tree->AsOp()->gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclVarTree, fieldOffset); + } + +#ifdef DEBUG + if (verbose) + { + printf("Replacing address of implicit by ref struct parameter with byref:\n"); + } +#endif // DEBUG + } + else + { + // Change X into OBJ(X) or FIELD(X, f) + var_types structType = tree->gtType; + tree->gtType = TYP_BYREF; + + if (fieldHnd) + { + tree->AsLclVarCommon()->SetLclNum(lclNum); + tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset); + } + else + { + tree = gtNewObjNode(lclVarDsc->GetStructHnd(), tree); + + if (structType == TYP_STRUCT) + { + gtSetObjGcInfo(tree->AsObj()); + } + } + + // TODO-CQ: If the VM ever stops violating the ABI and passing heap references + // we could remove TGTANYWHERE + tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE); + +#ifdef DEBUG + if (verbose) + { + printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n"); + } +#endif // DEBUG + } + +#ifdef DEBUG + if (verbose) + { + gtDispTree(tree); + } +#endif // DEBUG + + return tree; +} + +//------------------------------------------------------------------------ +// fgAddFieldSeqForZeroOffset: +// Associate a fieldSeq (with a zero offset) with the GenTree node 'addr' +// +// Arguments: +// addr - A GenTree node +// fieldSeqZero - a fieldSeq (with a zero offset) +// +// Notes: +// Some GenTree nodes have internal fields that record the field sequence. +// If we have one of these nodes: GT_CNS_INT, GT_LCL_FLD +// we can append the field sequence using the gtFieldSeq +// If we have a GT_ADD of a GT_CNS_INT we can use the +// fieldSeq from child node. +// Otherwise we record 'fieldSeqZero' in the GenTree node using +// a Map: GetFieldSeqStore() +// When doing so we take care to preserve any existing zero field sequence +// +void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) +{ + // We expect 'addr' to be an address at this point. + assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); + + // Tunnel through any commas. + const bool commaOnly = true; + addr = addr->gtEffectiveVal(commaOnly); + + // We still expect 'addr' to be an address at this point. 
+ assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); + + FieldSeqNode* fieldSeqUpdate = fieldSeqZero; + GenTree* fieldSeqNode = addr; + bool fieldSeqRecorded = false; + +#ifdef DEBUG + if (verbose) + { + printf("\nfgAddFieldSeqForZeroOffset for"); + gtDispFieldSeq(fieldSeqZero); + + printf("\naddr (Before)\n"); + gtDispNode(addr, nullptr, nullptr, false); + gtDispCommonEndLine(addr); + } +#endif // DEBUG + + switch (addr->OperGet()) + { + case GT_CNS_INT: + fieldSeqUpdate = GetFieldSeqStore()->Append(addr->AsIntCon()->gtFieldSeq, fieldSeqZero); + addr->AsIntCon()->gtFieldSeq = fieldSeqUpdate; + fieldSeqRecorded = true; + break; + + case GT_LCL_FLD: + { + GenTreeLclFld* lclFld = addr->AsLclFld(); + fieldSeqUpdate = GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeqZero); + lclFld->SetFieldSeq(fieldSeqUpdate); + fieldSeqRecorded = true; + break; + } + + case GT_ADDR: + if (addr->AsOp()->gtOp1->OperGet() == GT_LCL_FLD) + { + fieldSeqNode = addr->AsOp()->gtOp1; + + GenTreeLclFld* lclFld = addr->AsOp()->gtOp1->AsLclFld(); + fieldSeqUpdate = GetFieldSeqStore()->Append(lclFld->GetFieldSeq(), fieldSeqZero); + lclFld->SetFieldSeq(fieldSeqUpdate); + fieldSeqRecorded = true; + } + break; + + case GT_ADD: + if (addr->AsOp()->gtOp1->OperGet() == GT_CNS_INT) + { + fieldSeqNode = addr->AsOp()->gtOp1; + + fieldSeqUpdate = GetFieldSeqStore()->Append(addr->AsOp()->gtOp1->AsIntCon()->gtFieldSeq, fieldSeqZero); + addr->AsOp()->gtOp1->AsIntCon()->gtFieldSeq = fieldSeqUpdate; + fieldSeqRecorded = true; + } + else if (addr->AsOp()->gtOp2->OperGet() == GT_CNS_INT) + { + fieldSeqNode = addr->AsOp()->gtOp2; + + fieldSeqUpdate = GetFieldSeqStore()->Append(addr->AsOp()->gtOp2->AsIntCon()->gtFieldSeq, fieldSeqZero); + addr->AsOp()->gtOp2->AsIntCon()->gtFieldSeq = fieldSeqUpdate; + fieldSeqRecorded = true; + } + break; + + default: + break; + } + + if (fieldSeqRecorded == false) + { + // Record in the general zero-offset map. + + // The "addr" node might already be annotated with a zero-offset field sequence. + FieldSeqNode* existingFieldSeq = nullptr; + if (GetZeroOffsetFieldMap()->Lookup(addr, &existingFieldSeq)) + { + // Append the zero field sequences + fieldSeqUpdate = GetFieldSeqStore()->Append(existingFieldSeq, fieldSeqZero); + } + // Overwrite the field sequence annotation for op1 + GetZeroOffsetFieldMap()->Set(addr, fieldSeqUpdate, NodeToFieldSeqMap::Overwrite); + fieldSeqRecorded = true; + } + +#ifdef DEBUG + if (verbose) + { + printf(" (After)\n"); + gtDispNode(fieldSeqNode, nullptr, nullptr, false); + gtDispCommonEndLine(fieldSeqNode); + } +#endif // DEBUG +} + +#ifdef FEATURE_SIMD + +//----------------------------------------------------------------------------------- +// fgMorphCombineSIMDFieldAssignments: +// If the RHS of the input stmt is a read for simd vector X Field, then this function +// will keep reading next few stmts based on the vector size(2, 3, 4). +// If the next stmts LHS are located contiguous and RHS are also located +// contiguous, then we replace those statements with a copyblk. +// +// Argument: +// block - BasicBlock*. block which stmt belongs to +// stmt - Statement*. the stmt node we want to check +// +// return value: +// if this funciton successfully optimized the stmts, then return true. 
Otherwise +// return false; + +bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt) +{ + GenTree* tree = stmt->GetRootNode(); + assert(tree->OperGet() == GT_ASG); + + GenTree* originalLHS = tree->AsOp()->gtOp1; + GenTree* prevLHS = tree->AsOp()->gtOp1; + GenTree* prevRHS = tree->AsOp()->gtOp2; + unsigned index = 0; + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + unsigned simdSize = 0; + GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &simdBaseJitType, &index, &simdSize, true); + + if (simdStructNode == nullptr || index != 0 || simdBaseJitType != CORINFO_TYPE_FLOAT) + { + // if the RHS is not from a SIMD vector field X, then there is no need to check further. + return false; + } + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + var_types simdType = getSIMDTypeForSize(simdSize); + int assignmentsCount = simdSize / genTypeSize(simdBaseType) - 1; + int remainingAssignments = assignmentsCount; + Statement* curStmt = stmt->GetNextStmt(); + Statement* lastStmt = stmt; + + while (curStmt != nullptr && remainingAssignments > 0) + { + GenTree* exp = curStmt->GetRootNode(); + if (exp->OperGet() != GT_ASG) + { + break; + } + GenTree* curLHS = exp->gtGetOp1(); + GenTree* curRHS = exp->gtGetOp2(); + + if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS)) + { + break; + } + + remainingAssignments--; + prevLHS = curLHS; + prevRHS = curRHS; + + lastStmt = curStmt; + curStmt = curStmt->GetNextStmt(); + } + + if (remainingAssignments > 0) + { + // if the left assignments number is bigger than zero, then this means + // that the assignments are not assgining to the contiguously memory + // locations from same vector. + return false; + } +#ifdef DEBUG + if (verbose) + { + printf("\nFound contiguous assignments from a SIMD vector to memory.\n"); + printf("From " FMT_BB ", stmt ", block->bbNum); + printStmtID(stmt); + printf(" to stmt"); + printStmtID(lastStmt); + printf("\n"); + } +#endif + + for (int i = 0; i < assignmentsCount; i++) + { + fgRemoveStmt(block, stmt->GetNextStmt()); + } + + GenTree* dstNode; + + if (originalLHS->OperIs(GT_LCL_FLD)) + { + dstNode = originalLHS; + dstNode->gtType = simdType; + dstNode->AsLclFld()->SetFieldSeq(FieldSeqStore::NotAField()); + + // This may have changed a partial local field into full local field + if (dstNode->IsPartialLclFld(this)) + { + dstNode->gtFlags |= GTF_VAR_USEASG; + } + else + { + dstNode->gtFlags &= ~GTF_VAR_USEASG; + } + } + else + { + GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); + if (simdStructNode->OperIsLocal()) + { + setLclRelatedToSIMDIntrinsic(simdStructNode); + } + GenTree* copyBlkAddr = copyBlkDst; + if (copyBlkAddr->gtOper == GT_LEA) + { + copyBlkAddr = copyBlkAddr->AsAddrMode()->Base(); + } + GenTreeLclVarCommon* localDst = nullptr; + if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr)) + { + setLclRelatedToSIMDIntrinsic(localDst); + } + + if (simdStructNode->TypeGet() == TYP_BYREF) + { + assert(simdStructNode->OperIsLocal()); + assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->GetLclNum())); + simdStructNode = gtNewIndir(simdType, simdStructNode); + } + else + { + assert(varTypeIsSIMD(simdStructNode)); + } + + dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst); + } + +#ifdef DEBUG + if (verbose) + { + printf("\n" FMT_BB " stmt ", block->bbNum); + printStmtID(stmt); + printf("(before)\n"); + gtDispStmt(stmt); + } +#endif + + assert(!simdStructNode->CanCSE()); + 
simdStructNode->ClearDoNotCSE(); + + tree = gtNewAssignNode(dstNode, simdStructNode); + + stmt->SetRootNode(tree); + + // Since we generated a new address node which didn't exist before, + // we should expose this address manually here. + // TODO-ADDR: Remove this when LocalAddressVisitor transforms all + // local field access into LCL_FLDs, at that point we would be + // combining 2 existing LCL_FLDs or 2 FIELDs that do not reference + // a local and thus cannot result in a new address exposed local. + fgMarkAddressExposedLocals(stmt); + +#ifdef DEBUG + if (verbose) + { + printf("\nReplaced " FMT_BB " stmt", block->bbNum); + printStmtID(stmt); + printf("(after)\n"); + gtDispStmt(stmt); + } +#endif + return true; +} + +#endif // FEATURE_SIMD + +//------------------------------------------------------------------------ +// fgCheckStmtAfterTailCall: check that statements after the tail call stmt +// candidate are in one of expected forms, that are desctibed below. +// +// Return Value: +// 'true' if stmts are in the expected form, else 'false'. +// +bool Compiler::fgCheckStmtAfterTailCall() +{ + + // For void calls, we would have created a GT_CALL in the stmt list. + // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)). + // For calls returning structs, we would have a void call, followed by a void return. + // For debuggable code, it would be an assignment of the call to a temp + // We want to get rid of any of this extra trees, and just leave + // the call. + Statement* callStmt = fgMorphStmt; + + Statement* nextMorphStmt = callStmt->GetNextStmt(); + + // Check that the rest stmts in the block are in one of the following pattern: + // 1) ret(void) + // 2) ret(cast*(callResultLclVar)) + // 3) lclVar = callResultLclVar, the actual ret(lclVar) in another block + if (nextMorphStmt != nullptr) + { + GenTree* callExpr = callStmt->GetRootNode(); + if (callExpr->gtOper != GT_ASG) + { + // The next stmt can be GT_RETURN(TYP_VOID) or GT_RETURN(lclVar), + // where lclVar was return buffer in the call for structs or simd. + Statement* retStmt = nextMorphStmt; + GenTree* retExpr = retStmt->GetRootNode(); + noway_assert(retExpr->gtOper == GT_RETURN); + + nextMorphStmt = retStmt->GetNextStmt(); + } + else + { + noway_assert(callExpr->gtGetOp1()->OperIsLocal()); + unsigned callResultLclNumber = callExpr->gtGetOp1()->AsLclVarCommon()->GetLclNum(); + +#if FEATURE_TAILCALL_OPT_SHARED_RETURN + + // We can have a chain of assignments from the call result to + // various inline return spill temps. These are ok as long + // as the last one ultimately provides the return value or is ignored. + // + // And if we're returning a small type we may see a cast + // on the source side. + while ((nextMorphStmt != nullptr) && (nextMorphStmt->GetRootNode()->OperIs(GT_ASG))) + { + Statement* moveStmt = nextMorphStmt; + GenTree* moveExpr = nextMorphStmt->GetRootNode(); + GenTree* moveDest = moveExpr->gtGetOp1(); + noway_assert(moveDest->OperIsLocal()); + + // Tunnel through any casts on the source side. + GenTree* moveSource = moveExpr->gtGetOp2(); + while (moveSource->OperIs(GT_CAST)) + { + noway_assert(!moveSource->gtOverflow()); + moveSource = moveSource->gtGetOp1(); + } + noway_assert(moveSource->OperIsLocal()); + + // Verify we're just passing the value from one local to another + // along the chain. 
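+                    // E.g. (sketch, hypothetical local numbers): after `V05 = CALL(...)`
+                    // we may see `V06 = V05` and then `V07 = CAST(V06)` before the
+                    // GT_RETURN of V07 (possibly in another block); each hop's source
+                    // must be the local defined by the previous one.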
+ const unsigned srcLclNum = moveSource->AsLclVarCommon()->GetLclNum(); + noway_assert(srcLclNum == callResultLclNumber); + const unsigned dstLclNum = moveDest->AsLclVarCommon()->GetLclNum(); + callResultLclNumber = dstLclNum; + + nextMorphStmt = moveStmt->GetNextStmt(); + } + if (nextMorphStmt != nullptr) +#endif + { + Statement* retStmt = nextMorphStmt; + GenTree* retExpr = nextMorphStmt->GetRootNode(); + noway_assert(retExpr->gtOper == GT_RETURN); + + GenTree* treeWithLcl = retExpr->gtGetOp1(); + while (treeWithLcl->gtOper == GT_CAST) + { + noway_assert(!treeWithLcl->gtOverflow()); + treeWithLcl = treeWithLcl->gtGetOp1(); + } + + noway_assert(callResultLclNumber == treeWithLcl->AsLclVarCommon()->GetLclNum()); + + nextMorphStmt = retStmt->GetNextStmt(); + } + } + } + return nextMorphStmt == nullptr; +} + +//------------------------------------------------------------------------ +// fgCanTailCallViaJitHelper: check whether we can use the faster tailcall +// JIT helper on x86. +// +// Return Value: +// 'true' if we can; or 'false' if we should use the generic tailcall mechanism. +// +bool Compiler::fgCanTailCallViaJitHelper() +{ +#ifndef TARGET_X86 + // On anything except X86 we have no faster mechanism available. + return false; +#else + // The JIT helper does not properly handle the case where localloc was used. + if (compLocallocUsed) + return false; + + return true; +#endif +} + +static const int numberOfTrackedFlags = 5; +static const unsigned trackedFlags[numberOfTrackedFlags] = {GTF_ASG, GTF_CALL, GTF_EXCEPT, GTF_GLOB_REF, + GTF_ORDER_SIDEEFF}; + +//------------------------------------------------------------------------ +// fgMorphArgList: morph argument list tree without recursion. +// +// Arguments: +// args - argument list tree to morph; +// mac - morph address context, used to morph children. +// +// Return Value: +// morphed argument list. +// +GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac) +{ + // Use a non-recursive algorithm that morphs all actual list values, + // memorizes the last node for each effect flag and resets + // them during the second iteration. + assert((trackedFlags[0] | trackedFlags[1] | trackedFlags[2] | trackedFlags[3] | trackedFlags[4]) == GTF_ALL_EFFECT); + + GenTree* memorizedLastNodes[numberOfTrackedFlags] = {nullptr}; + + for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) + { + // Morph actual list values. + GenTree*& arg = listNode->Current(); + arg = fgMorphTree(arg, mac); + + // Remember the last list node with each flag. + for (int i = 0; i < numberOfTrackedFlags; ++i) + { + if ((arg->gtFlags & trackedFlags[i]) != 0) + { + memorizedLastNodes[i] = listNode; + } + } + } + + for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) + { + // Clear all old effects from the list node. + listNode->gtFlags &= ~GTF_ALL_EFFECT; + + // Spread each flag to all list nodes (to the prefix) before the memorized last node. 
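+ // Once a list node is the memorized last producer of a flag, clear that slot so the
+ // flag is not propagated to the list nodes that come after it.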
+ for (int i = 0; i < numberOfTrackedFlags; ++i) + { + if (memorizedLastNodes[i] != nullptr) + { + listNode->gtFlags |= trackedFlags[i]; + } + if (listNode == memorizedLastNodes[i]) + { + memorizedLastNodes[i] = nullptr; + } + } + } + + return args; +} From eb88feeda4cbe753ea7c92fe29af556334ed01fc Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Tue, 4 May 2021 20:03:18 +0300 Subject: [PATCH 11/30] Update codegenxarch.cpp --- src/coreclr/jit/codegenxarch.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index dcb60dc06c4ea..68e9c313634c1 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4109,9 +4109,11 @@ void CodeGen::genCodeForShift(GenTree* tree) GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0); } break; + // Optimize "X<<2" to "lea[reg*4]" case 2: GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 4, 0); break; + // Optimize "X<<3" to "lea[reg*8]" case 3: GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 8, 0); break; From 54cf11d5e3faa6bf68e50e27006ae0c5eb1ef666 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Tue, 4 May 2021 20:04:48 +0300 Subject: [PATCH 12/30] Update morph.cpp --- src/coreclr/jit/morph.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 85bb66d23abde..2a1d0f09acd39 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14070,6 +14070,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) case GT_OR: case GT_XOR: case GT_AND: + // Optimize "X & (-X)" to "ret 1" if (op2->OperIs(GT_NEG) && op1->IsLocal() && varTypeIsIntOrI(op1->TypeGet())) { if (tree->Compare(op1, op2->gtGetOp1())) From 8fd237578751378373413ff1761246d8b9890e52 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Tue, 4 May 2021 20:06:24 +0300 Subject: [PATCH 13/30] Update codegenxarch.cpp --- src/coreclr/jit/codegenxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 68e9c313634c1..a2a02390d6c29 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4109,11 +4109,11 @@ void CodeGen::genCodeForShift(GenTree* tree) GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0); } break; - // Optimize "X<<2" to "lea[reg*4]" + // Optimize "X<<2" to "lea [reg*4]" case 2: GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 4, 0); break; - // Optimize "X<<3" to "lea[reg*8]" + // Optimize "X<<3" to "lea [reg*8]" case 3: GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 8, 0); break; From 3ef0e2465d9bd44803903f64dfd919860d2a04ff Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Tue, 4 May 2021 20:28:08 +0300 Subject: [PATCH 14/30] Update morph.cpp --- src/coreclr/jit/morph.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2a1d0f09acd39..41f45f3cb852a 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14070,16 +14070,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) case GT_OR: case GT_XOR: case GT_AND: - // 
Optimize "X & (-X)" to "ret 1" - if (op2->OperIs(GT_NEG) && op1->IsLocal() && varTypeIsIntOrI(op1->TypeGet())) - { - if (tree->Compare(op1, op2->gtGetOp1())) - { - tree->SetOper(GT_CNS_INT); - tree->AsIntCon()->gtIconVal = 1; - return tree; - } - } + /* Commute any non-REF constants to the right */ From 512daa7a938fceaad9a5e590a792192dbf9b4c58 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 15:31:30 +0300 Subject: [PATCH 15/30] Update codegenxarch.cpp --- src/coreclr/jit/codegenxarch.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index a2a02390d6c29..f36917293c7bf 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4095,7 +4095,8 @@ void CodeGen::genCodeForShift(GenTree* tree) // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg" ssize_t intCon = shiftBy->AsIntConCommon()->IconValue(); - if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && (intCon == 1 || intCon == 2 || intCon == 3)) + if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && + (intCon == 1 || intCon == 2 || intCon == 3)) { switch (intCon) { From e973f620f0eeb2e806f2136513e4fb1febdd718a Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:22:01 +0300 Subject: [PATCH 16/30] Update codegenarm64.cpp --- src/coreclr/jit/codegenarm64.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 31b5046ce75cd..d306c3d438581 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2804,7 +2804,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) genConsumeAddress(addr); genConsumeRegs(data); - emitAttr eDataSize = emitActualTypeSize(data); + emitAttr dataSize = emitActualTypeSize(data); if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { @@ -2813,23 +2813,23 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) switch (treeNode->gtOper) { case GT_XORR: - GetEmitter()->emitIns_R_R_R(INS_ldsetal, eDataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, addrReg); break; case GT_XAND: { // Grab a temp reg to perform `MVN` for dataReg first. regNumber tempReg = treeNode->GetSingleTempReg(); - GetEmitter()->emitIns_R_R(INS_mvn, eDataSize, tempReg, dataReg); - GetEmitter()->emitIns_R_R_R(INS_ldclral, eDataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg); + GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, addrReg); break; } case GT_XCHG: - GetEmitter()->emitIns_R_R_R(INS_swpal, eDataSize, dataReg, targetReg, addrReg); + GetEmitter()->emitIns_R_R_R(INS_swpal, dataSize, dataReg, targetReg, addrReg); break; case GT_XADD: - GetEmitter()->emitIns_R_R_R(INS_ldaddal, eDataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + GetEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, (targetReg == REG_NA) ? 
REG_ZR : targetReg, addrReg); break; default: @@ -2887,7 +2887,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) genDefineTempLabel(labelRetry); // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(INS_ldaxr, eDataSize, loadReg, addrReg); + GetEmitter()->emitIns_R_R(INS_ldaxr, dataSize, loadReg, addrReg); switch (treeNode->OperGet()) { @@ -2896,12 +2896,12 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { // Even though INS_add is specified here, the encoder will choose either // an INS_add or an INS_sub and encode the immediate as a positive value - genInstrWithConstant(INS_add, eDataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), + genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), REG_NA); } else { - GetEmitter()->emitIns_R_R_R(INS_add, eDataSize, storeDataReg, loadReg, dataReg); + GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg); } break; case GT_XCHG: @@ -2913,7 +2913,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) } // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(INS_stlxr, eDataSize, exResultReg, storeDataReg, addrReg); + GetEmitter()->emitIns_R_R_R(INS_stlxr, dataSize, exResultReg, storeDataReg, addrReg); GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); @@ -2953,18 +2953,18 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { - emitAttr eDataSize = emitActualTypeSize(data); + emitAttr dataSize = emitActualTypeSize(data); // casal use the comparand as the target reg if (targetReg != comparandReg) { - GetEmitter()->emitIns_R_R(INS_mov, eDataSize, targetReg, comparandReg); + GetEmitter()->emitIns_R_R(INS_mov, dataSize, targetReg, comparandReg); // Catch case we destroyed data or address before use noway_assert(addrReg != targetReg); noway_assert(dataReg != targetReg); } - GetEmitter()->emitIns_R_R_R(INS_casal, eDataSize, targetReg, dataReg, addrReg); + GetEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg); } else { From e32ab33132a9a92e1a0c00f4cc28d57f2b623856 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:22:57 +0300 Subject: [PATCH 17/30] Update morph.cpp --- src/coreclr/jit/morph.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b47841535ac6d..241fdac51208a 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14070,16 +14070,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) case GT_OR: case GT_XOR: case GT_AND: - /*if (op2->OperIs(GT_NEG) && op1->IsLocal() && varTypeIsIntOrI(op1->TypeGet())) - { - if (tree->Compare(op1, op2->gtGetOp1())) - { - tree->SetOper(GT_CNS_INT); - tree->AsIntCon()->gtIconVal = 1; - return tree; - } - }*/ - + /* Commute any non-REF constants to the right */ noway_assert(op1); @@ -19486,4 +19477,4 @@ GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* } return args; -} \ No newline at end of file +} From ce9db99e7da49fc19ab27b98e30e2b461dba46a6 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:23:30 +0300 Subject: [PATCH 18/30] Update morph.cpp --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 241fdac51208a..9e8f37e081933 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14070,7 +14070,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) case GT_OR: case GT_XOR: case GT_AND: - + /* Commute any non-REF constants to the right */ noway_assert(op1); From 44330b16afcf3a3bdf9c2c25d27e26f0fd0bb4f3 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:24:03 +0300 Subject: [PATCH 19/30] Update compiler.h --- src/coreclr/jit/compiler.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c4bc043746136..9172a30575779 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11229,7 +11229,6 @@ extern size_t gcHeaderISize; extern size_t gcPtrMapISize; extern size_t gcHeaderNSize; extern size_t gcPtrMapNSize; -extern size_t dataSize; #endif // DISPLAY_SIZES From 400a3609b2b7c339138846cbc1b60b9aa6af063a Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:24:28 +0300 Subject: [PATCH 20/30] Update gentree.cpp --- src/coreclr/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4850d100288f6..6548e15896629 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16584,7 +16584,7 @@ bool GenTree::IsPhiDefn() // comp - the Compiler object. // // Return Value: -// Returns "true" if 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type +// Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type // is not the same size as the type of the GT_LCL_VAR bool GenTree::IsPartialLclFld(Compiler* comp) From 9589f2c23bd91e958fa35fd1f4ef8dcc44e6db73 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:25:09 +0300 Subject: [PATCH 21/30] Update compiler.cpp --- src/coreclr/jit/compiler.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1db3fe6d909c6..583f3d0ca91ce 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -7441,9 +7441,6 @@ void Compiler::compJitStats() void Compiler::compCallArgStats() { - GenTree* args = nullptr; - GenTree* argx = nullptr; - unsigned argNum; unsigned argDWordNum; From f782919125afe80f382e4622a644ae3e5f064901 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:26:30 +0300 Subject: [PATCH 22/30] Update codegencommon.cpp --- src/coreclr/jit/codegencommon.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index be2cc16efcaf3..b7326a86d8a42 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2377,12 +2377,6 @@ void CodeGen::genEmitMachineCode() compiler->unwindReserve(); -#if DISPLAY_SIZES - - dataSize = GetEmitter()->emitDataSize(); - -#endif // DISPLAY_SIZES - bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? 
#if defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -2601,7 +2595,8 @@ void CodeGen::genEmitUnwindDebugGCandEH() regSet.tmpDone(); #if DISPLAY_SIZES - + + size_t dataSize = GetEmitter()->emitDataSize(); grossVMsize += compiler->info.compILCodeSize; totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize; grossNCsize += codeSize + dataSize; From 744ed90573d01efa695bcf543390fe309a7ee99e Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:30:18 +0300 Subject: [PATCH 23/30] Update codegencommon.cpp --- src/coreclr/jit/codegencommon.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index b7326a86d8a42..610b0416ab3b0 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2377,6 +2377,12 @@ void CodeGen::genEmitMachineCode() compiler->unwindReserve(); +#if DISPLAY_SIZES + + size_t dataSize = GetEmitter()->emitDataSize(); + +#endif // DISPLAY_SIZES + bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? #if defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -2595,8 +2601,7 @@ void CodeGen::genEmitUnwindDebugGCandEH() regSet.tmpDone(); #if DISPLAY_SIZES - - size_t dataSize = GetEmitter()->emitDataSize(); + grossVMsize += compiler->info.compILCodeSize; totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize; grossNCsize += codeSize + dataSize; From 44155186b464f30938cf193debd920735bab3d42 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:31:27 +0300 Subject: [PATCH 24/30] Update codegenxarch.cpp --- src/coreclr/jit/codegenxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 854aa3b409305..73adf575c6db1 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4527,7 +4527,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) else if (op1->GetRegNum() != targetReg) { assert(op1->GetRegNum() != REG_NA); - emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(lclNode), lclNode, op1); + emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(targetType), lclNode, op1); } } if (targetReg != REG_NA) @@ -9019,4 +9019,4 @@ void CodeGen::genPushCalleeSavedRegisters() } } -#endif // TARGET_XARCH \ No newline at end of file +#endif // TARGET_XARCH From 6f5bd46326c46c26a902f607e23c7d97bd24c74f Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:32:32 +0300 Subject: [PATCH 25/30] Update compiler.cpp --- src/coreclr/jit/compiler.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 583f3d0ca91ce..8554c7e436fdf 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -342,7 +342,6 @@ size_t gcHeaderISize; // GC header size: interruptible methods size_t gcPtrMapISize; // GC pointer map size: interruptible methods size_t gcHeaderNSize; // GC header size: non-interruptible methods size_t gcPtrMapNSize; // GC pointer map size: non-interruptible methods -size_t dataSize; #endif // DISPLAY_SIZES @@ -7441,6 +7440,9 @@ void Compiler::compJitStats() void Compiler::compCallArgStats() { + GenTree* args; + GenTree* argx; + unsigned argNum; unsigned argDWordNum; @@ -7499,7 +7501,7 @@ void Compiler::compCallArgStats() 
regArgDeferred++; argTotalObjPtr++; - if (call->AsCall()->IsVirtual()) + if (call->IsVirtual()) { /* virtual function */ argVirtualCalls++; From a529be451c4f0bc9e3fba3dd69e73ecaf3ef76c0 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:33:13 +0300 Subject: [PATCH 26/30] Update compiler.cpp --- src/coreclr/jit/compiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 8554c7e436fdf..621da35a7b0f9 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -7442,7 +7442,7 @@ void Compiler::compCallArgStats() { GenTree* args; GenTree* argx; - + unsigned argNum; unsigned argDWordNum; From 5247e6f3e53c75c94012c48b138a3d941dacec6f Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 16:33:41 +0300 Subject: [PATCH 27/30] Update compiler.h --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9172a30575779..6c8a6cbf24d8e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9482,7 +9482,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_FLOWGRAPHS #if defined(DEBUG) || defined(INLINE_DATA) - // Method hash is logically const, but computed + // Method hash is logcally const, but computed // on first demand. mutable unsigned compMethodHashPrivate; unsigned compMethodHash() const; From acb2d4c20debe4dee890f51d8969d9e2a3f4452d Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 19:05:55 +0300 Subject: [PATCH 28/30] Fix comments --- src/coreclr/jit/codegenxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 73adf575c6db1..9f960392a300b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4094,6 +4094,8 @@ void CodeGen::genCodeForShift(GenTree* tree) emitAttr size = emitTypeSize(tree); // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg" + // Optimize "X<<2" to "lea [reg*4]" + // Optimize "X<<3" to "lea [reg*8]" ssize_t intCon = shiftBy->AsIntConCommon()->IconValue(); if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && (intCon == 1 || intCon == 2 || intCon == 3)) @@ -4110,11 +4112,9 @@ void CodeGen::genCodeForShift(GenTree* tree) GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0); } break; - // Optimize "X<<2" to "lea [reg*4]" case 2: GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 4, 0); break; - // Optimize "X<<3" to "lea [reg*8]" case 3: GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 8, 0); break; From cd0a7fdca013db8b4946d3c181b4eb4cf2d26afb Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Wed, 5 May 2021 21:19:41 +0300 Subject: [PATCH 29/30] fix formatting --- src/coreclr/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9f960392a300b..74a4f49db8958 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4098,7 +4098,7 @@ void CodeGen::genCodeForShift(GenTree* tree) // 
Optimize "X<<3" to "lea [reg*8]" ssize_t intCon = shiftBy->AsIntConCommon()->IconValue(); if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && - (intCon == 1 || intCon == 2 || intCon == 3)) + (intCon == 1 || intCon == 2 || intCon == 3)) { switch (intCon) { From c0f4d7acac207ddf3bbce505c09fcf43f3cfe305 Mon Sep 17 00:00:00 2001 From: Maxim <43318993+DarkBullNull@users.noreply.github.com> Date: Fri, 14 May 2021 21:21:20 +0300 Subject: [PATCH 30/30] intCon to IsIntegralConst() --- src/coreclr/jit/codegenxarch.cpp | 37 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 74a4f49db8958..93626dc30b7bd 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4096,28 +4096,27 @@ void CodeGen::genCodeForShift(GenTree* tree) // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg" // Optimize "X<<2" to "lea [reg*4]" // Optimize "X<<3" to "lea [reg*8]" - ssize_t intCon = shiftBy->AsIntConCommon()->IconValue(); if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && - (intCon == 1 || intCon == 2 || intCon == 3)) + (shiftBy->IsIntegralConst(1) || shiftBy->IsIntegralConst(2) || shiftBy->IsIntegralConst(3))) { - switch (intCon) + if (shiftBy->IsIntegralConst(1)) { - case 1: - if (tree->GetRegNum() == operandReg) - { - GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg); - } - else - { - GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0); - } - break; - case 2: - GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 4, 0); - break; - case 3: - GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 8, 0); - break; + if (tree->GetRegNum() == operandReg) + { + GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg); + } + else + { + GetEmitter()->emitIns_R_ARX(INS_lea, size, tree->GetRegNum(), operandReg, operandReg, 1, 0); + } + } + else if (shiftBy->IsIntegralConst(2)) + { + GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 4, 0); + } + else + { + GetEmitter()->emitIns_R_AX(INS_lea, size, tree->GetRegNum(), operandReg, 8, 0); } } else