diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index fb7f2f73351af..20a0bd0ab963f 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -4732,6 +4732,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_PHYSICAL_PROMOTION, &Compiler::PhysicalPromotion);

+    // Expose candidates for implicit byref last-use copy elision.
+    DoPhase(this, PHASE_IMPBYREF_COPY_OMISSION, &Compiler::fgMarkImplicitByRefCopyOmissionCandidates);
+
     // Locals tree list is no longer kept valid.
     fgNodeThreading = NodeThreading::None;

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 8a2d862d1fe2c..37fe0128d4d5d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -540,8 +540,11 @@ class LclVarDsc
     unsigned char lvIsTemp : 1; // Short-lifetime compiler temp

 #if FEATURE_IMPLICIT_BYREFS
-    unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref.
-#endif // FEATURE_IMPLICIT_BYREFS
+    // Set if the argument is an implicit byref.
+    unsigned char lvIsImplicitByRef : 1;
+    // Set if the local appears as a last use that will be passed as an implicit byref.
+    unsigned char lvIsLastUseCopyOmissionCandidate : 1;
+#endif // FEATURE_IMPLICIT_BYREFS

 #if defined(TARGET_LOONGARCH64)
     unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64.
@@ -4616,7 +4619,6 @@ class Compiler

     bool fgRemoveRestOfBlock; // true if we know that we will throw
     bool fgStmtRemoved;       // true if we remove statements -> need new DFA
-    bool fgRemarkGlobalUses;  // true if morph should remark global uses after processing a statement

     enum FlowGraphOrder
     {
@@ -5917,7 +5919,6 @@ class Compiler

     GenTreeCall* fgMorphArgs(GenTreeCall* call);
     void fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg);
-    void fgMarkGlobalUses(Statement* stmt);

     GenTree* fgMorphLeafLocal(GenTreeLclVarCommon* lclNode);
 #ifdef TARGET_X86
@@ -6130,6 +6131,9 @@ class Compiler
     // Reset the refCount for implicit byrefs.
     void fgResetImplicitByRefRefCount();

+    // Identify all candidates for last-use copy omission.
+    PhaseStatus fgMarkImplicitByRefCopyOmissionCandidates();
+
     // Change implicit byrefs' types from struct to pointer, and for any that were
     // promoted, create new promoted struct temps.
     PhaseStatus fgRetypeImplicitByRefArgs();
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h
index 460b47f9bca85..fae27a64cb9da 100644
--- a/src/coreclr/jit/compphases.h
+++ b/src/coreclr/jit/compphases.h
@@ -45,6 +45,7 @@ CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp",
 CompPhaseNameMacro(PHASE_EARLY_LIVENESS, "Early liveness", false, -1, false)
 CompPhaseNameMacro(PHASE_PHYSICAL_PROMOTION, "Physical promotion", false, -1, false)
 CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", false, -1, false)
+CompPhaseNameMacro(PHASE_IMPBYREF_COPY_OMISSION, "Identify candidates for implicit byref copy omission", false, -1, false)
 CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", false, -1, false)
 CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", false, -1, false)
 CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", false, -1, false)
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 3babf06e54161..c420051c63d3c 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -3955,6 +3955,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
     GenTree* argx = arg->GetEarlyNode();
     noway_assert(!argx->OperIs(GT_MKREFANY));

+#if FEATURE_IMPLICIT_BYREFS
     // If we're optimizing, see if we can avoid making a copy.
     //
     // We don't need a copy if this is the last use of the local.
@@ -3992,7 +3993,8 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)

         if (!omitCopy && fgGlobalMorph)
         {
-            omitCopy = !varDsc->lvPromoted && !varDsc->lvIsStructField && ((lcl->gtFlags & GTF_VAR_DEATH) != 0);
+            omitCopy = (varDsc->lvIsLastUseCopyOmissionCandidate || (implicitByRefLcl != nullptr)) &&
+                       !varDsc->lvPromoted && !varDsc->lvIsStructField && ((lcl->gtFlags & GTF_VAR_DEATH) != 0);
         }

         if (omitCopy)
@@ -4007,21 +4009,11 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
                 lcl->ChangeOper(GT_LCL_ADDR);
                 lcl->AsLclFld()->SetLclOffs(offs);
                 lcl->gtType = TYP_I_IMPL;
+                lcl->gtFlags &= ~GTF_ALL_EFFECT;
                 lvaSetVarAddrExposed(varNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS));

                 // Copy prop could allow creating another later use of lcl if there are live assertions about it.
                 fgKillDependentAssertions(varNum DEBUGARG(lcl));
-
-                // We may have seen previous uses of this local already,
-                // and those uses should now be marked as GTF_GLOB_REF
-                // since otherwise they could be reordered with the call.
-                // The only known issues are under stress mode and happen
-                // in the same statement, so we only handle this case currently.
-                // TODO: A more complete fix will likely entail identifying
-                // these candidates before morph and address exposing them
-                // at that point, which first requires ABI determination to
-                // be moved earlier.
-                fgRemarkGlobalUses = true;
             }

             JITDUMP("did not need to make outgoing copy for last use of V%02d\n", varNum);
@@ -4029,6 +4021,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
             }
         }
     }
+#endif

     JITDUMP("making an outgoing copy for struct arg\n");

@@ -4102,51 +4095,6 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
     arg->SetEarlyNode(argNode);
 }

-//------------------------------------------------------------------------
-// fgMarkNewlyGlobalUses: Given a local that is newly address exposed, add
-//     GTF_GLOB_REF whever necessary in the specified statement.
-//
-// Arguments:
-//    stmt - The statement
-//    lclNum - The local that is newly address exposed
-//
-// Notes:
-//    See comment in fgMakeOutgoingStructArgCopy.
-//
-void Compiler::fgMarkGlobalUses(Statement* stmt)
-{
-    struct Visitor : GenTreeVisitor<Visitor>
-    {
-        enum
-        {
-            DoPostOrder = true,
-        };
-
-        Visitor(Compiler* comp) : GenTreeVisitor(comp)
-        {
-        }
-
-        fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
-        {
-            GenTree* node = *use;
-            if (node->OperIsLocal() && m_compiler->lvaGetDesc(node->AsLclVarCommon()->GetLclNum())->IsAddressExposed())
-            {
-                node->gtFlags |= GTF_GLOB_REF;
-            }
-
-            if (user != nullptr)
-            {
-                user->gtFlags |= node->gtFlags & GTF_GLOB_REF;
-            }
-
-            return WALK_CONTINUE;
-        }
-    };
-
-    Visitor visitor(this);
-    visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
-}
-
 /*****************************************************************************
  *
  *  A little helper used to rearrange nested commutative operations. The
@@ -4687,7 +4635,17 @@ GenTree* Compiler::fgMorphLeafLocal(GenTreeLclVarCommon* lclNode)
     }

     LclVarDsc* varDsc = lvaGetDesc(lclNode);
-    if (varDsc->IsAddressExposed())
+    // For last-use copy omission candidates we will address expose them when
+    // we get to the call that passes their address, but they are not actually
+    // address exposed in the full sense, so we allow standard assertion prop
+    // on them until that point. However, we must still mark them with
+    // GTF_GLOB_REF to avoid illegal reordering with the call passing their
+    // address.
+    if (varDsc->IsAddressExposed()
+#if FEATURE_IMPLICIT_BYREFS
+        || varDsc->lvIsLastUseCopyOmissionCandidate
+#endif
+        )
     {
         lclNode->gtFlags |= GTF_GLOB_REF;
     }
@@ -8433,7 +8391,12 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
         case GT_STORE_LCL_VAR:
         case GT_STORE_LCL_FLD:
         {
-            if (lvaGetDesc(tree->AsLclVarCommon())->IsAddressExposed())
+            LclVarDsc* lclDsc = lvaGetDesc(tree->AsLclVarCommon());
+            if (lclDsc->IsAddressExposed()
+#if FEATURE_IMPLICIT_BYREFS
+                || lclDsc->lvIsLastUseCopyOmissionCandidate
+#endif
+                )
             {
                 tree->AddAllEffectsFlags(GTF_GLOB_REF);
             }
@@ -13445,7 +13408,6 @@ void Compiler::fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt)
 void Compiler::fgMorphStmts(BasicBlock* block)
 {
     fgRemoveRestOfBlock = false;
-    fgRemarkGlobalUses  = false;

     for (Statement* const stmt : block->Statements())
     {
@@ -13560,12 +13522,6 @@

         stmt->SetRootNode(morphedTree);

-        if (fgRemarkGlobalUses)
-        {
-            fgMarkGlobalUses(stmt);
-            fgRemarkGlobalUses = false;
-        }
-
         if (fgRemoveRestOfBlock)
         {
             continue;
@@ -14741,6 +14697,163 @@ PhaseStatus Compiler::fgPromoteStructs()

     return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }

+//------------------------------------------------------------------------
+// fgMarkImplicitByRefCopyOmissionCandidates:
+//   Find and mark all locals that are passed as implicit byref args and are
+//   candidates for last-use copy omission.
+//
+// Remarks:
+//   We must mark these locals beforehand to avoid potential reordering with
+//   the call that ends up getting the address of the local. For example, if we
+//   waited until morph it would be possible for morph to reorder the two
+//   occurrences of V00 in
+//
+//   [000015] --CXG------                         ▌  CALL      void   Program:Foo(int,int)
+//   [000010] ----------- arg0                    ├──▌  LCL_FLD   int    V00 loc0         [+0]
+//   [000012] --CXG------ arg1                    └──▌  CALL      int    Program:Bar(S):int
+//   [000011] ----------- arg0                       └──▌  LCL_VAR   struct V00 loc0          (last use)
+//
+//   to end up with
+//
+//   [000015] --CXG+-----                         ▌  CALL      void   Program:Foo(int,int)
+//   [000037] DACXG------ arg1 setup              ├──▌  STORE_LCL_VAR int    V04 tmp3
+//   [000012] --CXG+-----                         │  └──▌  CALL      int    Program:Bar(S):int
+//   [000011] -----+----- arg0 in rcx             │     └──▌  LCL_ADDR  long   V00 loc0         [+0]
+//   [000038] ----------- arg1 in rdx             ├──▌  LCL_VAR   int    V04 tmp3
+//   [000010] -----+----- arg0 in rcx             └──▌  LCL_FLD   int    (AX) V00 loc0         [+0]
+//
+//   If Bar mutates V00 then this is a problem.
+//
+// Returns:
+//   Suitable phase status.
+//
+PhaseStatus Compiler::fgMarkImplicitByRefCopyOmissionCandidates()
+{
+#if FEATURE_IMPLICIT_BYREFS
+    if (!fgDidEarlyLiveness)
+    {
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+
+    struct Visitor : GenTreeVisitor<Visitor>
+    {
+        enum
+        {
+            DoPreOrder        = true,
+            UseExecutionOrder = true,
+        };
+
+        Visitor(Compiler* comp) : GenTreeVisitor(comp)
+        {
+        }
+
+        fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+        {
+            GenTree* node = *use;
+            if ((node->gtFlags & GTF_CALL) == 0)
+            {
+                return WALK_SKIP_SUBTREES;
+            }
+
+            if (!node->IsCall())
+            {
+                return WALK_CONTINUE;
+            }
+
+            GenTreeCall* call = node->AsCall();
+
+            for (CallArg& arg : call->gtArgs.Args())
+            {
+                if (!varTypeIsStruct(arg.GetSignatureType()))
+                {
+                    continue;
+                }
+
+                GenTree* argNode = arg.GetNode()->gtEffectiveVal();
+                if (!argNode->OperIsLocalRead())
+                {
+                    continue;
+                }
+
+                unsigned   lclNum = argNode->AsLclVarCommon()->GetLclNum();
+                LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
+
+                if (varDsc->lvIsLastUseCopyOmissionCandidate)
+                {
+                    // Already a candidate.
+                    continue;
+                }
+
+                if (varDsc->lvIsImplicitByRef)
+                {
+                    // While implicit byrefs are candidates, they are handled
+                    // specially and do not need GTF_GLOB_REF (the indirections
+                    // added on top already always get them). If we marked them
+                    // as a candidate fgMorphLeafLocal would add GTF_GLOB_REF
+                    // to the local containing the address, which is
+                    // conservative.
+                    continue;
+                }
+
+                if (varDsc->lvPromoted || varDsc->lvIsStructField || ((argNode->gtFlags & GTF_VAR_DEATH) == 0))
+                {
+                    // Not a candidate.
+                    continue;
+                }
+
+                unsigned structSize =
+                    argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(m_compiler)->GetSize() : genTypeSize(argNode);
+
+                Compiler::structPassingKind passKind;
+                m_compiler->getArgTypeForStruct(arg.GetSignatureClassHandle(), &passKind, call->IsVarargs(),
+                                                structSize);
+
+                if (passKind != SPK_ByReference)
+                {
+                    continue;
+                }
+
+                JITDUMP("Marking V%02u as a candidate for last-use copy omission [%06u]\n", lclNum,
+                        dspTreeID(argNode));
+                varDsc->lvIsLastUseCopyOmissionCandidate = 1;
+            }
+
+            return WALK_CONTINUE;
+        }
+    };
+
+    Visitor visitor(this);
+    for (BasicBlock* bb : Blocks())
+    {
+        for (Statement* stmt : bb->Statements())
+        {
+            // Does this have any calls?
+            if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0)
+            {
+                continue;
+            }
+
+            // If so, check for any struct last use and only do the expensive
+            // tree walk if one exists.
+            for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
+            {
+                if (!varTypeIsStruct(lcl) || !lcl->OperIsLocalRead())
+                {
+                    continue;
+                }
+
+                if ((lcl->gtFlags & GTF_VAR_DEATH) != 0)
+                {
+                    visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
+                    break;
+                }
+            }
+        }
+    }
+#endif
+
+    return PhaseStatus::MODIFIED_NOTHING;
+}
+
 //------------------------------------------------------------------------
 // fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
 //     struct to pointer). Also choose (based on address-exposed analysis)
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_85611/Runtime_85611.cs b/src/tests/JIT/Regression/JitBlue/Runtime_85611/Runtime_85611.cs
new file mode 100644
index 0000000000000..c0f71bf61eda4
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_85611/Runtime_85611.cs
@@ -0,0 +1,55 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using Xunit;
+
+public class Runtime_85611
+{
+    private static int s_result;
+
+    [Fact]
+    public static int TestEntryPoint()
+    {
+        S s = new();
+        s.A = GetA();
+        // We need to be careful not to reorder the two accesses of 's' in the call to 'Foo'.
+        // The second is marked as a last use and we make use of that to omit a copy;
+        // if we reorder them then s.A will read the mutated value inside Bar.
+        Foo(s.A, Bar(s));
+        if (s_result != 100)
+        {
+            Console.WriteLine("FAIL: Result is {0}", s_result);
+        }
+
+        return s_result;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static int GetA() => 100;
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void Foo(int field, int arg)
+    {
+        s_result = field;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static int Bar(S s)
+    {
+        Use(ref s);
+        s.A = 101;
+        return 42;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void Use(ref S s)
+    {
+    }
+
+    private struct S
+    {
+        public int A, B, C, D, E, F, G, H;
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_85611/Runtime_85611.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_85611/Runtime_85611.csproj
new file mode 100644
index 0000000000000..c361a935c39d0
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_85611/Runtime_85611.csproj
@@ -0,0 +1,11 @@
+
+
+    true
+    True
+
+
+
+
+
+
+
\ No newline at end of file
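Note (not part of the diff): for readers less familiar with implicit byrefs, below is a minimal C# sketch of the source pattern the new PHASE_IMPBYREF_COPY_OMISSION phase targets. It assumes an ABI where large structs are passed by implicit reference (e.g. win-x64 or arm64); the type and method names are illustrative only and do not come from this change.

    using System.Runtime.CompilerServices;

    // A struct too large to be passed in registers; on implicit-byref ABIs the
    // caller normally copies it to a temp and passes the temp's address.
    public struct Big
    {
        public long A, B, C, D;
    }

    public static class CopyOmissionSketch
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static long Sum(Big b) => b.A + b.B + b.C + b.D;

        public static long Caller()
        {
            Big big = default;
            big.A = 1;
            // 'big' dies at this call, so the defensive copy can be omitted and the
            // address of 'big' itself passed to Sum. The new phase marks 'big' via
            // lvIsLastUseCopyOmissionCandidate before morph, so any other appearance
            // of 'big' in the same statement keeps GTF_GLOB_REF and cannot be
            // reordered past the call.
            return Sum(big);
        }
    }

The regression test above exercises the unsafe variant of this pattern, where another field of the same local is read in the same statement that contains the copy-omitted call.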