Skip to content

Commit

Permalink
JIT: Ensure last-use copy omission candidates are marked with GTF_GLO…
Browse files Browse the repository at this point in the history
…B_REF (#89088)

Last-use copy omission allows the JIT to avoid creating copies of struct
values that are passed implicitly by-ref when the value is a last-use of
a local. When we do this we effectively address expose the
local for the lifetime of the call, so we mark the local as address
exposed as part of doing so.

However, there is a problem where morph may reorder two uses of a such a
local and break the last use information. For example, consider the
following case:

```
[000015] --CXG------      ▌  CALL      void   Program:Foo(int,int)
[000010] ----------- arg0 ├──▌  LCL_FLD   int    V00 loc0         [+0]
[000012] --CXG------ arg1 └──▌  CALL      int    Program:Bar(S):int
[000011] ----------- arg0    └──▌  LCL_VAR   struct<S, 32> V00 loc0          (last use)
```

V00 is used both at [000010] and at [000011], the latter as a last use.
Since we only address expose V00 when we get to [000011] we do not mark
[000010] with GTF_GLOB_REF; the net effect is the following call args
morphing, where we have reordered the two occurrences illegally:

```
[000015] --CXG+-----             ▌  CALL      void   Program:Foo(int,int)
[000037] DACXG------ arg1 setup  ├──▌  STORE_LCL_VAR int    V04 tmp3
[000012] --CXG+-----             │  └──▌  CALL      int    Program:Bar(S):int
[000011] -----+----- arg0 in rcx │     └──▌  LCL_ADDR  long   V00 loc0         [+0]
[000038] ----------- arg1 in rdx ├──▌  LCL_VAR   int    V04 tmp3
[000010] -----+----- arg0 in rcx └──▌  LCL_FLD   int   (AX) V00 loc0         [+0]
```

This change fixes the problem by running a separate pass over the IR
before morph to identify and address expose the candidates for last-use
copy omission. The net result is then the following correct IR:

```
[000015] --CXG+-----             ▌  CALL      void   Runtime_85611:Foo(int,int)
[000039] DA--G------ arg0 setup  ├──▌  STORE_LCL_VAR int    V05 tmp4
[000010] ----G+-----             │  └──▌  LCL_FLD   int   (AX) V00 loc0         [+0]
[000037] DACXG------ arg1 setup  ├──▌  STORE_LCL_VAR int    V04 tmp3
[000012] --CXG+-----             │  └──▌  CALL      int    Runtime_85611:Bar(Runtime_85611+S):int
[000011] ----G+----- arg0 in rcx │     └──▌  LCL_ADDR  long   V00 loc0         [+0]
[000038] ----------- arg1 in rdx ├──▌  LCL_VAR   int    V04 tmp3
[000040] ----------- arg0 in rcx └──▌  LCL_VAR   int    V05 tmp4
```

The pass has some TP impact but it is mitigated since we only need to
visit statements with GTF_CALL and since we can use the locals tree list
to prune which statements we visit.

Fix #85611
  • Loading branch information
jakobbotsch authored Jul 19, 2023
1 parent d051a84 commit 3b46cf5
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 70 deletions.
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4732,6 +4732,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_PHYSICAL_PROMOTION, &Compiler::PhysicalPromotion);

// Expose candidates for implicit byref last-use copy elision.
DoPhase(this, PHASE_IMPBYREF_COPY_OMISSION, &Compiler::fgMarkImplicitByRefCopyOmissionCandidates);

// Locals tree list is no longer kept valid.
fgNodeThreading = NodeThreading::None;

Expand Down
12 changes: 8 additions & 4 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,11 @@ class LclVarDsc
unsigned char lvIsTemp : 1; // Short-lifetime compiler temp

#if FEATURE_IMPLICIT_BYREFS
unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref.
#endif // FEATURE_IMPLICIT_BYREFS
// Set if the argument is an implicit byref.
unsigned char lvIsImplicitByRef : 1;
// Set if the local appears as a last use that will be passed as an implicit byref.
unsigned char lvIsLastUseCopyOmissionCandidate : 1;
#endif // FEATURE_IMPLICIT_BYREFS

#if defined(TARGET_LOONGARCH64)
unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64.
Expand Down Expand Up @@ -4616,7 +4619,6 @@ class Compiler

bool fgRemoveRestOfBlock; // true if we know that we will throw
bool fgStmtRemoved; // true if we remove statements -> need new DFA
bool fgRemarkGlobalUses; // true if morph should remark global uses after processing a statement

enum FlowGraphOrder
{
Expand Down Expand Up @@ -5917,7 +5919,6 @@ class Compiler
GenTreeCall* fgMorphArgs(GenTreeCall* call);

void fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg);
void fgMarkGlobalUses(Statement* stmt);

GenTree* fgMorphLeafLocal(GenTreeLclVarCommon* lclNode);
#ifdef TARGET_X86
Expand Down Expand Up @@ -6130,6 +6131,9 @@ class Compiler
// Reset the refCount for implicit byrefs.
void fgResetImplicitByRefRefCount();

// Identify all candidates for last-use copy omission.
PhaseStatus fgMarkImplicitByRefCopyOmissionCandidates();

// Change implicit byrefs' types from struct to pointer, and for any that were
// promoted, create new promoted struct temps.
PhaseStatus fgRetypeImplicitByRefArgs();
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compphases.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp",
CompPhaseNameMacro(PHASE_EARLY_LIVENESS, "Early liveness", false, -1, false)
CompPhaseNameMacro(PHASE_PHYSICAL_PROMOTION, "Physical promotion", false, -1, false)
CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", false, -1, false)
CompPhaseNameMacro(PHASE_IMPBYREF_COPY_OMISSION, "Identify candidates for implicit byref copy omission", false, -1, false)
CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", false, -1, false)
CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", false, -1, false)
CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", false, -1, false)
Expand Down
245 changes: 179 additions & 66 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3955,6 +3955,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
GenTree* argx = arg->GetEarlyNode();
noway_assert(!argx->OperIs(GT_MKREFANY));

#if FEATURE_IMPLICIT_BYREFS
// If we're optimizing, see if we can avoid making a copy.
//
// We don't need a copy if this is the last use of the local.
Expand Down Expand Up @@ -3992,7 +3993,8 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)

if (!omitCopy && fgGlobalMorph)
{
omitCopy = !varDsc->lvPromoted && !varDsc->lvIsStructField && ((lcl->gtFlags & GTF_VAR_DEATH) != 0);
omitCopy = (varDsc->lvIsLastUseCopyOmissionCandidate || (implicitByRefLcl != nullptr)) &&
!varDsc->lvPromoted && !varDsc->lvIsStructField && ((lcl->gtFlags & GTF_VAR_DEATH) != 0);
}

if (omitCopy)
Expand All @@ -4007,28 +4009,19 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
lcl->ChangeOper(GT_LCL_ADDR);
lcl->AsLclFld()->SetLclOffs(offs);
lcl->gtType = TYP_I_IMPL;
lcl->gtFlags &= ~GTF_ALL_EFFECT;
lvaSetVarAddrExposed(varNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS));

// Copy prop could allow creating another later use of lcl if there are live assertions about it.
fgKillDependentAssertions(varNum DEBUGARG(lcl));

// We may have seen previous uses of this local already,
// and those uses should now be marked as GTF_GLOB_REF
// since otherwise they could be reordered with the call.
// The only known issues are under stress mode and happen
// in the same statement, so we only handle this case currently.
// TODO: A more complete fix will likely entail identifying
// these candidates before morph and address exposing them
// at that point, which first requires ABI determination to
// be moved earlier.
fgRemarkGlobalUses = true;
}

JITDUMP("did not need to make outgoing copy for last use of V%02d\n", varNum);
return;
}
}
}
#endif

JITDUMP("making an outgoing copy for struct arg\n");

Expand Down Expand Up @@ -4102,51 +4095,6 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
arg->SetEarlyNode(argNode);
}

//------------------------------------------------------------------------
// fgMarkNewlyGlobalUses: Given a local that is newly address exposed, add
// GTF_GLOB_REF whever necessary in the specified statement.
//
// Arguments:
// stmt - The statement
// lclNum - The local that is newly address exposed
//
// Notes:
// See comment in fgMakeOutgoingStructArgCopy.
//
void Compiler::fgMarkGlobalUses(Statement* stmt)
{
struct Visitor : GenTreeVisitor<Visitor>
{
enum
{
DoPostOrder = true,
};

Visitor(Compiler* comp) : GenTreeVisitor(comp)
{
}

fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
{
GenTree* node = *use;
if (node->OperIsLocal() && m_compiler->lvaGetDesc(node->AsLclVarCommon()->GetLclNum())->IsAddressExposed())
{
node->gtFlags |= GTF_GLOB_REF;
}

if (user != nullptr)
{
user->gtFlags |= node->gtFlags & GTF_GLOB_REF;
}

return WALK_CONTINUE;
}
};

Visitor visitor(this);
visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
}

/*****************************************************************************
*
* A little helper used to rearrange nested commutative operations. The
Expand Down Expand Up @@ -4687,7 +4635,17 @@ GenTree* Compiler::fgMorphLeafLocal(GenTreeLclVarCommon* lclNode)
}

LclVarDsc* varDsc = lvaGetDesc(lclNode);
if (varDsc->IsAddressExposed())
// For last-use copy omission candidates we will address expose them when
// we get to the call that passes their address, but they are not actually
// address exposed in the full sense, so we allow standard assertion prop
// on them until that point. However, we must still mark them with
// GTF_GLOB_REF to avoid illegal reordering with the call passing their
// address.
if (varDsc->IsAddressExposed()
#if FEATURE_IMPLICIT_BYREFS
|| varDsc->lvIsLastUseCopyOmissionCandidate
#endif
)
{
lclNode->gtFlags |= GTF_GLOB_REF;
}
Expand Down Expand Up @@ -8433,7 +8391,12 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
case GT_STORE_LCL_VAR:
case GT_STORE_LCL_FLD:
{
if (lvaGetDesc(tree->AsLclVarCommon())->IsAddressExposed())
LclVarDsc* lclDsc = lvaGetDesc(tree->AsLclVarCommon());
if (lclDsc->IsAddressExposed()
#if FEATURE_IMPLICIT_BYREFS
|| lclDsc->lvIsLastUseCopyOmissionCandidate
#endif
)
{
tree->AddAllEffectsFlags(GTF_GLOB_REF);
}
Expand Down Expand Up @@ -13445,7 +13408,6 @@ void Compiler::fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt)
void Compiler::fgMorphStmts(BasicBlock* block)
{
fgRemoveRestOfBlock = false;
fgRemarkGlobalUses = false;

for (Statement* const stmt : block->Statements())
{
Expand Down Expand Up @@ -13560,12 +13522,6 @@ void Compiler::fgMorphStmts(BasicBlock* block)

stmt->SetRootNode(morphedTree);

if (fgRemarkGlobalUses)
{
fgMarkGlobalUses(stmt);
fgRemarkGlobalUses = false;
}

if (fgRemoveRestOfBlock)
{
continue;
Expand Down Expand Up @@ -14741,6 +14697,163 @@ PhaseStatus Compiler::fgPromoteStructs()
return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

//------------------------------------------------------------------------
// fgMarkImplicitByRefCopyOmissionCandidates:
// Find and mark all locals that are passed as implicit byref args and are
// candidates for last-use copy omission.
//
// Remarks:
// We must mark these locals beforehand to avoid potential reordering with
// the call that ends up getting the address of the local. For example, if we
// waited until morph it would be possible for morph to reorder the two
// occurrences of V00 in
//
// [000015] --CXG------ ▌ CALL void Program:Foo(int,int)
// [000010] ----------- arg0 ├──▌ LCL_FLD int V00 loc0 [+0]
// [000012] --CXG------ arg1 └──▌ CALL int Program:Bar(S):int
// [000011] ----------- arg0 └──▌ LCL_VAR struct<S, 32> V00 loc0 (last use)
//
// to end up with
//
// [000015] --CXG+----- ▌ CALL void Program:Foo(int,int)
// [000037] DACXG------ arg1 setup ├──▌ STORE_LCL_VAR int V04 tmp3
// [000012] --CXG+----- │ └──▌ CALL int Program:Bar(S):int
// [000011] -----+----- arg0 in rcx │ └──▌ LCL_ADDR long V00 loc0 [+0]
// [000038] ----------- arg1 in rdx ├──▌ LCL_VAR int V04 tmp3
// [000010] -----+----- arg0 in rcx └──▌ LCL_FLD int (AX) V00 loc0 [+0]
//
// If Bar mutates V00 then this is a problem.
//
// Returns:
// Suitable phase status.
//
PhaseStatus Compiler::fgMarkImplicitByRefCopyOmissionCandidates()
{
#if FEATURE_IMPLICIT_BYREFS
if (!fgDidEarlyLiveness)
{
return PhaseStatus::MODIFIED_NOTHING;
}

struct Visitor : GenTreeVisitor<Visitor>
{
enum
{
DoPreOrder = true,
UseExecutionOrder = true,
};

Visitor(Compiler* comp) : GenTreeVisitor(comp)
{
}

fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
GenTree* node = *use;
if ((node->gtFlags & GTF_CALL) == 0)
{
return WALK_SKIP_SUBTREES;
}

if (!node->IsCall())
{
return WALK_CONTINUE;
}

GenTreeCall* call = node->AsCall();

for (CallArg& arg : call->gtArgs.Args())
{
if (!varTypeIsStruct(arg.GetSignatureType()))
{
continue;
}

GenTree* argNode = arg.GetNode()->gtEffectiveVal();
if (!argNode->OperIsLocalRead())
{
continue;
}

unsigned lclNum = argNode->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);

if (varDsc->lvIsLastUseCopyOmissionCandidate)
{
// Already a candidate.
continue;
}

if (varDsc->lvIsImplicitByRef)
{
// While implicit byrefs are candidates, they are handled
// specially and do not need GTF_GLOB_REF (the indirections
// added on top already always get them). If we marked them
// as a candidate fgMorphLeafLocal would add GTF_GLOB_REF
// to the local containing the address, which is
// conservative.
continue;
}

if (varDsc->lvPromoted || varDsc->lvIsStructField || ((argNode->gtFlags & GTF_VAR_DEATH) == 0))
{
// Not a candidate.
continue;
}

unsigned structSize =
argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(m_compiler)->GetSize() : genTypeSize(argNode);

Compiler::structPassingKind passKind;
m_compiler->getArgTypeForStruct(arg.GetSignatureClassHandle(), &passKind, call->IsVarargs(),
structSize);

if (passKind != SPK_ByReference)
{
continue;
}

JITDUMP("Marking V%02u as a candidate for last-use copy omission [%06u]\n", lclNum, dspTreeID(argNode));
varDsc->lvIsLastUseCopyOmissionCandidate = 1;
}

return WALK_CONTINUE;
}
};

Visitor visitor(this);
for (BasicBlock* bb : Blocks())
{
for (Statement* stmt : bb->Statements())
{
// Does this have any calls?
if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0)
{
continue;
}

// If so, check for any struct last use and only do the expensive
// tree walk if one exists.
for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
{
if (!varTypeIsStruct(lcl) || !lcl->OperIsLocalRead())
{
continue;
}

if ((lcl->gtFlags & GTF_VAR_DEATH) != 0)
{
visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
break;
}
}
}
}
#endif

return PhaseStatus::MODIFIED_NOTHING;
}

//------------------------------------------------------------------------
// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
// struct to pointer). Also choose (based on address-exposed analysis)
Expand Down
Loading

0 comments on commit 3b46cf5

Please sign in to comment.