Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support cloning loops with array of struct indexing expressions #55612

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8523,6 +8523,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
* cBlocks, dBlocks : Display all the basic blocks of a function (call fgDispBasicBlocks()).
* cBlocksV, dBlocksV : Display all the basic blocks of a function (call fgDispBasicBlocks(true)).
* "V" means "verbose", and will dump all the trees.
* cStmt, dStmt : Display a Statement (call gtDispStmt()).
* cTree, dTree : Display a tree (call gtDispTree()).
* cTreeLIR, dTreeLIR : Display a tree in LIR form (call gtDispLIRNode()).
* cTrees, dTrees : Display all the trees in a function (call fgDumpTrees()).
Expand All @@ -8545,6 +8546,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*
* The following don't require a Compiler* to work:
* dRegMask : Display a regMaskTP (call dspRegMask(mask)).
* dBlockList : Display a BasicBlockList*.
*/

void cBlock(Compiler* comp, BasicBlock* block)
Expand Down Expand Up @@ -8719,6 +8721,11 @@ void dBlocksV()
cBlocksV(JitTls::GetCompiler());
}

void dStmt(Statement* statement)
{
cStmt(JitTls::GetCompiler(), statement);
}

void dTree(GenTree* tree)
{
cTree(JitTls::GetCompiler(), tree);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3116,6 +3116,8 @@ class Compiler

void gtUpdateNodeOperSideEffects(GenTree* tree);

void gtUpdateNodeOperSideEffectsPost(GenTree* tree);

// Returns "true" iff the complexity (not formally defined, but first interpretation
// is #of nodes in subtree) of "tree" is greater than "limit".
// (This is somewhat redundant with the "GetCostEx()/GetCostSz()" fields, but can be used
Expand Down
16 changes: 7 additions & 9 deletions src/coreclr/jit/fgdiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3253,14 +3253,13 @@ void Compiler::fgDebugCheckFlags(GenTree* tree)
}

//------------------------------------------------------------------------------
// fgDebugCheckDispFlags:
// Wrapper function that displays two GTF_IND_ flags
// and then calls ftDispFlags to display the rest.
// fgDebugCheckDispFlags: Wrapper function that displays GTF_IND_ flags
// and then calls gtDispFlags to display the rest.
//
// Arguments:
// tree - Tree whose flags are being checked
// dispFlags - the first argument for gtDispFlags
// ands hold GTF_IND_INVARIANT and GTF_IND_NONFLUALTING
// dispFlags - the first argument for gtDispFlags (flags to display),
// including GTF_IND_INVARIANT, GTF_IND_NONFAULTING, GTF_IND_NONNULL
// debugFlags - the second argument to gtDispFlags
//
void Compiler::fgDebugCheckDispFlags(GenTree* tree, GenTreeFlags dispFlags, GenTreeDebugFlags debugFlags)
Expand All @@ -3277,15 +3276,14 @@ void Compiler::fgDebugCheckDispFlags(GenTree* tree, GenTreeFlags dispFlags, GenT
//------------------------------------------------------------------------------
// fgDebugCheckFlagsHelper : Check if all bits that are set in chkFlags are also set in treeFlags.
//
//
// Arguments:
// tree - Tree whose flags are being checked
// tree - Tree whose flags are being checked
// treeFlags - Actual flags on the tree
// chkFlags - Expected flags
// chkFlags - Expected flags
//
// Note:
// Checking that all bits that are set in treeFlags are also set in chkFlags is currently disabled.

//
void Compiler::fgDebugCheckFlagsHelper(GenTree* tree, GenTreeFlags treeFlags, GenTreeFlags chkFlags)
{
if (chkFlags & ~treeFlags)
Expand Down
84 changes: 68 additions & 16 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8533,7 +8533,7 @@ void Compiler::gtUpdateSideEffects(Statement* stmt, GenTree* tree)
//
// Arguments:
// tree - Tree to update the side effects for

//
void Compiler::gtUpdateTreeAncestorsSideEffects(GenTree* tree)
{
assert(fgStmtListThreaded);
Expand All @@ -8549,7 +8549,7 @@ void Compiler::gtUpdateTreeAncestorsSideEffects(GenTree* tree)
//
// Arguments:
// stmt - The statement to update side effects on

//
void Compiler::gtUpdateStmtSideEffects(Statement* stmt)
{
fgWalkTree(stmt->GetRootNodePointer(), fgUpdateSideEffectsPre, fgUpdateSideEffectsPost);
Expand All @@ -8565,7 +8565,7 @@ void Compiler::gtUpdateStmtSideEffects(Statement* stmt)
// This method currently only updates GTF_EXCEPT, GTF_ASG, and GTF_CALL flags.
// The other side effect flags may remain unnecessarily (conservatively) set.
// The caller of this method is expected to update the flags based on the children's flags.

//
void Compiler::gtUpdateNodeOperSideEffects(GenTree* tree)
{
if (tree->OperMayThrow(this))
Expand Down Expand Up @@ -8600,6 +8600,46 @@ void Compiler::gtUpdateNodeOperSideEffects(GenTree* tree)
}
}

//------------------------------------------------------------------------
// gtUpdateNodeOperSideEffectsPost: Update the side effects based on the node operation,
// in the post-order visit of a tree walk. It is expected that the pre-order visit cleared
// the bits, so the post-order visit only sets them. This is important for binary nodes
// where one child already may have set the GTF_EXCEPT bit. Note that `SetIndirExceptionFlags`
// looks at its child, which is why we need to do this in a bottom-up walk.
//
// Arguments:
// tree - Tree to update the side effects on
//
// Notes:
// This method currently only updates GTF_EXCEPT, GTF_ASG, and GTF_CALL flags.
// The other side effect flags may remain unnecessarily (conservatively) set.
// The caller of this method is expected to update the flags based on the children's flags.
//
void Compiler::gtUpdateNodeOperSideEffectsPost(GenTree* tree)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is similar to gtUpdateNodeOperSideEffects, but different. I didn't want to touch the other users, and they might actually be doing the right thing as long as they are changing a node whose children are already correct (such as when creating a new node).

{
if (tree->OperMayThrow(this))
{
tree->gtFlags |= GTF_EXCEPT;
}
else
{
if (tree->OperIsIndirOrArrLength())
{
tree->SetIndirExceptionFlags(this);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we do the same that we do in if (tree->OperMayThrow(this)):
that is what happens in if (tree->OperMayThrow(this))

case GT_ARR_LENGTH:
return (((this->gtFlags & GTF_IND_NONFAULTING) == 0) &&
comp->fgAddrCouldBeNull(this->AsArrLen()->ArrRef()));

and that is what happens in SetIndirExceptionFlags:

else
{
assert(gtOper == GT_ARR_LENGTH);
addr = AsArrLen()->ArrRef();
}
if (OperMayThrow(comp) || ((addr->gtFlags & GTF_EXCEPT) != 0))
{
gtFlags |= GTF_EXCEPT;
}

and I can't figure out what is the difference and what is responsible for what.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, it's quite confusing what the contract is here. I can't clear it up, honestly. It seems that OperMayThrow is trying to figure out whether to set GTF_EXCEPT "from basic principles", that is, not depending on anyone having pre-set that bit. However, the GT_INTRINSIC case does consult that (maybe that's actually a bug). fgAddrCouldBeNull does walk through COMMAs. SetIndirExceptionFlags will also set GTF_EXCEPT if the addr has the GTF_EXCEPT bit set, essentially propagating it from it's child. However, it doesn't walk through COMMAs. And it's not clear who is expected to have set those bits.

I think the idea of SetIndirExceptionFlags is to call it when you create a new indir/arr_length node and all the children are already created, and have proper bits. The case in gtUpdateNodeOperSideEffectsPost (that I added) maybe isn't necessary at all: if we're walking up in post-order, OperMayThrow will set the correct bits based on the indir/arr_length and addr; fgUpdateSideEffectsPost will update the node from any COMMA child with op1 with GTF_EXCEPT set (IND(COMMA(...something with GTF_EXCEPT,...))). However, that's not even quite correct, because SetIndirExceptionFlags will clear GTF_EXCEPT (and set GTF_IND_NONFAULTING) based on addr.

}
}

if (tree->OperRequiresAsgFlag())
{
tree->gtFlags |= GTF_ASG;
}

if (tree->OperRequiresCallFlag(this))
{
tree->gtFlags |= GTF_CALL;
}
}

//------------------------------------------------------------------------
// gtUpdateNodeSideEffects: Update the side effects based on the node operation and
// children's side efects.
Expand All @@ -8608,9 +8648,9 @@ void Compiler::gtUpdateNodeOperSideEffects(GenTree* tree)
// tree - Tree to update the side effects on
//
// Notes:
// This method currently only updates GTF_EXCEPT and GTF_ASG flags. The other side effect
// flags may remain unnecessarily (conservatively) set.

// This method currently only updates GTF_EXCEPT, GTF_ASG, and GTF_CALL flags.
// The other side effect flags may remain unnecessarily (conservatively) set.
//
void Compiler::gtUpdateNodeSideEffects(GenTree* tree)
{
gtUpdateNodeOperSideEffects(tree);
Expand All @@ -8627,24 +8667,23 @@ void Compiler::gtUpdateNodeSideEffects(GenTree* tree)

//------------------------------------------------------------------------
// fgUpdateSideEffectsPre: Update the side effects based on the tree operation.
// The pre-visit of the walk only clears the GTF_EXCEPT bit; the post-visit sets
// the bits as necessary.
//
// Arguments:
// pTree - Pointer to the tree to update the side effects
// fgWalkPre - Walk data
//
// Notes:
// This method currently only updates GTF_EXCEPT and GTF_ASG flags. The other side effect
// flags may remain unnecessarily (conservatively) set.

Compiler::fgWalkResult Compiler::fgUpdateSideEffectsPre(GenTree** pTree, fgWalkData* fgWalkPre)
{
fgWalkPre->compiler->gtUpdateNodeOperSideEffects(*pTree);
GenTree* tree = *pTree;
tree->gtFlags &= ~(GTF_EXCEPT | GTF_CALL | GTF_ASG);

return WALK_CONTINUE;
}

//------------------------------------------------------------------------
// fgUpdateSideEffectsPost: Update the side effects of the parent based on the tree's flags.
// fgUpdateSideEffectsPost: Update the side effects of the node and parent based on the tree's flags.
//
// Arguments:
// pTree - Pointer to the tree
Expand All @@ -8653,10 +8692,18 @@ Compiler::fgWalkResult Compiler::fgUpdateSideEffectsPre(GenTree** pTree, fgWalkD
// Notes:
// The routine is used for updating the stale side effect flags for ancestor
// nodes starting from treeParent up to the top-level stmt expr.

//
// This method currently only updates GTF_EXCEPT and GTF_ASG flags. The other side effect
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should GTF_CALL be included?

// flags may remain unnecessarily (conservatively) set.
//
Compiler::fgWalkResult Compiler::fgUpdateSideEffectsPost(GenTree** pTree, fgWalkData* fgWalkPost)
{
GenTree* tree = *pTree;
GenTree* tree = *pTree;

// Update the node's side effects first.
fgWalkPost->compiler->gtUpdateNodeOperSideEffectsPost(tree);

// Then update the parent's side effects based on this node.
GenTree* parent = fgWalkPost->parent;
if (parent != nullptr)
{
Expand Down Expand Up @@ -9908,9 +9955,14 @@ bool GenTree::Precedes(GenTree* other)
// Arguments:
// comp - compiler instance
//

void GenTree::SetIndirExceptionFlags(Compiler* comp)
{
if (OperMayThrow(comp))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a refactoring: don't calculate addr unless we actually need it.

{
gtFlags |= GTF_EXCEPT;
return;
}

GenTree* addr = nullptr;
if (OperIsIndir())
{
Expand All @@ -9922,7 +9974,7 @@ void GenTree::SetIndirExceptionFlags(Compiler* comp)
addr = AsArrLen()->ArrRef();
}

if (OperMayThrow(comp) || ((addr->gtFlags & GTF_EXCEPT) != 0))
if ((addr->gtFlags & GTF_EXCEPT) != 0)
{
gtFlags |= GTF_EXCEPT;
}
Expand Down
107 changes: 20 additions & 87 deletions src/coreclr/jit/loopcloning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2074,9 +2074,15 @@ bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
// dimension of [] encountered.
//
// Operation:
// Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
// we have converted a GT_INDEX tree into a scaled index base offset expression. We need
// to reconstruct this to be able to know if this is an array access.
// Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In morph
// we have converted a GT_INDEX tree into a scaled index base offset expression.
// However, we don't actually bother to parse the morphed tree. All we care about is
// the bounds check node: it contains the array base and element index. The other side
// of the COMMA node can vary between array of primitive type and array of struct. There's
// no need to parse that, as the array bounds check contains the only thing we care about.
// In particular, we are trying to find bounds checks to remove, so only looking at the bounds
// check makes sense. We could verify that the bounds check is against the same array base/index
// but it isn't necessary.
//
// Assumption:
// The method extracts only if the array base and indices are GT_LCL_VAR.
Expand Down Expand Up @@ -2115,6 +2121,12 @@ bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
// | \--* LCL_VAR int V03 loc2
// \--* CNS_INT long 16 Fseq[#FirstElem]
//
// The COMMA op2 expression is the array index expression (or SIMD/Span expression). If we've got
// a "LCL_VAR int" index and "ARR_LENGTH(LCL_VAR ref)", that's good enough for us: we'll assume
// op2 is an array index expression. We don't need to match it just to ensure the index var is
// used as an index expression, or array base var is used as the array base. This saves us from parsing
// all the forms that morph can create, especially for arrays of structs.
//
bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
{
if (tree->gtOper != GT_COMMA)
Expand Down Expand Up @@ -2150,72 +2162,6 @@ bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsN

unsigned indLcl = arrBndsChk->gtIndex->AsLclVarCommon()->GetLclNum();

GenTree* after = tree->gtGetOp2();

if (after->gtOper != GT_IND)
{
return false;
}
// It used to be the case that arrBndsChks for struct types would fail the previous check because
// after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
// return false if the type of 'after' is a struct type. (This was causing us to clone loops
// that we were not previously cloning.)
// TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
// types.
if (varTypeIsStruct(after))
{
return false;
}

GenTree* sibo = after->gtGetOp1(); // sibo = scale*index + base + offset
if (sibo->gtOper != GT_ADD)
{
return false;
}
GenTree* base = sibo->gtGetOp1();
GenTree* sio = sibo->gtGetOp2(); // sio == scale*index + offset
if (base->OperGet() != GT_LCL_VAR || base->AsLclVarCommon()->GetLclNum() != arrLcl)
{
return false;
}
if (sio->gtOper != GT_ADD)
{
return false;
}
GenTree* ofs = sio->gtGetOp2();
GenTree* si = sio->gtGetOp1(); // si = scale*index
if (ofs->gtOper != GT_CNS_INT)
{
return false;
}
GenTree* index;
if (si->gtOper == GT_LSH)
{
GenTree* scale = si->gtGetOp2();
index = si->gtGetOp1();
if (scale->gtOper != GT_CNS_INT)
{
return false;
}
}
else
{
// No scale (e.g., byte array).
index = si;
}
#ifdef TARGET_64BIT
if (index->gtOper != GT_CAST)
{
return false;
}
GenTree* indexVar = index->gtGetOp1();
#else
GenTree* indexVar = index;
#endif
if (indexVar->gtOper != GT_LCL_VAR || indexVar->AsLclVarCommon()->GetLclNum() != indLcl)
{
return false;
}
if (lhsNum == BAD_VAR_NUM)
{
result->arrLcl = arrLcl;
Expand All @@ -2241,26 +2187,13 @@ bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsN
// "result" contains the array access depth. The "indLcls" fields contain the indices.
//
// Operation:
// Recursively look for a list of array indices. In the example below, we encounter,
// V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
// The return value would then be:
// Recursively look for a list of array indices. For example, if the tree is
// V03 = (V05 = V00[V01]), V05[V02]
// that corresponds to access of V00[V01][V02]. The return value would then be:
// ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
//
// V00[V01][V02] would be morphed as:
//
// [000000001B366848] ---XG------- indir int
// [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
// [000000001B36C200] ---XG------- comma int
// [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
// [000000001B36C278] -A-XG------- comma int
// [000000001B366730] R--XG------- indir ref
// [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
// [000000001B36C818] ---XG------- comma ref
// [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
// [000000001B36BB60] -A-XG------- = ref
// [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
// [000000001B36A668] -A-XG------- = int
// [000000001B36A5F0] D------N---- lclVar int V03 tmp0
// Note that the array expression is implied by the array bounds check under the COMMA, and the array bounds
// checks is what is parsed from the morphed tree; the array addressing expression is not parsed.
//
// Assumption:
// The method extracts only if the array base and indices are GT_LCL_VAR.
Expand Down
Loading