JIT: profile updates for finally optimizations (#49139)
Update `impImportLeave` to propagate IBC counts to new blocks.

Fix profile weights during callfinally chain merging.

Scale profile weights for cloned finallys. Choose the continuation path based
on profile weight. Make sure to keep handler entry hot.

Partial fix for #48925.
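
The profile-weight split for cloned finallys described above reduces to one fraction: the share of finally entries whose callfinallys were retargeted to the clone. Below is a minimal standalone sketch of that arithmetic, mirroring the new code in fgCloneFinally; the names (weight_t, splitFinallyWeights) and the sample numbers are illustrative, not the JIT's own API.

#include <cstdio>

// Illustrative stand-in for the JIT's BasicBlock::weight_t.
using weight_t = float;

// Given the profile weight entering the original finally and the total weight
// of the callfinally blocks retargeted to the clone, compute the fraction of
// each finally block's weight that moves to the clone and the fraction that
// stays with the original (exception-path) handler.
void splitFinallyWeights(weight_t originalWeight, weight_t retargetedWeight,
                         weight_t& clonedScale, weight_t& originalScale)
{
    // The clone cannot be entered more often than the finally itself was,
    // so the cloned fraction is capped at 1.0.
    clonedScale   = (retargetedWeight < originalWeight) ? (retargetedWeight / originalWeight) : 1.0f;
    originalScale = 1.0f - clonedScale;
}

int main()
{
    // Example: the finally ran 100 times and callfinallys totalling 80 were
    // retargeted to the clone; each original finally block keeps 20% of its
    // weight and its cloned copy receives the other 80%.
    weight_t clonedScale   = 0.0f;
    weight_t originalScale = 0.0f;
    splitFinallyWeights(100.0f, 80.0f, clonedScale, originalScale);
    std::printf("clone scale %.2f, original scale %.2f\n", clonedScale, originalScale);
    return 0;
}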
AndyAyersMS authored Mar 5, 2021
1 parent b0ba53a commit b428758
Showing 2 changed files with 197 additions and 39 deletions.
206 changes: 183 additions & 23 deletions src/coreclr/jit/fgehopt.cpp
@@ -779,18 +779,24 @@ PhaseStatus Compiler::fgCloneFinally()
" %u blocks, %u statements\n",
XTnum, regionBBCount, regionStmtCount);

// Walk the try region backwards looking for the last block
// that transfers control to a callfinally.
// Walk the try region backwards looking for blocks
// that transfer control to a callfinally.
//
// For non-pgo we find the lexically last block; for
// pgo we find the highest-weight block.
//
BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
assert(firstTryBlock->getTryIndex() == XTnum);
assert(bbInTryRegions(XTnum, lastTryBlock));
BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev;

BasicBlock* normalCallFinallyBlock = nullptr;
BasicBlock* normalCallFinallyReturn = nullptr;
BasicBlock* cloneInsertAfter = HBtab->ebdTryLast;
bool tryToRelocateCallFinally = false;
BasicBlock* normalCallFinallyBlock = nullptr;
BasicBlock* normalCallFinallyReturn = nullptr;
BasicBlock* cloneInsertAfter = HBtab->ebdTryLast;
bool tryToRelocateCallFinally = false;
const bool usingProfileWeights = fgIsUsingProfileWeights();
BasicBlock::weight_t currentWeight = BB_ZERO_WEIGHT;

for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->bbPrev)
{
@@ -830,12 +836,63 @@ PhaseStatus Compiler::fgCloneFinally()
BasicBlock* const jumpDest = block;
#endif // FEATURE_EH_CALLFINALLY_THUNKS

// Found our block.
// Found a block that invokes the finally.
//
BasicBlock* const finallyReturnBlock = jumpDest->bbNext;
BasicBlock* const postTryFinallyBlock = finallyReturnBlock->bbJumpDest;
bool isUpdate = false;

normalCallFinallyBlock = jumpDest;
normalCallFinallyReturn = postTryFinallyBlock;
// See if this is the one we want to use to inspire cloning.
//
if (normalCallFinallyBlock == nullptr)
{
normalCallFinallyBlock = jumpDest;
normalCallFinallyReturn = postTryFinallyBlock;

if (usingProfileWeights)
{
if (block->hasProfileWeight())
{
JITDUMP("Found profiled " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight);
currentWeight = block->bbWeight;
}
else
{
JITDUMP("Found unprofiled " FMT_BB "\n", block->bbNum);
}
}
}
else
{
assert(usingProfileWeights);

if (!block->hasProfileWeight())
{
// An unprofiled block in a method with profile data.
// We generally don't expect to see these as the
// blocks in EH regions must have come from the root
// method, which we know has profile data.
// Just skip over them for now.
//
JITDUMP("Skipping past unprofiled " FMT_BB "\n", block->bbNum);
continue;
}

if (block->bbWeight <= currentWeight)
{
JITDUMP("Skipping past " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight);
continue;
}

// Prefer this block.
//
JITDUMP("Preferring " FMT_BB " since " FMT_WT " > " FMT_WT "\n", block->bbNum, block->bbWeight,
currentWeight);
normalCallFinallyBlock = jumpDest;
normalCallFinallyReturn = postTryFinallyBlock;
currentWeight = block->bbWeight;
isUpdate = true;
}

#if FEATURE_EH_CALLFINALLY_THUNKS
// When there are callfinally thunks, we don't expect to see the
@@ -850,16 +907,24 @@ PhaseStatus Compiler::fgCloneFinally()
// through from the try into the clone.
tryToRelocateCallFinally = true;

JITDUMP("Chose path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";"
JITDUMP("%s path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";"
" the call returns to " FMT_BB " which jumps to " FMT_BB "\n",
block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum);
isUpdate ? "Updating" : "Choosing", block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum,
postTryFinallyBlock->bbNum);
#else
JITDUMP("Chose path to clone: try block " FMT_BB " is a callfinally;"
JITDUMP("%s path to clone: try block " FMT_BB " is a callfinally;"
" the call returns to " FMT_BB " which jumps to " FMT_BB "\n",
block->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum);
isUpdate ? "Updating" : "Choosing", block->bbNum, finallyReturnBlock->bbNum,
postTryFinallyBlock->bbNum);
#endif // FEATURE_EH_CALLFINALLY_THUNKS

break;
// For non-pgo just take the first one we find.
// For pgo, keep searching in case we find one we like better.
//
if (!usingProfileWeights)
{
break;
}
}

// If there is no call to the finally, don't clone.
@@ -956,11 +1021,14 @@ PhaseStatus Compiler::fgCloneFinally()
//
// Clone the finally body, and splice it into the flow graph
// within the parent region of the try.
const unsigned finallyTryIndex = firstBlock->bbTryIndex;
BasicBlock* insertAfter = nullptr;
BlockToBlockMap blockMap(getAllocator());
bool clonedOk = true;
unsigned cloneBBCount = 0;
//
const unsigned finallyTryIndex = firstBlock->bbTryIndex;
BasicBlock* insertAfter = nullptr;
BlockToBlockMap blockMap(getAllocator());
bool clonedOk = true;
unsigned cloneBBCount = 0;
BasicBlock::weight_t const originalWeight =
firstBlock->hasProfileWeight() ? firstBlock->bbWeight : BB_ZERO_WEIGHT;

for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext)
{
@@ -1067,9 +1135,10 @@ PhaseStatus Compiler::fgCloneFinally()
// Modify the targeting call finallys to branch to the cloned
// finally. Make a note if we see some calls that can't be
// retargeted (since they want to return to other places).
BasicBlock* const firstCloneBlock = blockMap[firstBlock];
bool retargetedAllCalls = true;
BasicBlock* currentBlock = firstCallFinallyRangeBlock;
BasicBlock* const firstCloneBlock = blockMap[firstBlock];
bool retargetedAllCalls = true;
BasicBlock* currentBlock = firstCallFinallyRangeBlock;
BasicBlock::weight_t retargetedWeight = BB_ZERO_WEIGHT;

while (currentBlock != endCallFinallyRangeBlock)
{
@@ -1088,6 +1157,9 @@ PhaseStatus Compiler::fgCloneFinally()
// by the cloned finally and by the called finally.
if (postTryFinallyBlock == normalCallFinallyReturn)
{
JITDUMP("Retargeting callfinally " FMT_BB " to clone entry " FMT_BB "\n", currentBlock->bbNum,
firstCloneBlock->bbNum);

// This call returns to the expected spot, so
// retarget it to branch to the clone.
currentBlock->bbJumpDest = firstCloneBlock;
@@ -1107,6 +1179,11 @@ PhaseStatus Compiler::fgCloneFinally()

// Make sure iteration isn't going off the deep end.
assert(leaveBlock != endCallFinallyRangeBlock);

if (currentBlock->hasProfileWeight())
{
retargetedWeight += currentBlock->bbWeight;
}
}
else
{
@@ -1144,7 +1221,53 @@ PhaseStatus Compiler::fgCloneFinally()
// Cleanup the continuation
fgCleanupContinuation(normalCallFinallyReturn);

// Todo -- mark cloned blocks as a cloned finally....
// If we have profile data, compute how the weights split,
// and update the weights in both the clone and the original.
//
// TODO: if original weight is zero, we probably should forgo cloning...?
//
// TODO: it will frequently be the case that the original scale is 0.0 as
// all the profiled flow will go to the clone.
//
// Decide if we really want to set all those counts to zero, and if so
// whether we should mark the original as rarely run.
//
if (usingProfileWeights && (originalWeight > BB_ZERO_WEIGHT))
{
// We can't leave the finally more often than we enter.
// So cap cloned scale at 1.0f
//
BasicBlock::weight_t const clonedScale =
retargetedWeight < originalWeight ? (retargetedWeight / originalWeight) : 1.0f;
BasicBlock::weight_t const originalScale = 1.0f - clonedScale;

JITDUMP("Profile scale factor (" FMT_WT "/" FMT_WT ") => clone " FMT_WT " / original " FMT_WT "\n",
retargetedWeight, originalWeight, clonedScale, originalScale);

for (BasicBlock* block = firstBlock; block != lastBlock->bbNext; block = block->bbNext)
{
if (block->hasProfileWeight())
{
BasicBlock::weight_t const blockWeight = block->bbWeight;
block->setBBProfileWeight(blockWeight * originalScale);
JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", block->bbNum, block->bbWeight);

#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
// Handle a special case -- some handler entries can't have zero profile count.
//
if (bbIsHandlerBeg(block) && block->isRunRarely())
{
JITDUMP("Suppressing zero count for " FMT_BB " as it is a handler entry\n", block->bbNum);
block->makeBlockHot();
}
#endif

BasicBlock* const clonedBlock = blockMap[block];
clonedBlock->setBBProfileWeight(blockWeight * clonedScale);
JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", clonedBlock->bbNum, clonedBlock->bbWeight);
}
}
}

// Done!
JITDUMP("\nDone with EH#%u\n\n", XTnum);
@@ -1767,6 +1890,43 @@ bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block,
assert(callFinally->bbRefs > 0);
fgRemoveRefPred(callFinally, block);

// Update profile counts
//
if (block->hasProfileWeight())
{
// Add weight to the canonical call finally pair.
//
BasicBlock::weight_t const canonicalWeight =
canonicalCallFinally->hasProfileWeight() ? canonicalCallFinally->bbWeight : BB_ZERO_WEIGHT;
BasicBlock::weight_t const newCanonicalWeight = block->bbWeight + canonicalWeight;

canonicalCallFinally->setBBProfileWeight(newCanonicalWeight);

BasicBlock* const canonicalLeaveBlock = canonicalCallFinally->bbNext;

BasicBlock::weight_t const canonicalLeaveWeight =
canonicalLeaveBlock->hasProfileWeight() ? canonicalLeaveBlock->bbWeight : BB_ZERO_WEIGHT;
BasicBlock::weight_t const newLeaveWeight = block->bbWeight + canonicalLeaveWeight;

canonicalLeaveBlock->setBBProfileWeight(newLeaveWeight);

// Remove weight from the old call finally pair.
//
if (callFinally->hasProfileWeight())
{
BasicBlock::weight_t const newCallFinallyWeight =
callFinally->bbWeight > block->bbWeight ? callFinally->bbWeight - block->bbWeight : BB_ZERO_WEIGHT;
callFinally->setBBProfileWeight(newCallFinallyWeight);
}

if (leaveBlock->hasProfileWeight())
{
BasicBlock::weight_t const newLeaveWeight =
leaveBlock->bbWeight > block->bbWeight ? leaveBlock->bbWeight - block->bbWeight : BB_ZERO_WEIGHT;
leaveBlock->setBBProfileWeight(newLeaveWeight);
}
}

return true;
}

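For the callfinally chain merging fix in fgRetargetBranchesToCanonicalCallFinally above, the profile bookkeeping amounts to moving a redirected predecessor's weight from the old callfinally/leave pair onto the canonical pair, clamping at zero so weights never go negative. A rough sketch of that transfer follows; the CallFinallyPair struct and moveRetargetedWeight are illustrative stand-ins, not the JIT's BasicBlock API.

#include <algorithm>
#include <cstdio>

using weight_t = float;

// Illustrative model of a BBJ_CALLFINALLY block and its paired BBJ_ALWAYS
// (leave) block; the real code updates two BasicBlocks directly.
struct CallFinallyPair
{
    weight_t callWeight;
    weight_t leaveWeight;
};

// Redirecting a predecessor of weight w from oldPair to canonicalPair moves
// that flow: the canonical pair gains w and the old pair loses w.
void moveRetargetedWeight(weight_t w, CallFinallyPair& oldPair, CallFinallyPair& canonicalPair)
{
    canonicalPair.callWeight  += w;
    canonicalPair.leaveWeight += w;

    oldPair.callWeight  = std::max(oldPair.callWeight - w, 0.0f);
    oldPair.leaveWeight = std::max(oldPair.leaveWeight - w, 0.0f);
}

int main()
{
    CallFinallyPair oldPair{30.0f, 30.0f};
    CallFinallyPair canonical{50.0f, 50.0f};

    // Redirect a predecessor with weight 30: the old pair drops to 0/0 and
    // the canonical pair rises to 80/80.
    moveRetargetedWeight(30.0f, oldPair, canonical);
    std::printf("old %.0f/%.0f, canonical %.0f/%.0f\n", oldPair.callWeight, oldPair.leaveWeight,
                canonical.callWeight, canonical.leaveWeight);
    return 0;
}
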
30 changes: 14 additions & 16 deletions src/coreclr/jit/importer.cpp
@@ -9943,8 +9943,7 @@ void Compiler::impImportLeave(BasicBlock* block)
step->bbJumpDest->bbRefs++;

/* The new block will inherit this block's weight */
callBlock->setBBWeight(block->bbWeight);
callBlock->bbFlags |= block->bbFlags & BBF_RUN_RARELY;
callBlock->inheritWeight(block);

#ifdef DEBUG
if (verbose)
@@ -9973,8 +9972,8 @@ void Compiler::impImportLeave(BasicBlock* block)

step = fgNewBBafter(BBJ_ALWAYS, callBlock, true);
/* The new block will inherit this block's weight */
step->setBBWeight(block->bbWeight);
step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;
step->inheritWeight(block);
step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;

#ifdef DEBUG
if (verbose)
@@ -10034,8 +10033,7 @@ void Compiler::impImportLeave(BasicBlock* block)
step->bbJumpDest = finalStep;

/* The new block will inherit this block's weight */
finalStep->setBBWeight(block->bbWeight);
finalStep->bbFlags |= block->bbFlags & BBF_RUN_RARELY;
finalStep->inheritWeight(block);

#ifdef DEBUG
if (verbose)
@@ -10197,8 +10195,8 @@ void Compiler::impImportLeave(BasicBlock* block)
#endif // defined(TARGET_ARM)

/* The new block will inherit this block's weight */
exitBlock->setBBWeight(block->bbWeight);
exitBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
exitBlock->inheritWeight(block);
exitBlock->bbFlags |= BBF_IMPORTED;

/* This exit block is the new step */
step = exitBlock;
@@ -10242,8 +10240,8 @@ void Compiler::impImportLeave(BasicBlock* block)
block->bbJumpDest->bbRefs++;

/* The new block will inherit this block's weight */
callBlock->setBBWeight(block->bbWeight);
callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
callBlock->inheritWeight(block);
callBlock->bbFlags |= BBF_IMPORTED;

#ifdef DEBUG
if (verbose)
@@ -10342,8 +10340,8 @@ void Compiler::impImportLeave(BasicBlock* block)
#endif // defined(TARGET_ARM)

/* The new block will inherit this block's weight */
callBlock->setBBWeight(block->bbWeight);
callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
callBlock->inheritWeight(block);
callBlock->bbFlags |= BBF_IMPORTED;

#ifdef DEBUG
if (verbose)
@@ -10359,8 +10357,8 @@ void Compiler::impImportLeave(BasicBlock* block)
stepType = ST_FinallyReturn;

/* The new block will inherit this block's weight */
step->setBBWeight(block->bbWeight);
step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;
step->inheritWeight(block);
step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;

#ifdef DEBUG
if (verbose)
@@ -10446,8 +10444,8 @@ void Compiler::impImportLeave(BasicBlock* block)
#endif // defined(TARGET_ARM)

/* The new block will inherit this block's weight */
catchStep->setBBWeight(block->bbWeight);
catchStep->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
catchStep->inheritWeight(block);
catchStep->bbFlags |= BBF_IMPORTED;

#ifdef DEBUG
if (verbose)
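The importer.cpp hunks above replace hand-copied weights and BBF_RUN_RARELY flags with inheritWeight, which, per the commit message, propagates IBC counts to the newly created step and call blocks. A simplified sketch of that idea follows; this Block struct is hypothetical and far smaller than the real BasicBlock, which tracks the profile-derived status through block flags.

#include <cstdio>

using weight_t = float;

// Hypothetical, stripped-down model of a flow-graph block's weight state.
struct Block
{
    weight_t weight           = 0.0f;
    bool     hasProfileWeight = false; // weight came from IBC/PGO data
    bool     runRarely        = false;

    // Inheriting copies the weight and its provenance, so blocks created
    // while importing a leave stay "profiled" when the source block is.
    void inheritWeight(const Block& src)
    {
        weight           = src.weight;
        hasProfileWeight = src.hasProfileWeight;
        runRarely        = src.runRarely;
    }
};

int main()
{
    Block leaveSource;
    leaveSource.weight           = 42.0f;
    leaveSource.hasProfileWeight = true;

    Block newCallBlock;
    newCallBlock.inheritWeight(leaveSource);

    std::printf("weight %.0f, profiled %d, rarely run %d\n", newCallBlock.weight,
                newCallBlock.hasProfileWeight, newCallBlock.runRarely);
    return 0;
}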
