From b428758ce9f68c46096f7f761ebc47cf31670ae0 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 4 Mar 2021 20:09:43 -0800 Subject: [PATCH] JIT: profile updates for finally optimizations (#49139) Update `impImportLeave` to propagate IBC counts to new blocks. Fix profile weights during callfinally chain merging. Scale profile weights for cloned finallys. Choose the continuation path based on profile weight. Make sure to keep handler entry hot. Partial fix for #48925. --- src/coreclr/jit/fgehopt.cpp | 206 +++++++++++++++++++++++++++++++---- src/coreclr/jit/importer.cpp | 30 +++-- 2 files changed, 197 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index ba817b5b49091..7de6120fd9b4a 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -779,18 +779,24 @@ PhaseStatus Compiler::fgCloneFinally() " %u blocks, %u statements\n", XTnum, regionBBCount, regionStmtCount); - // Walk the try region backwards looking for the last block - // that transfers control to a callfinally. + // Walk the try region backwards looking for blocks + // that transfer control to a callfinally. + // + // For non-pgo we find the lexically last block; for + // pgo we find the highest-weight block. + // BasicBlock* const firstTryBlock = HBtab->ebdTryBeg; BasicBlock* const lastTryBlock = HBtab->ebdTryLast; assert(firstTryBlock->getTryIndex() == XTnum); assert(bbInTryRegions(XTnum, lastTryBlock)); BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev; - BasicBlock* normalCallFinallyBlock = nullptr; - BasicBlock* normalCallFinallyReturn = nullptr; - BasicBlock* cloneInsertAfter = HBtab->ebdTryLast; - bool tryToRelocateCallFinally = false; + BasicBlock* normalCallFinallyBlock = nullptr; + BasicBlock* normalCallFinallyReturn = nullptr; + BasicBlock* cloneInsertAfter = HBtab->ebdTryLast; + bool tryToRelocateCallFinally = false; + const bool usingProfileWeights = fgIsUsingProfileWeights(); + BasicBlock::weight_t currentWeight = BB_ZERO_WEIGHT; for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->bbPrev) { @@ -830,12 +836,63 @@ PhaseStatus Compiler::fgCloneFinally() BasicBlock* const jumpDest = block; #endif // FEATURE_EH_CALLFINALLY_THUNKS - // Found our block. + // Found a block that invokes the finally. + // BasicBlock* const finallyReturnBlock = jumpDest->bbNext; BasicBlock* const postTryFinallyBlock = finallyReturnBlock->bbJumpDest; + bool isUpdate = false; - normalCallFinallyBlock = jumpDest; - normalCallFinallyReturn = postTryFinallyBlock; + // See if this is the one we want to use to inspire cloning. + // + if (normalCallFinallyBlock == nullptr) + { + normalCallFinallyBlock = jumpDest; + normalCallFinallyReturn = postTryFinallyBlock; + + if (usingProfileWeights) + { + if (block->hasProfileWeight()) + { + JITDUMP("Found profiled " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight); + currentWeight = block->bbWeight; + } + else + { + JITDUMP("Found unprofiled " FMT_BB "\n", block->bbNum); + } + } + } + else + { + assert(usingProfileWeights); + + if (!block->hasProfileWeight()) + { + // An unprofiled block in method with profile data. + // We generally don't expect to see these as the + // blocks in EH regions must have come from the root + // method, which we know has profile data. + // Just skip over them for now. + // + JITDUMP("Skipping past unprofiled " FMT_BB "\n", block->bbNum); + continue; + } + + if (block->bbWeight <= currentWeight) + { + JITDUMP("Skipping past " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight); + continue; + } + + // Prefer this block. + // + JITDUMP("Preferring " FMT_BB " since " FMT_WT " > " FMT_WT "\n", block->bbNum, block->bbWeight, + currentWeight); + normalCallFinallyBlock = jumpDest; + normalCallFinallyReturn = postTryFinallyBlock; + currentWeight = block->bbWeight; + isUpdate = true; + } #if FEATURE_EH_CALLFINALLY_THUNKS // When there are callfinally thunks, we don't expect to see the @@ -850,16 +907,24 @@ PhaseStatus Compiler::fgCloneFinally() // through from the try into the clone. tryToRelocateCallFinally = true; - JITDUMP("Chose path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";" + JITDUMP("%s path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";" " the call returns to " FMT_BB " which jumps to " FMT_BB "\n", - block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); + isUpdate ? "Updating" : "Choosing", block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, + postTryFinallyBlock->bbNum); #else - JITDUMP("Chose path to clone: try block " FMT_BB " is a callfinally;" + JITDUMP("%s path to clone: try block " FMT_BB " is a callfinally;" " the call returns to " FMT_BB " which jumps to " FMT_BB "\n", - block->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); + isUpdate ? "Updating" : "Choosing", block->bbNum, finallyReturnBlock->bbNum, + postTryFinallyBlock->bbNum); #endif // FEATURE_EH_CALLFINALLY_THUNKS - break; + // For non-pgo just take the first one we find. + // For pgo, keep searching in case we find one we like better. + // + if (!usingProfileWeights) + { + break; + } } // If there is no call to the finally, don't clone. @@ -956,11 +1021,14 @@ PhaseStatus Compiler::fgCloneFinally() // // Clone the finally body, and splice it into the flow graph // within in the parent region of the try. - const unsigned finallyTryIndex = firstBlock->bbTryIndex; - BasicBlock* insertAfter = nullptr; - BlockToBlockMap blockMap(getAllocator()); - bool clonedOk = true; - unsigned cloneBBCount = 0; + // + const unsigned finallyTryIndex = firstBlock->bbTryIndex; + BasicBlock* insertAfter = nullptr; + BlockToBlockMap blockMap(getAllocator()); + bool clonedOk = true; + unsigned cloneBBCount = 0; + BasicBlock::weight_t const originalWeight = + firstBlock->hasProfileWeight() ? firstBlock->bbWeight : BB_ZERO_WEIGHT; for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext) { @@ -1067,9 +1135,10 @@ PhaseStatus Compiler::fgCloneFinally() // Modify the targeting call finallys to branch to the cloned // finally. Make a note if we see some calls that can't be // retargeted (since they want to return to other places). - BasicBlock* const firstCloneBlock = blockMap[firstBlock]; - bool retargetedAllCalls = true; - BasicBlock* currentBlock = firstCallFinallyRangeBlock; + BasicBlock* const firstCloneBlock = blockMap[firstBlock]; + bool retargetedAllCalls = true; + BasicBlock* currentBlock = firstCallFinallyRangeBlock; + BasicBlock::weight_t retargetedWeight = BB_ZERO_WEIGHT; while (currentBlock != endCallFinallyRangeBlock) { @@ -1088,6 +1157,9 @@ PhaseStatus Compiler::fgCloneFinally() // by the cloned finally and by the called finally. if (postTryFinallyBlock == normalCallFinallyReturn) { + JITDUMP("Retargeting callfinally " FMT_BB " to clone entry " FMT_BB "\n", currentBlock->bbNum, + firstCloneBlock->bbNum); + // This call returns to the expected spot, so // retarget it to branch to the clone. currentBlock->bbJumpDest = firstCloneBlock; @@ -1107,6 +1179,11 @@ PhaseStatus Compiler::fgCloneFinally() // Make sure iteration isn't going off the deep end. assert(leaveBlock != endCallFinallyRangeBlock); + + if (currentBlock->hasProfileWeight()) + { + retargetedWeight += currentBlock->bbWeight; + } } else { @@ -1144,7 +1221,53 @@ PhaseStatus Compiler::fgCloneFinally() // Cleanup the continuation fgCleanupContinuation(normalCallFinallyReturn); - // Todo -- mark cloned blocks as a cloned finally.... + // If we have profile data, compute how the weights split, + // and update the weights in both the clone and the original. + // + // TODO: if original weight is zero, we probably should forgo cloning...? + // + // TODO: it will frequently be the case that the original scale is 0.0 as + // all the profiled flow will go to the clone. + // + // Decide if we really want to set all those counts to zero, and if so + // whether we should mark the original as rarely run. + // + if (usingProfileWeights && (originalWeight > BB_ZERO_WEIGHT)) + { + // We can't leave the finally more often than we enter. + // So cap cloned scale at 1.0f + // + BasicBlock::weight_t const clonedScale = + retargetedWeight < originalWeight ? (retargetedWeight / originalWeight) : 1.0f; + BasicBlock::weight_t const originalScale = 1.0f - clonedScale; + + JITDUMP("Profile scale factor (" FMT_WT "/" FMT_WT ") => clone " FMT_WT " / original " FMT_WT "\n", + retargetedWeight, originalWeight, clonedScale, originalScale); + + for (BasicBlock* block = firstBlock; block != lastBlock->bbNext; block = block->bbNext) + { + if (block->hasProfileWeight()) + { + BasicBlock::weight_t const blockWeight = block->bbWeight; + block->setBBProfileWeight(blockWeight * originalScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", block->bbNum, block->bbWeight); + +#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION + // Handle a special case -- some handler entries can't have zero profile count. + // + if (bbIsHandlerBeg(block) && block->isRunRarely()) + { + JITDUMP("Suppressing zero count for " FMT_BB " as it is a handler entry\n", block->bbNum); + block->makeBlockHot(); + } +#endif + + BasicBlock* const clonedBlock = blockMap[block]; + clonedBlock->setBBProfileWeight(blockWeight * clonedScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", clonedBlock->bbNum, clonedBlock->bbWeight); + } + } + } // Done! JITDUMP("\nDone with EH#%u\n\n", XTnum); @@ -1767,6 +1890,43 @@ bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block, assert(callFinally->bbRefs > 0); fgRemoveRefPred(callFinally, block); + // Update profile counts + // + if (block->hasProfileWeight()) + { + // Add weight to the canonical call finally pair. + // + BasicBlock::weight_t const canonicalWeight = + canonicalCallFinally->hasProfileWeight() ? canonicalCallFinally->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newCanonicalWeight = block->bbWeight + canonicalWeight; + + canonicalCallFinally->setBBProfileWeight(newCanonicalWeight); + + BasicBlock* const canonicalLeaveBlock = canonicalCallFinally->bbNext; + + BasicBlock::weight_t const canonicalLeaveWeight = + canonicalLeaveBlock->hasProfileWeight() ? canonicalLeaveBlock->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newLeaveWeight = block->bbWeight + canonicalLeaveWeight; + + canonicalLeaveBlock->setBBProfileWeight(newLeaveWeight); + + // Remove weight from the old call finally pair. + // + if (callFinally->hasProfileWeight()) + { + BasicBlock::weight_t const newCallFinallyWeight = + callFinally->bbWeight > block->bbWeight ? callFinally->bbWeight - block->bbWeight : BB_ZERO_WEIGHT; + callFinally->setBBProfileWeight(newCallFinallyWeight); + } + + if (leaveBlock->hasProfileWeight()) + { + BasicBlock::weight_t const newLeaveWeight = + leaveBlock->bbWeight > block->bbWeight ? leaveBlock->bbWeight - block->bbWeight : BB_ZERO_WEIGHT; + leaveBlock->setBBProfileWeight(newLeaveWeight); + } + } + return true; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a6aa61578f999..700d920d3fb99 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -9943,8 +9943,7 @@ void Compiler::impImportLeave(BasicBlock* block) step->bbJumpDest->bbRefs++; /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= block->bbFlags & BBF_RUN_RARELY; + callBlock->inheritWeight(block); #ifdef DEBUG if (verbose) @@ -9973,8 +9972,8 @@ void Compiler::impImportLeave(BasicBlock* block) step = fgNewBBafter(BBJ_ALWAYS, callBlock, true); /* The new block will inherit this block's weight */ - step->setBBWeight(block->bbWeight); - step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; + step->inheritWeight(block); + step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; #ifdef DEBUG if (verbose) @@ -10034,8 +10033,7 @@ void Compiler::impImportLeave(BasicBlock* block) step->bbJumpDest = finalStep; /* The new block will inherit this block's weight */ - finalStep->setBBWeight(block->bbWeight); - finalStep->bbFlags |= block->bbFlags & BBF_RUN_RARELY; + finalStep->inheritWeight(block); #ifdef DEBUG if (verbose) @@ -10197,8 +10195,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - exitBlock->setBBWeight(block->bbWeight); - exitBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + exitBlock->inheritWeight(block); + exitBlock->bbFlags |= BBF_IMPORTED; /* This exit block is the new step */ step = exitBlock; @@ -10242,8 +10240,8 @@ void Compiler::impImportLeave(BasicBlock* block) block->bbJumpDest->bbRefs++; /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + callBlock->inheritWeight(block); + callBlock->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose) @@ -10342,8 +10340,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + callBlock->inheritWeight(block); + callBlock->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose) @@ -10359,8 +10357,8 @@ void Compiler::impImportLeave(BasicBlock* block) stepType = ST_FinallyReturn; /* The new block will inherit this block's weight */ - step->setBBWeight(block->bbWeight); - step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; + step->inheritWeight(block); + step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; #ifdef DEBUG if (verbose) @@ -10446,8 +10444,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - catchStep->setBBWeight(block->bbWeight); - catchStep->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + catchStep->inheritWeight(block); + catchStep->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose)