diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index ba817b5b49091..7de6120fd9b4a 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -779,18 +779,24 @@ PhaseStatus Compiler::fgCloneFinally() " %u blocks, %u statements\n", XTnum, regionBBCount, regionStmtCount); - // Walk the try region backwards looking for the last block - // that transfers control to a callfinally. + // Walk the try region backwards looking for blocks + // that transfer control to a callfinally. + // + // For non-pgo we find the lexically last block; for + // pgo we find the highest-weight block. + // BasicBlock* const firstTryBlock = HBtab->ebdTryBeg; BasicBlock* const lastTryBlock = HBtab->ebdTryLast; assert(firstTryBlock->getTryIndex() == XTnum); assert(bbInTryRegions(XTnum, lastTryBlock)); BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev; - BasicBlock* normalCallFinallyBlock = nullptr; - BasicBlock* normalCallFinallyReturn = nullptr; - BasicBlock* cloneInsertAfter = HBtab->ebdTryLast; - bool tryToRelocateCallFinally = false; + BasicBlock* normalCallFinallyBlock = nullptr; + BasicBlock* normalCallFinallyReturn = nullptr; + BasicBlock* cloneInsertAfter = HBtab->ebdTryLast; + bool tryToRelocateCallFinally = false; + const bool usingProfileWeights = fgIsUsingProfileWeights(); + BasicBlock::weight_t currentWeight = BB_ZERO_WEIGHT; for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->bbPrev) { @@ -830,12 +836,63 @@ PhaseStatus Compiler::fgCloneFinally() BasicBlock* const jumpDest = block; #endif // FEATURE_EH_CALLFINALLY_THUNKS - // Found our block. + // Found a block that invokes the finally. + // BasicBlock* const finallyReturnBlock = jumpDest->bbNext; BasicBlock* const postTryFinallyBlock = finallyReturnBlock->bbJumpDest; + bool isUpdate = false; - normalCallFinallyBlock = jumpDest; - normalCallFinallyReturn = postTryFinallyBlock; + // See if this is the one we want to use to inspire cloning. + // + if (normalCallFinallyBlock == nullptr) + { + normalCallFinallyBlock = jumpDest; + normalCallFinallyReturn = postTryFinallyBlock; + + if (usingProfileWeights) + { + if (block->hasProfileWeight()) + { + JITDUMP("Found profiled " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight); + currentWeight = block->bbWeight; + } + else + { + JITDUMP("Found unprofiled " FMT_BB "\n", block->bbNum); + } + } + } + else + { + assert(usingProfileWeights); + + if (!block->hasProfileWeight()) + { + // An unprofiled block in method with profile data. + // We generally don't expect to see these as the + // blocks in EH regions must have come from the root + // method, which we know has profile data. + // Just skip over them for now. + // + JITDUMP("Skipping past unprofiled " FMT_BB "\n", block->bbNum); + continue; + } + + if (block->bbWeight <= currentWeight) + { + JITDUMP("Skipping past " FMT_BB " with weight " FMT_WT "\n", block->bbNum, block->bbWeight); + continue; + } + + // Prefer this block. + // + JITDUMP("Preferring " FMT_BB " since " FMT_WT " > " FMT_WT "\n", block->bbNum, block->bbWeight, + currentWeight); + normalCallFinallyBlock = jumpDest; + normalCallFinallyReturn = postTryFinallyBlock; + currentWeight = block->bbWeight; + isUpdate = true; + } #if FEATURE_EH_CALLFINALLY_THUNKS // When there are callfinally thunks, we don't expect to see the @@ -850,16 +907,24 @@ PhaseStatus Compiler::fgCloneFinally() // through from the try into the clone. tryToRelocateCallFinally = true; - JITDUMP("Chose path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";" + JITDUMP("%s path to clone: try block " FMT_BB " jumps to callfinally at " FMT_BB ";" " the call returns to " FMT_BB " which jumps to " FMT_BB "\n", - block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); + isUpdate ? "Updating" : "Choosing", block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, + postTryFinallyBlock->bbNum); #else - JITDUMP("Chose path to clone: try block " FMT_BB " is a callfinally;" + JITDUMP("%s path to clone: try block " FMT_BB " is a callfinally;" " the call returns to " FMT_BB " which jumps to " FMT_BB "\n", - block->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum); + isUpdate ? "Updating" : "Choosing", block->bbNum, finallyReturnBlock->bbNum, + postTryFinallyBlock->bbNum); #endif // FEATURE_EH_CALLFINALLY_THUNKS - break; + // For non-pgo just take the first one we find. + // For pgo, keep searching in case we find one we like better. + // + if (!usingProfileWeights) + { + break; + } } // If there is no call to the finally, don't clone. @@ -956,11 +1021,14 @@ PhaseStatus Compiler::fgCloneFinally() // // Clone the finally body, and splice it into the flow graph // within in the parent region of the try. - const unsigned finallyTryIndex = firstBlock->bbTryIndex; - BasicBlock* insertAfter = nullptr; - BlockToBlockMap blockMap(getAllocator()); - bool clonedOk = true; - unsigned cloneBBCount = 0; + // + const unsigned finallyTryIndex = firstBlock->bbTryIndex; + BasicBlock* insertAfter = nullptr; + BlockToBlockMap blockMap(getAllocator()); + bool clonedOk = true; + unsigned cloneBBCount = 0; + BasicBlock::weight_t const originalWeight = + firstBlock->hasProfileWeight() ? firstBlock->bbWeight : BB_ZERO_WEIGHT; for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext) { @@ -1067,9 +1135,10 @@ PhaseStatus Compiler::fgCloneFinally() // Modify the targeting call finallys to branch to the cloned // finally. Make a note if we see some calls that can't be // retargeted (since they want to return to other places). - BasicBlock* const firstCloneBlock = blockMap[firstBlock]; - bool retargetedAllCalls = true; - BasicBlock* currentBlock = firstCallFinallyRangeBlock; + BasicBlock* const firstCloneBlock = blockMap[firstBlock]; + bool retargetedAllCalls = true; + BasicBlock* currentBlock = firstCallFinallyRangeBlock; + BasicBlock::weight_t retargetedWeight = BB_ZERO_WEIGHT; while (currentBlock != endCallFinallyRangeBlock) { @@ -1088,6 +1157,9 @@ PhaseStatus Compiler::fgCloneFinally() // by the cloned finally and by the called finally. if (postTryFinallyBlock == normalCallFinallyReturn) { + JITDUMP("Retargeting callfinally " FMT_BB " to clone entry " FMT_BB "\n", currentBlock->bbNum, + firstCloneBlock->bbNum); + // This call returns to the expected spot, so // retarget it to branch to the clone. currentBlock->bbJumpDest = firstCloneBlock; @@ -1107,6 +1179,11 @@ PhaseStatus Compiler::fgCloneFinally() // Make sure iteration isn't going off the deep end. assert(leaveBlock != endCallFinallyRangeBlock); + + if (currentBlock->hasProfileWeight()) + { + retargetedWeight += currentBlock->bbWeight; + } } else { @@ -1144,7 +1221,53 @@ PhaseStatus Compiler::fgCloneFinally() // Cleanup the continuation fgCleanupContinuation(normalCallFinallyReturn); - // Todo -- mark cloned blocks as a cloned finally.... + // If we have profile data, compute how the weights split, + // and update the weights in both the clone and the original. + // + // TODO: if original weight is zero, we probably should forgo cloning...? + // + // TODO: it will frequently be the case that the original scale is 0.0 as + // all the profiled flow will go to the clone. + // + // Decide if we really want to set all those counts to zero, and if so + // whether we should mark the original as rarely run. + // + if (usingProfileWeights && (originalWeight > BB_ZERO_WEIGHT)) + { + // We can't leave the finally more often than we enter. + // So cap cloned scale at 1.0f + // + BasicBlock::weight_t const clonedScale = + retargetedWeight < originalWeight ? (retargetedWeight / originalWeight) : 1.0f; + BasicBlock::weight_t const originalScale = 1.0f - clonedScale; + + JITDUMP("Profile scale factor (" FMT_WT "/" FMT_WT ") => clone " FMT_WT " / original " FMT_WT "\n", + retargetedWeight, originalWeight, clonedScale, originalScale); + + for (BasicBlock* block = firstBlock; block != lastBlock->bbNext; block = block->bbNext) + { + if (block->hasProfileWeight()) + { + BasicBlock::weight_t const blockWeight = block->bbWeight; + block->setBBProfileWeight(blockWeight * originalScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", block->bbNum, block->bbWeight); + +#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION + // Handle a special case -- some handler entries can't have zero profile count. + // + if (bbIsHandlerBeg(block) && block->isRunRarely()) + { + JITDUMP("Suppressing zero count for " FMT_BB " as it is a handler entry\n", block->bbNum); + block->makeBlockHot(); + } +#endif + + BasicBlock* const clonedBlock = blockMap[block]; + clonedBlock->setBBProfileWeight(blockWeight * clonedScale); + JITDUMP("Set weight of " FMT_BB " to " FMT_WT "\n", clonedBlock->bbNum, clonedBlock->bbWeight); + } + } + } // Done! JITDUMP("\nDone with EH#%u\n\n", XTnum); @@ -1767,6 +1890,43 @@ bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block, assert(callFinally->bbRefs > 0); fgRemoveRefPred(callFinally, block); + // Update profile counts + // + if (block->hasProfileWeight()) + { + // Add weight to the canonical call finally pair. + // + BasicBlock::weight_t const canonicalWeight = + canonicalCallFinally->hasProfileWeight() ? canonicalCallFinally->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newCanonicalWeight = block->bbWeight + canonicalWeight; + + canonicalCallFinally->setBBProfileWeight(newCanonicalWeight); + + BasicBlock* const canonicalLeaveBlock = canonicalCallFinally->bbNext; + + BasicBlock::weight_t const canonicalLeaveWeight = + canonicalLeaveBlock->hasProfileWeight() ? canonicalLeaveBlock->bbWeight : BB_ZERO_WEIGHT; + BasicBlock::weight_t const newLeaveWeight = block->bbWeight + canonicalLeaveWeight; + + canonicalLeaveBlock->setBBProfileWeight(newLeaveWeight); + + // Remove weight from the old call finally pair. + // + if (callFinally->hasProfileWeight()) + { + BasicBlock::weight_t const newCallFinallyWeight = + callFinally->bbWeight > block->bbWeight ? callFinally->bbWeight - block->bbWeight : BB_ZERO_WEIGHT; + callFinally->setBBProfileWeight(newCallFinallyWeight); + } + + if (leaveBlock->hasProfileWeight()) + { + BasicBlock::weight_t const newLeaveWeight = + leaveBlock->bbWeight > block->bbWeight ? leaveBlock->bbWeight - block->bbWeight : BB_ZERO_WEIGHT; + leaveBlock->setBBProfileWeight(newLeaveWeight); + } + } + return true; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a6aa61578f999..700d920d3fb99 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -9943,8 +9943,7 @@ void Compiler::impImportLeave(BasicBlock* block) step->bbJumpDest->bbRefs++; /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= block->bbFlags & BBF_RUN_RARELY; + callBlock->inheritWeight(block); #ifdef DEBUG if (verbose) @@ -9973,8 +9972,8 @@ void Compiler::impImportLeave(BasicBlock* block) step = fgNewBBafter(BBJ_ALWAYS, callBlock, true); /* The new block will inherit this block's weight */ - step->setBBWeight(block->bbWeight); - step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; + step->inheritWeight(block); + step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; #ifdef DEBUG if (verbose) @@ -10034,8 +10033,7 @@ void Compiler::impImportLeave(BasicBlock* block) step->bbJumpDest = finalStep; /* The new block will inherit this block's weight */ - finalStep->setBBWeight(block->bbWeight); - finalStep->bbFlags |= block->bbFlags & BBF_RUN_RARELY; + finalStep->inheritWeight(block); #ifdef DEBUG if (verbose) @@ -10197,8 +10195,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - exitBlock->setBBWeight(block->bbWeight); - exitBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + exitBlock->inheritWeight(block); + exitBlock->bbFlags |= BBF_IMPORTED; /* This exit block is the new step */ step = exitBlock; @@ -10242,8 +10240,8 @@ void Compiler::impImportLeave(BasicBlock* block) block->bbJumpDest->bbRefs++; /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + callBlock->inheritWeight(block); + callBlock->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose) @@ -10342,8 +10340,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - callBlock->setBBWeight(block->bbWeight); - callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + callBlock->inheritWeight(block); + callBlock->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose) @@ -10359,8 +10357,8 @@ void Compiler::impImportLeave(BasicBlock* block) stepType = ST_FinallyReturn; /* The new block will inherit this block's weight */ - step->setBBWeight(block->bbWeight); - step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; + step->inheritWeight(block); + step->bbFlags |= BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS; #ifdef DEBUG if (verbose) @@ -10446,8 +10444,8 @@ void Compiler::impImportLeave(BasicBlock* block) #endif // defined(TARGET_ARM) /* The new block will inherit this block's weight */ - catchStep->setBBWeight(block->bbWeight); - catchStep->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED; + catchStep->inheritWeight(block); + catchStep->bbFlags |= BBF_IMPORTED; #ifdef DEBUG if (verbose)