diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 41166b2d344a5..b75a5655fd75b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4838,19 +4838,23 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags if (opts.OptimizationEnabled()) { + // Invert loops + // + DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops); + // Optimize block order // DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout); + // Compute reachability sets and dominators. // DoPhase(this, PHASE_COMPUTE_REACHABILITY, &Compiler::fgComputeReachability); - // Perform loop inversion (i.e. transform "while" loops into - // "repeat" loops) and discover and classify natural loops + // Discover and classify natural loops // (e.g. mark iterative loops as such). Also marks loop blocks // and sets bbWeight to the loop nesting levels // - DoPhase(this, PHASE_OPTIMIZE_LOOPS, &Compiler::optOptimizeLoops); + DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoops); // Clone loops with optimization opportunities, and // choose the one based on dynamic condition evaluation. @@ -5299,10 +5303,8 @@ void Compiler::RecomputeLoopInfo() block->bbFlags &= ~BBF_LOOP_FLAGS; } fgComputeReachability(); - // Rebuild the loop tree annotations themselves. Since this is performed as - // part of 'optOptimizeLoops', this will also re-perform loop rotation, but - // not other optimizations, as the others are not part of 'optOptimizeLoops'. - optOptimizeLoops(); + // Rebuild the loop tree annotations themselves + optFindLoops(); } /*****************************************************************************/ diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 317d310e84a7a..f807a5c0f47bd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5383,7 +5383,7 @@ class Compiler void fgComputeCalledCount(BasicBlock::weight_t returnWeight); void fgComputeEdgeWeights(); - void fgReorderBlocks(); + bool fgReorderBlocks(); void fgDetermineFirstColdBlock(); @@ -5451,8 +5451,13 @@ class Compiler void fgDebugCheckFlagsHelper(GenTree* tree, unsigned treeFlags, unsigned chkFlags); void fgDebugCheckTryFinallyExits(); void fgDebugCheckProfileData(); + bool fgDebugCheckIncomingProfileData(BasicBlock* block); + bool fgDebugCheckOutgoingProfileData(BasicBlock* block); #endif + bool fgProfileWeightsEqual(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2); + bool fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2); + static GenTree* fgGetFirstNode(GenTree* tree); //--------------------- Walking the trees in the IR ----------------------- @@ -6119,11 +6124,9 @@ class Compiler void optOptimizeBoolsGcStress(BasicBlock* condBlock); #endif public: - void optOptimizeLayout(); // Optimize the BasicBlock layout of the method - - void optOptimizeLoops(); // for "while-do" loops duplicates simple loop conditions and transforms - // the loop into a "do-while" loop - // Also finds all natural loops and records them in the loop table + PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom. + PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method + PhaseStatus optFindLoops(); // Finds loops and records them in the loop table // Optionally clone loops in the loop table. void optCloneLoops(); @@ -6449,7 +6452,7 @@ class Compiler } } - void fgOptWhileLoop(BasicBlock* block); + void optInvertWhileLoop(BasicBlock* block); bool optComputeLoopRep(int constInit, int constLimit, diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index fc6df58ad4bd7..49303ecbd64b9 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -53,10 +53,11 @@ CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1, false CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", "EH-FUNC", false, -1, false) #endif // FEATURE_EH_FUNCLETS CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", "MRGTHROW", false, -1, false) +CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", "LOOP-INV", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1, false) CompPhaseNameMacro(PHASE_COMPUTE_REACHABILITY, "Compute blocks reachability", "BL_REACH", false, -1, false) CompPhaseNameMacro(PHASE_ZERO_INITS, "Redundant zero Inits", "ZERO-INIT", false, -1, false) -CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1, false) +CompPhaseNameMacro(PHASE_FIND_LOOPS, "Find loops", "LOOP-FND", false, -1, false) CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1, false) CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1, false) CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1, false) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 8941c5b838e3f..5fb1255252299 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -3661,15 +3661,17 @@ bool Compiler::fgOptimizeSwitchJumps() #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif -/***************************************************************************** - * - * Function called to reorder the flowgraph of BasicBlocks such that any - * rarely run blocks are placed at the end of the block list. - * If we have profile information we also use that information to reverse - * all conditional jumps that would benefit. - */ -void Compiler::fgReorderBlocks() +//----------------------------------------------------------------------------- +// fgReorderBlocks: reorder blocks to favor frequent fall through paths, +// move rare blocks to the end of the method/eh region, and move +// funclets to the ends of methods. +// +// Returns: +// True if anything got reordered. Reordering blocks may require changing +// IR to reverse branch conditions. +// +bool Compiler::fgReorderBlocks() { noway_assert(opts.compDbgCode == false); @@ -3680,12 +3682,13 @@ void Compiler::fgReorderBlocks() // We can't relocate anything if we only have one block if (fgFirstBB->bbNext == nullptr) { - return; + return false; } bool newRarelyRun = false; bool movedBlocks = false; bool optimizedSwitches = false; + bool optimizedBranches = false; // First let us expand the set of run rarely blocks newRarelyRun |= fgExpandRarelyRunBlocks(); @@ -4094,9 +4097,11 @@ void Compiler::fgReorderBlocks() // Check for an unconditional branch to a conditional branch // which also branches back to our next block // - if (fgOptimizeBranch(bPrev)) + const bool optimizedBranch = fgOptimizeBranch(bPrev); + if (optimizedBranch) { noway_assert(bPrev->bbJumpKind == BBJ_COND); + optimizedBranches = true; } continue; } @@ -4816,7 +4821,7 @@ void Compiler::fgReorderBlocks() } // end of for loop(bPrev,block) - bool changed = movedBlocks || newRarelyRun || optimizedSwitches; + const bool changed = movedBlocks || newRarelyRun || optimizedSwitches || optimizedBranches; if (changed) { @@ -4829,6 +4834,8 @@ void Compiler::fgReorderBlocks() } #endif // DEBUG } + + return changed; } #ifdef _PREFAST_ #pragma warning(pop) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index d2c567fbfc40e..6481095e31f17 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -3129,6 +3129,11 @@ void Compiler::fgComputeEdgeWeights() if (!assignOK) { // Here we have inconsistent profile data + JITDUMP("Inconsistent profile data at " FMT_BB " -> " FMT_BB + ": dest weight %f, min/max into dest is %f/%f, edge %f/%f\n", + bSrc->bbNum, bDst->bbNum, bDstWeight, minEdgeWeightSum, maxEdgeWeightSum, + edge->edgeWeightMin(), edge->edgeWeightMax()); + inconsistentProfileData = true; // No point in continuing goto EARLY_EXIT; @@ -3222,6 +3227,43 @@ EARLY_EXIT:; fgEdgeWeightsComputed = true; } +//------------------------------------------------------------------------ +// fgProfileWeightsEqual: check if two profile weights are equal +// (or nearly so) +// +// Arguments: +// weight1 -- first weight +// weight2 -- second weight +// +// Notes: +// In most cases you should probably call fgProfileWeightsConsistent instead +// of this method. +// +bool Compiler::fgProfileWeightsEqual(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2) +{ + return fabs(weight1 - weight2) < 0.01; +} + +//------------------------------------------------------------------------ +// fgProfileWeightsConsistentEqual: check if two profile weights are within +// some small percentage of one another. +// +// Arguments: +// weight1 -- first weight +// weight2 -- second weight +// +bool Compiler::fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2) +{ + if (weight2 == 0) + { + return fgProfileWeightsEqual(weight1, weight2); + } + + BasicBlock::weight_t const relativeDiff = (weight2 - weight1) / weight2; + + return fgProfileWeightsEqual(relativeDiff, 0.0f); +} + #ifdef DEBUG //------------------------------------------------------------------------ @@ -3312,118 +3354,22 @@ void Compiler::fgDebugCheckProfileData() // But we have two edge counts... so for now we simply check if the block // count falls within the [min,max] range. // + bool incomingConsistent = true; + bool outgoingConsistent = true; + if (verifyIncoming) { - BasicBlock::weight_t incomingWeightMin = 0; - BasicBlock::weight_t incomingWeightMax = 0; - bool foundPreds = false; - - for (flowList* predEdge = block->bbPreds; predEdge != nullptr; predEdge = predEdge->flNext) - { - incomingWeightMin += predEdge->edgeWeightMin(); - incomingWeightMax += predEdge->edgeWeightMax(); - foundPreds = true; - } - - if (!foundPreds) - { - // Might need to tone this down as we could see unreachable blocks? - problemBlocks++; - JITDUMP(" " FMT_BB " - expected to see predecessors\n", block->bbNum); - } - else - { - if (incomingWeightMin > incomingWeightMax) - { - problemBlocks++; - JITDUMP(" " FMT_BB " - incoming min %d > incoming max %d\n", block->bbNum, incomingWeightMin, - incomingWeightMax); - } - else if (blockWeight < incomingWeightMin) - { - problemBlocks++; - JITDUMP(" " FMT_BB " - block weight %d < incoming min %d\n", block->bbNum, blockWeight, - incomingWeightMin); - } - else if (blockWeight > incomingWeightMax) - { - problemBlocks++; - JITDUMP(" " FMT_BB " - block weight %d > incoming max %d\n", block->bbNum, blockWeight, - incomingWeightMax); - } - } + incomingConsistent = fgDebugCheckIncomingProfileData(block); } if (verifyOutgoing) { - const unsigned numSuccs = block->NumSucc(); - - if (numSuccs == 0) - { - problemBlocks++; - JITDUMP(" " FMT_BB " - expected to see successors\n", block->bbNum); - } - else - { - BasicBlock::weight_t outgoingWeightMin = 0; - BasicBlock::weight_t outgoingWeightMax = 0; - - // Walking successor edges is a bit wonky. Seems like it should be easier. - // Note this can also fail to enumerate all the edges, if we have a multigraph - // - int missingEdges = 0; - - for (unsigned i = 0; i < numSuccs; i++) - { - BasicBlock* succBlock = block->GetSucc(i); - flowList* succEdge = nullptr; - - for (flowList* edge = succBlock->bbPreds; edge != nullptr; edge = edge->flNext) - { - if (edge->getBlock() == block) - { - succEdge = edge; - break; - } - } - - if (succEdge == nullptr) - { - missingEdges++; - JITDUMP(" " FMT_BB " can't find successor edge to " FMT_BB "\n", block->bbNum, - succBlock->bbNum); - } - else - { - outgoingWeightMin += succEdge->edgeWeightMin(); - outgoingWeightMax += succEdge->edgeWeightMax(); - } - } + outgoingConsistent = fgDebugCheckOutgoingProfileData(block); + } - if (missingEdges > 0) - { - JITDUMP(" " FMT_BB " - missing %d successor edges\n", block->bbNum, missingEdges); - problemBlocks++; - } - if (outgoingWeightMin > outgoingWeightMax) - { - problemBlocks++; - JITDUMP(" " FMT_BB " - outgoing min %d > outgoing max %d\n", block->bbNum, outgoingWeightMin, - outgoingWeightMax); - } - else if (blockWeight < outgoingWeightMin) - { - problemBlocks++; - JITDUMP(" " FMT_BB " - block weight %d < outgoing min %d\n", block->bbNum, blockWeight, - outgoingWeightMin); - } - else if (blockWeight > outgoingWeightMax) - { - problemBlocks++; - JITDUMP(" " FMT_BB " - block weight %d > outgoing max %d\n", block->bbNum, blockWeight, - outgoingWeightMax); - } - } + if (!incomingConsistent || !outgoingConsistent) + { + problemBlocks++; } } @@ -3434,7 +3380,7 @@ void Compiler::fgDebugCheckProfileData() if (entryWeight != exitWeight) { problemBlocks++; - JITDUMP(" Entry %d exit %d mismatch\n", entryWeight, exitWeight); + JITDUMP(" Entry %f exit %f weight mismatch\n", entryWeight, exitWeight); } } @@ -3464,4 +3410,145 @@ void Compiler::fgDebugCheckProfileData() } } +//------------------------------------------------------------------------ +// fgDebugCheckIncomingProfileData: verify profile data flowing into a +// block matches the profile weight of the block. +// +// Arguments: +// block - block to check +// +// Returns: +// true if counts consistent, false otherwise. +// +// Notes: +// Only useful to call on blocks with predecessors. +// +bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block) +{ + BasicBlock::weight_t const blockWeight = block->bbWeight; + BasicBlock::weight_t incomingWeightMin = 0; + BasicBlock::weight_t incomingWeightMax = 0; + bool foundPreds = false; + + for (flowList* predEdge = block->bbPreds; predEdge != nullptr; predEdge = predEdge->flNext) + { + incomingWeightMin += predEdge->edgeWeightMin(); + incomingWeightMax += predEdge->edgeWeightMax(); + foundPreds = true; + } + + if (!foundPreds) + { + // Assume this is ok. + // + return true; + } + + if (incomingWeightMin > incomingWeightMax) + { + JITDUMP(" " FMT_BB " - incoming min %f > incoming max %f\n", block->bbNum, incomingWeightMin, + incomingWeightMax); + return false; + } + + if (blockWeight < incomingWeightMin) + { + JITDUMP(" " FMT_BB " - block weight %f < incoming min %f\n", block->bbNum, blockWeight, incomingWeightMin); + return false; + } + + if (blockWeight > incomingWeightMax) + { + JITDUMP(" " FMT_BB " - block weight %f > incoming max %f\n", block->bbNum, blockWeight, incomingWeightMax); + return false; + } + + return true; +} + +//------------------------------------------------------------------------ +// fgDebugCheckOutgoingProfileData: verify profile data flowing out of +// a block matches the profile weight of the block. +// +// Arguments: +// block - block to check +// +// Returns: +// true if counts consistent, false otherwise. +// +// Notes: +// Only useful to call on blocks with successors. +// +bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block) +{ + const unsigned numSuccs = block->NumSucc(); + + if (numSuccs == 0) + { + // Assume this is ok. + // + return true; + } + + BasicBlock::weight_t const blockWeight = block->bbWeight; + BasicBlock::weight_t outgoingWeightMin = 0; + BasicBlock::weight_t outgoingWeightMax = 0; + + // Walking successor edges is a bit wonky. Seems like it should be easier. + // Note this can also fail to enumerate all the edges, if we have a multigraph + // + int missingEdges = 0; + + for (unsigned i = 0; i < numSuccs; i++) + { + BasicBlock* succBlock = block->GetSucc(i); + flowList* succEdge = nullptr; + + for (flowList* edge = succBlock->bbPreds; edge != nullptr; edge = edge->flNext) + { + if (edge->getBlock() == block) + { + succEdge = edge; + break; + } + } + + if (succEdge == nullptr) + { + missingEdges++; + JITDUMP(" " FMT_BB " can't find successor edge to " FMT_BB "\n", block->bbNum, succBlock->bbNum); + continue; + } + + outgoingWeightMin += succEdge->edgeWeightMin(); + outgoingWeightMax += succEdge->edgeWeightMax(); + } + + if (missingEdges > 0) + { + JITDUMP(" " FMT_BB " - missing %d successor edges\n", block->bbNum, missingEdges); + } + + if (outgoingWeightMin > outgoingWeightMax) + { + JITDUMP(" " FMT_BB " - outgoing min %f > outgoing max %f\n", block->bbNum, outgoingWeightMin, + outgoingWeightMax); + return false; + } + + if (blockWeight < outgoingWeightMin) + { + JITDUMP(" " FMT_BB " - block weight %f < outgoing min %f\n", block->bbNum, blockWeight, outgoingWeightMin); + return false; + } + + if (blockWeight > outgoingWeightMax) + { + JITDUMP(" " FMT_BB " - block weight %f > outgoing max %f\n", block->bbNum, blockWeight, outgoingWeightMax); + return false; + } + + return missingEdges == 0; +} + #endif // DEBUG diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 0cf14f07e66e8..2caba01ff5d80 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -4085,11 +4085,23 @@ static Statement* optFindLoopTermTest(BasicBlock* bottom) return result; } -/***************************************************************************** - * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }" - */ - -void Compiler::fgOptWhileLoop(BasicBlock* block) +//----------------------------------------------------------------------------- +// optInvertWhileLoop: modify flow and duplicate code so that for/while loops are +// entered at top and tested at bottom (aka loop rotation or bottom testing). +// +// Arguments: +// block -- block that may be the predecessor of the un-rotated loop's test block. +// +// Notes: +// Optimizes "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }" +// Does not modify every loop +// +// Makes no changes if the flow pattern match fails. +// +// May not modify a loop if profile is unfavorable, if the cost of duplicating +// code is large (factoring in potential CSEs) +// +void Compiler::optInvertWhileLoop(BasicBlock* block) { noway_assert(opts.OptimizationEnabled()); noway_assert(compCodeOpt() != SMALL_CODE); @@ -4211,12 +4223,12 @@ void Compiler::fgOptWhileLoop(BasicBlock* block) gtPrepareCost(condTree); unsigned estDupCostSz = condTree->GetCostSz(); - double loopIterations = (double)BB_LOOP_WEIGHT_SCALE; + BasicBlock::weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool allProfileWeightsAreValid = false; - BasicBlock::weight_t weightBlock = block->bbWeight; - BasicBlock::weight_t weightTest = bTest->bbWeight; - BasicBlock::weight_t weightNext = block->bbNext->bbWeight; + bool allProfileWeightsAreValid = false; + BasicBlock::weight_t const weightBlock = block->bbWeight; + BasicBlock::weight_t const weightTest = bTest->bbWeight; + BasicBlock::weight_t const weightNext = block->bbNext->bbWeight; // If we have profile data then we calculate the number of time // the loop will iterate into loopIterations @@ -4226,26 +4238,39 @@ void Compiler::fgOptWhileLoop(BasicBlock* block) // have good profile weights if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight()) { - allProfileWeightsAreValid = true; - // If this while loop never iterates then don't bother transforming + // if (weightNext == 0) { return; } - // with (weighNext > 0) we should also have (weightTest >= weightBlock) - // if the profile weights are all valid. + // We generally expect weightTest == weightNext + weightBlock. // - // weightNext is the number of time this loop iterates - // weightBlock is the number of times that we enter the while loop - // loopIterations is the average number of times that this loop iterates + // Tolerate small inconsistencies... // - if (weightTest >= weightBlock) + if (!fgProfileWeightsConsistent(weightBlock + weightNext, weightTest)) { - loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight; + JITDUMP("Profile weights locally inconsistent: block %f, next %f, test %f\n", weightBlock, weightNext, + weightTest); + } + else + { + allProfileWeightsAreValid = true; + + // Determine iteration count + // + // weightNext is the number of time this loop iterates + // weightBlock is the number of times that we enter the while loop + // loopIterations is the average number of times that this loop iterates + // + loopIterations = weightNext / weightBlock; } } + else + { + JITDUMP("Missing profile data for loop!\n"); + } } unsigned maxDupCostSz = 32; @@ -4335,44 +4360,6 @@ void Compiler::fgOptWhileLoop(BasicBlock* block) block->bbFlags |= copyFlags; } - // If we have profile data for all blocks and we know that we are cloning the - // bTest block into block and thus changing the control flow from block so - // that it no longer goes directly to bTest anymore, we have to adjust the - // weight of bTest by subtracting out the weight of block. - // - if (allProfileWeightsAreValid) - { - // - // Some additional sanity checks before adjusting the weight of bTest - // - if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT)) - { - // Get the two edge that flow out of bTest - flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest); - flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest); - - // Calculate the new weight for block bTest - - BasicBlock::weight_t newWeightTest = - (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT; - bTest->bbWeight = newWeightTest; - - if (newWeightTest == BB_ZERO_WEIGHT) - { - bTest->bbFlags |= BBF_RUN_RARELY; - // All out edge weights are set to zero - edgeToNext->setEdgeWeights(BB_ZERO_WEIGHT, BB_ZERO_WEIGHT); - edgeToJump->setEdgeWeights(BB_ZERO_WEIGHT, BB_ZERO_WEIGHT); - } - else - { - // Update the our edge weights - edgeToNext->setEdgeWeights(BB_ZERO_WEIGHT, min(edgeToNext->edgeWeightMax(), newWeightTest)); - edgeToJump->setEdgeWeights(BB_ZERO_WEIGHT, min(edgeToJump->edgeWeightMax(), newWeightTest)); - } - } - } - /* Change the block to end with a conditional jump */ block->bbJumpKind = BBJ_COND; @@ -4391,7 +4378,7 @@ void Compiler::fgOptWhileLoop(BasicBlock* block) #ifdef DEBUG if (verbose) { - printf("\nDuplicating loop condition in " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")", block->bbNum, + printf("\nDuplicated loop condition in " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")", block->bbNum, block->bbNext->bbNum, bTest->bbNum); printf("\nEstimated code size expansion is %d\n ", estDupCostSz); @@ -4399,34 +4386,91 @@ void Compiler::fgOptWhileLoop(BasicBlock* block) } #endif -} -/***************************************************************************** - * - * Optimize the BasicBlock layout of the method - */ + // If we have profile data for all blocks and we know that we are cloning the + // bTest block into block and thus changing the control flow from block so + // that it no longer goes directly to bTest anymore, we have to adjust + // various weights. + // + if (allProfileWeightsAreValid) + { + // Update the weight for bTest + // + JITDUMP("Reducing profile weight of " FMT_BB " from %f to %f\n", bTest->bbNum, weightTest, weightNext); + bTest->bbWeight = weightNext; -void Compiler::optOptimizeLayout() -{ - noway_assert(opts.OptimizationEnabled()); + // Determine the new edge weights. + // + // We project the next/jump ratio for block and bTest by using + // the original likelihoods out of bTest. + // + // Note "next" is the loop top block, not bTest's bbNext, + // we'll call this latter block "after". + // + BasicBlock::weight_t const testToNextLikelihood = weightNext / weightTest; + BasicBlock::weight_t const testToAfterLikelihood = 1.0f - testToNextLikelihood; -#ifdef DEBUG - if (verbose) - { - printf("*************** In optOptimizeLayout()\n"); - fgDispHandlerTab(); - } + // Adjust edges out of bTest (which now has weight weightNext) + // + BasicBlock::weight_t const testToNextWeight = weightNext * testToNextLikelihood; + BasicBlock::weight_t const testToAfterWeight = weightNext * testToAfterLikelihood; - /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */ - fgDebugCheckBBlist(); + flowList* const edgeTestToNext = fgGetPredForBlock(bTest->bbJumpDest, bTest); + flowList* const edgeTestToAfter = fgGetPredForBlock(bTest->bbNext, bTest); + + JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (iterate loop)\n", bTest->bbNum, + bTest->bbJumpDest->bbNum, testToNextWeight); + JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (exit loop)\n", bTest->bbNum, bTest->bbNext->bbNum, + testToAfterWeight); + + edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight); + edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight); + + // Adjust edges out of block, using the same distribution. + // + JITDUMP("Profile weight of " FMT_BB " remains unchanged at %f\n", block->bbNum, weightBlock); + + BasicBlock::weight_t const blockToNextLikelihood = testToNextLikelihood; + BasicBlock::weight_t const blockToAfterLikelihood = testToAfterLikelihood; + + BasicBlock::weight_t const blockToNextWeight = weightBlock * blockToNextLikelihood; + BasicBlock::weight_t const blockToAfterWeight = weightBlock * blockToAfterLikelihood; + + flowList* const edgeBlockToNext = fgGetPredForBlock(block->bbNext, block); + flowList* const edgeBlockToAfter = fgGetPredForBlock(block->bbJumpDest, block); + + JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (enter loop)\n", block->bbNum, block->bbNext->bbNum, + blockToNextWeight); + JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (avoid loop)\n", block->bbNum, + block->bbJumpDest->bbNum, blockToAfterWeight); + + edgeBlockToNext->setEdgeWeights(blockToNextWeight, blockToNextWeight); + edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight); + +#ifdef DEBUG + // Verify profile for the two target blocks is consistent. + // + fgDebugCheckIncomingProfileData(block->bbNext); + fgDebugCheckIncomingProfileData(block->bbJumpDest); #endif + } +} +//----------------------------------------------------------------------------- +// optInvertLoops: invert while loops in the method +// +// Returns: +// suitable phase status +// +PhaseStatus Compiler::optInvertLoops() +{ + noway_assert(opts.OptimizationEnabled()); noway_assert(fgModified == false); for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { - /* Make sure the appropriate fields are initialized */ - + // Make sure the appropriate fields are initialized + // if (block->bbWeight == BB_ZERO_WEIGHT) { /* Zero weighted block can't have a LOOP_HEAD flag */ @@ -4436,36 +4480,58 @@ void Compiler::optOptimizeLayout() if (compCodeOpt() != SMALL_CODE) { - /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */ - - fgOptWhileLoop(block); + optInvertWhileLoop(block); } } - if (fgModified) + const bool madeChanges = fgModified; + + if (madeChanges) { - // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop - fgComputeEdgeWeights(); + // Reset fgModified here as we've done a consistent set of edits. + // + fgModified = false; } - fgUpdateFlowGraph(true); - fgReorderBlocks(); - fgUpdateFlowGraph(); + return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } -/***************************************************************************** - * - * Perform loop inversion, find and classify natural loops - */ +//----------------------------------------------------------------------------- +// optOptimizeLayout: reorder blocks to reduce cost of control flow +// +// Returns: +// suitable phase status +// +PhaseStatus Compiler::optOptimizeLayout() +{ + noway_assert(opts.OptimizationEnabled()); + noway_assert(fgModified == false); -void Compiler::optOptimizeLoops() + bool madeChanges = false; + const bool allowTailDuplication = true; + + madeChanges |= fgUpdateFlowGraph(allowTailDuplication); + madeChanges |= fgReorderBlocks(); + madeChanges |= fgUpdateFlowGraph(); + + return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; +} + +//----------------------------------------------------------------------------- +// optFindLoops: find and classify natural loops +// +// Notes: +// Also (re)sets all non-IBC block weights, and marks loops potentially needing +// alignment padding. +// +PhaseStatus Compiler::optFindLoops() { noway_assert(opts.OptimizationEnabled()); #ifdef DEBUG if (verbose) { - printf("*************** In optOptimizeLoops()\n"); + printf("*************** In optFindLoops()\n"); } #endif @@ -4579,6 +4645,8 @@ void Compiler::optOptimizeLoops() #endif optLoopsMarked = true; } + + return PhaseStatus::MODIFIED_EVERYTHING; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index 860f7d66057f0..b56d29b2b519e 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -157,15 +157,16 @@ void Phase::PostPhase(PhaseStatus status) // well as the new-style phases that have been updated to return // PhaseStatus from their DoPhase methods. // - static Phases s_allowlist[] = {PHASE_IMPORTATION, PHASE_IBCINSTR, - PHASE_IBCPREP, PHASE_INCPROFILE, - PHASE_INDXCALL, PHASE_MORPH_INLINE, - PHASE_ALLOCATE_OBJECTS, PHASE_EMPTY_TRY, - PHASE_EMPTY_FINALLY, PHASE_MERGE_FINALLY_CHAINS, - PHASE_CLONE_FINALLY, PHASE_MERGE_THROWS, - PHASE_MORPH_GLOBAL, PHASE_BUILD_SSA, - PHASE_RATIONALIZE, PHASE_LOWERING, - PHASE_STACK_LEVEL_SETTER}; + static Phases s_allowlist[] = {PHASE_IMPORTATION, PHASE_IBCINSTR, + PHASE_IBCPREP, PHASE_INCPROFILE, + PHASE_INDXCALL, PHASE_MORPH_INLINE, + PHASE_ALLOCATE_OBJECTS, PHASE_EMPTY_TRY, + PHASE_EMPTY_FINALLY, PHASE_MERGE_FINALLY_CHAINS, + PHASE_CLONE_FINALLY, PHASE_MERGE_THROWS, + PHASE_MORPH_GLOBAL, PHASE_INVERT_LOOPS, + PHASE_OPTIMIZE_LAYOUT, PHASE_FIND_LOOPS, + PHASE_BUILD_SSA, PHASE_RATIONALIZE, + PHASE_LOWERING, PHASE_STACK_LEVEL_SETTER}; if (madeChanges) {