Skip to content

Commit

Permalink
JIT: defer some flow graph reordering until after loop recognition (#…
Browse files Browse the repository at this point in the history
…69878)

The JIT currently will aggressively reorder the flow graph before running its
loop recognition phases. When there is PGO data this sometimes perturbs the
block order so that loops are no longer recognized, and we miss out on some
loop optimizations.

This change defers most block reordering until after the JIT has gone through
the optimization phases. There is still a limited form of flow cleanup done
early on.

There is also a compensating change in loop recognition in one place where it was
relying on adjacent blocks being combined.

Fixes #67318.
  • Loading branch information
AndyAyersMS authored May 30, 2022
1 parent 7d2ab66 commit c0227b4
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 19 deletions.
11 changes: 9 additions & 2 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4796,9 +4796,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops);

// Optimize block order
// Run some flow graph optimizations (but don't reorder)
//
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow);

// Compute reachability sets and dominators.
//
Expand Down Expand Up @@ -4988,6 +4988,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// Insert GC Polls
DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);

// Optimize block order
//
if (opts.OptimizationEnabled())
{
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
}

// Determine start of cold region if we are hot/cold splitting
//
DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock);
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5223,7 +5223,7 @@ class Compiler
void fgComputeCalledCount(weight_t returnWeight);
void fgComputeEdgeWeights();

bool fgReorderBlocks();
bool fgReorderBlocks(bool useProfile);

PhaseStatus fgDetermineFirstColdBlock();

Expand Down Expand Up @@ -5998,6 +5998,7 @@ class Compiler

public:
PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication
PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method
PhaseStatus optSetBlockWeights();
PhaseStatus optFindLoopsPhase(); // Finds loops and records them in the loop table
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compphases.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets",
#endif // FEATURE_EH_FUNCLETS
CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", "MRGTHROW", false, -1, false)
CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", "LOOP-INV", false, -1, false)
CompPhaseNameMacro(PHASE_OPTIMIZE_FLOW, "Optimize control flow", "OPT-FLOW", false, -1, false)
CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1, false)
CompPhaseNameMacro(PHASE_COMPUTE_REACHABILITY, "Compute blocks reachability", "BL_REACH", false, -1, false)
CompPhaseNameMacro(PHASE_SET_BLOCK_WEIGHTS, "Set block weights", "BL-WEIGHTS", false, -1, false)
Expand Down
37 changes: 27 additions & 10 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4639,14 +4639,22 @@ bool Compiler::fgExpandRarelyRunBlocks()

//-----------------------------------------------------------------------------
// fgReorderBlocks: reorder blocks to favor frequent fall through paths,
// move rare blocks to the end of the method/eh region, and move
// funclets to the ends of methods.
// move rare blocks to the end of the method/eh region, and move
// funclets to the ends of methods.
//
// Arguments:
// useProfile - if true, use profile data (if available) to more aggressively
// reorder the blocks.
//
// Returns:
// True if anything got reordered. Reordering blocks may require changing
// IR to reverse branch conditions.
// True if anything got reordered. Reordering blocks may require changing
// IR to reverse branch conditions.
//
bool Compiler::fgReorderBlocks()
// Notes:
// We currently allow profile-driven switch opts even when useProfile is false,
// as they are unlikely to lead to reordering.
//
bool Compiler::fgReorderBlocks(bool useProfile)
{
noway_assert(opts.compDbgCode == false);

Expand Down Expand Up @@ -4726,7 +4734,7 @@ bool Compiler::fgReorderBlocks()
continue;
}

bool reorderBlock = true; // This is set to false if we decide not to reorder 'block'
bool reorderBlock = useProfile;
bool isRare = block->isRunRarely();
BasicBlock* bDest = nullptr;
bool forwardBranch = false;
Expand All @@ -4752,7 +4760,8 @@ bool Compiler::fgReorderBlocks()

weight_t profHotWeight = -1;

if (bPrev->hasProfileWeight() && block->hasProfileWeight() && ((bDest == nullptr) || bDest->hasProfileWeight()))
if (useProfile && bPrev->hasProfileWeight() && block->hasProfileWeight() &&
((bDest == nullptr) || bDest->hasProfileWeight()))
{
//
// All blocks have profile information
Expand Down Expand Up @@ -5682,13 +5691,21 @@ bool Compiler::fgReorderBlocks()
if (bPrev->bbJumpKind == BBJ_COND)
{
/* Reverse the bPrev jump condition */
Statement* condTestStmt = bPrev->lastStmt();
Statement* const condTestStmt = bPrev->lastStmt();
GenTree* const condTest = condTestStmt->GetRootNode();

GenTree* condTest = condTestStmt->GetRootNode();
noway_assert(condTest->gtOper == GT_JTRUE);

condTest->AsOp()->gtOp1 = gtReverseCond(condTest->AsOp()->gtOp1);

// may need to rethread
//
if (fgStmtListThreaded)
{
JITDUMP("Rethreading " FMT_STMT "\n", condTestStmt->GetID());
gtSetStmtInfo(condTestStmt);
fgSetStmtSeq(condTestStmt);
}

if (bStart2 == nullptr)
{
/* Set the new jump dest for bPrev to the rarely run or uncommon block(s) */
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ PhaseStatus Compiler::fgInsertGCPolls()
if (createdPollBlocks)
{
noway_assert(opts.OptimizationEnabled());
fgReorderBlocks();
fgReorderBlocks(/* useProfileData */ false);
constexpr bool computePreds = true;
constexpr bool computeDoms = false;
fgUpdateChangedFlowGraph(computePreds, computeDoms);
Expand Down
89 changes: 84 additions & 5 deletions src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,51 @@ class LoopSearch
// prevent assertions from complaining about it.
block->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
}

// If block is newNext's only predecessor, move the IR from block to newNext,
// but keep the now-empty block around.
//
// We move the IR because loop recognition has a very limited search capability and
// won't walk from one block's statements to another, even if the blocks form
// a linear chain. So this IR move enhances counted loop recognition.
//
// The logic here is essentially echoing fgCompactBlocks... but we don't call
// that here because we don't want to delete block and do the necessary updates
// to all the other data in flight, and we'd also prefer that newNext be the
// survivor, not block.
//
if ((newNext->bbRefs == 1) && comp->fgCanCompactBlocks(block, newNext))
{
JITDUMP("Moving stmts from " FMT_BB " to " FMT_BB "\n", block->bbNum, newNext->bbNum);
Statement* stmtList1 = block->firstStmt();
Statement* stmtList2 = newNext->firstStmt();

// Is there anything to move?
//
if (stmtList1 != nullptr)
{
// Append newNext stmts to block's stmts.
//
if (stmtList2 != nullptr)
{
Statement* stmtLast1 = block->lastStmt();
Statement* stmtLast2 = newNext->lastStmt();

stmtLast1->SetNextStmt(stmtList2);
stmtList2->SetPrevStmt(stmtLast1);
stmtList1->SetPrevStmt(stmtLast2);
}

// Move block's stmts to newNext
//
newNext->bbStmtList = stmtList1;
block->bbStmtList = nullptr;

// Update newNext's block flags
//
newNext->bbFlags |= (block->bbFlags & BBF_COMPACT_UPD);
}
}
}

// Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS.
Expand Down Expand Up @@ -4797,22 +4842,56 @@ PhaseStatus Compiler::optInvertLoops()
return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

//-----------------------------------------------------------------------------
// optOptimizeFlow: simplify flow graph
//
// Returns:
//    suitable phase status
//
// Notes:
//    Does not do profile-based reordering, to try and ensure that we
//    recognize and represent as many loops as possible.
//
PhaseStatus Compiler::optOptimizeFlow()
{
    noway_assert(opts.OptimizationEnabled());
    noway_assert(fgModified == false);

    bool madeChanges = false;

    // Clean up flow (allowing tail duplication to expose more opportunities),
    // then do a non-profile-driven block reorder, then clean up again.
    // Note the argument comment matches the declared parameter name
    // (fgReorderBlocks(bool useProfile)).
    //
    madeChanges |= fgUpdateFlowGraph(/* allowTailDuplication */ true);
    madeChanges |= fgReorderBlocks(/* useProfile */ false);
    madeChanges |= fgUpdateFlowGraph();

    // fgReorderBlocks can cause IR changes even if it does not modify
    // the flow graph. It calls gtPrepareCost which can cause operand swapping.
    // Work around this for now.
    //
    // Note phase status only impacts dumping and checking done post-phase,
    // it has no impact on a release build.
    //
    madeChanges = true;

    return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

//-----------------------------------------------------------------------------
// optOptimizeLayout: reorder blocks to reduce cost of control flow
//
// Returns:
// suitable phase status
//
// Notes:
// Reorders using profile data, if available.
//
PhaseStatus Compiler::optOptimizeLayout()
{
noway_assert(opts.OptimizationEnabled());
noway_assert(fgModified == false);

bool madeChanges = false;
const bool allowTailDuplication = true;
bool madeChanges = false;

madeChanges |= fgUpdateFlowGraph(allowTailDuplication);
madeChanges |= fgReorderBlocks();
madeChanges |= fgUpdateFlowGraph(/* allowTailDuplication */ false);
madeChanges |= fgReorderBlocks(/* useProfile */ true);
madeChanges |= fgUpdateFlowGraph();

// fgReorderBlocks can cause IR changes even if it does not modify
Expand Down

0 comments on commit c0227b4

Please sign in to comment.