Skip to content

Commit

Permalink
JIT: Factor SSA's DFS and profile synthesis's loop finding (#95251)
Browse files Browse the repository at this point in the history
Factor out SSA's general DFS (that takes EH into account) and
encapsulate it in a `FlowGraphDfsTree` class.

Factor out profile synthesis's loop finding and encapsulate it in a
`FlowGraphNaturalLoops` class. Switch construction of it to use the
general DFS instead of the restricted one (that does not account for
exceptional flow).

Optimize a few things in the process:
* Avoid storing loop blocks in a larger than necessary bit vector; store
  them starting from the loop header's postorder index instead.
* Provide post-order and reverse post-order visitors for the loop
  blocks; switch profile synthesis to use this in one place.

No diffs are expected normally. A small number of diffs are expected when
profile synthesis is enabled, due to the modelling of exceptional flow and
also from handling unreachable predecessors (which previously caused some
loops to be rejected as unnatural loops).

My future plans are to replace the existing loop representation with this
factored version, removing the lexicality requirement in the process, and
hopefully fixing some of our deficiencies.
  • Loading branch information
jakobbotsch authored Nov 28, 2023
1 parent 8ba8da3 commit f106d7e
Show file tree
Hide file tree
Showing 15 changed files with 1,054 additions and 635 deletions.
118 changes: 118 additions & 0 deletions src/coreclr/jit/bitsetasshortlong.h
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,124 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
}
};

//------------------------------------------------------------------------
// VisitBits: Invoke a callback for each index that is set in the bit
// vector, in ascending order of indices.
//
// Type parameters:
// TFunc - Type of callback functor
//
// Arguments:
// env - The traits
// bs - The bit vector
// func - The functor callback. Return true to continue to the next bit,
// and false to abort.
//
// Returns:
// True if all bits were iterated; false if the callback returned false
// and iteration was aborted.
//
template <typename TFunc>
static bool VisitBits(Env env, BitSetShortLongRep bs, TFunc func)
{
// Pick the forward bit-scan primitive that operates on a size_t-wide value for this host.
#ifdef HOST_64BIT
#define BitScanForwardSizeT BitScanForward64
#else
#define BitScanForwardSizeT BitScanForward
#endif

    DWORD bitIndex;

    if (BitSetOps::IsShort(env))
    {
        // Short representation: the bits are stored directly in the pointer-sized value itself.
        // Each iteration reports the bit found by the scan and then clears it from the working copy.
        for (size_t remaining = reinterpret_cast<size_t>(bs); BitScanForwardSizeT(&bitIndex, remaining);
             remaining &= ~(size_t(1) << bitIndex))
        {
            if (!func(bitIndex))
            {
                return false;
            }
        }

        return true;
    }

    // Long representation: walk the backing words from the first to the last, draining each
    // word's set bits before moving on to the next one.
    const unsigned wordCount = BitSetTraits::GetArrSize(env);
    for (unsigned wordIndex = 0; wordIndex < wordCount; wordIndex++)
    {
        for (size_t remaining = bs[wordIndex]; BitScanForwardSizeT(&bitIndex, remaining);
             remaining &= ~(size_t(1) << bitIndex))
        {
            // Translate the word-relative bit position into an index into the whole bit vector.
            if (!func(wordIndex * BitsInSizeT + bitIndex))
            {
                return false;
            }
        }
    }

    return true;
#undef BitScanForwardSizeT
}

//------------------------------------------------------------------------
// VisitBitsReverse: Invoke a callback for each index that is set in the
// bit vector, in descending order of indices.
//
// Type parameters:
// TFunc - Type of callback functor
//
// Arguments:
// env - The traits
// bs - The bit vector
// func - The functor callback. Return true to continue to the next bit,
// and false to abort.
//
// Returns:
// True if all bits were iterated; false if the callback returned false
// and iteration was aborted.
//
template <typename TFunc>
static bool VisitBitsReverse(Env env, BitSetShortLongRep bs, TFunc func)
{
// Pick the reverse bit-scan primitive that operates on a size_t-wide value for this host.
#ifdef HOST_64BIT
#define BitScanReverseSizeT BitScanReverse64
#else
#define BitScanReverseSizeT BitScanReverse
#endif

    DWORD bitIndex;

    if (BitSetOps::IsShort(env))
    {
        // Short representation: the bits are stored directly in the pointer-sized value itself.
        // Each iteration reports the bit found by the scan and then clears it from the working copy.
        for (size_t remaining = reinterpret_cast<size_t>(bs); BitScanReverseSizeT(&bitIndex, remaining);
             remaining &= ~(size_t(1) << bitIndex))
        {
            if (!func(bitIndex))
            {
                return false;
            }
        }

        return true;
    }

    // Long representation: walk the backing words from the last to the first, draining each
    // word's set bits before moving on to the previous one.
    const unsigned wordCount = BitSetTraits::GetArrSize(env);
    for (unsigned wordsLeft = wordCount; wordsLeft != 0; wordsLeft--)
    {
        const unsigned wordIndex = wordsLeft - 1;

        for (size_t remaining = bs[wordIndex]; BitScanReverseSizeT(&bitIndex, remaining);
             remaining &= ~(size_t(1) << bitIndex))
        {
            // Translate the word-relative bit position into an index into the whole bit vector.
            if (!func(wordIndex * BitsInSizeT + bitIndex))
            {
                return false;
            }
        }
    }

    return true;
#undef BitScanReverseSizeT
}

typedef const BitSetShortLongRep& ValArgType;
typedef BitSetShortLongRep RetValType;
};
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -2103,7 +2103,7 @@ class AllSuccessorEnumerator
}

// Returns the next available successor or `nullptr` if there are no more successors.
BasicBlock* NextSuccessor(Compiler* comp)
BasicBlock* NextSuccessor()
{
m_curSucc++;
if (m_curSucc >= m_numSuccs)
Expand Down
197 changes: 195 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1957,6 +1957,197 @@ inline LoopFlags& operator&=(LoopFlags& a, LoopFlags b)
return a = (LoopFlags)((unsigned short)a & (unsigned short)b);
}

// Represents a depth-first search tree of the flow graph.
class FlowGraphDfsTree
{
private:
    // Compiler instance this tree was built for.
    Compiler* m_comp;

    // Array of the visited blocks, in post-order, together with its length.
    BasicBlock** m_postOrder;
    unsigned     m_postOrderCount;

public:
    // Wrap an already computed post-order; the array is stored as-is, not copied.
    FlowGraphDfsTree(Compiler* comp, BasicBlock** postOrder, unsigned postOrderCount)
        : m_comp(comp), m_postOrder(postOrder), m_postOrderCount(postOrderCount)
    {
    }

    // Simple accessors for the state captured at construction.
    Compiler* GetCompiler() const
    {
        return m_comp;
    }

    BasicBlock** GetPostOrder() const
    {
        return m_postOrder;
    }

    unsigned GetPostOrderCount() const
    {
        return m_postOrderCount;
    }

    // Bit vector traits sized to hold one bit per block in the post-order.
    BitVecTraits PostOrderTraits() const
    {
        return BitVecTraits(m_postOrderCount, m_comp);
    }

    // Queries over the tree; both are defined out of line.
    bool Contains(BasicBlock* block) const;
    bool IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const;
};

// Represents a single natural loop of the flow graph. Instances are created by
// FlowGraphNaturalLoops (a friend), since the constructor is private.
class FlowGraphNaturalLoop
{
    friend class FlowGraphNaturalLoops;

    // DFS tree passed at construction.
    const FlowGraphDfsTree* m_tree;
    // Header block of the loop.
    BasicBlock* m_header;
    // Parent loop; nullptr until one is assigned.
    FlowGraphNaturalLoop* m_parent = nullptr;
    // Bit vector of blocks in the loop; each index is the RPO index of a block,
    // with the header block's RPO index subtracted.
    BitVec   m_blocks;
    unsigned m_blocksSize = 0;
    // Edges associated with the loop, grouped by role.
    jitstd::vector<FlowEdge*> m_backEdges;
    jitstd::vector<FlowEdge*> m_entryEdges;
    jitstd::vector<FlowEdge*> m_exitEdges;
    unsigned                  m_index = 0;

    FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* head);

    // Helpers for mapping blocks to positions in m_blocks; defined out of line.
    unsigned LoopBlockBitVecIndex(BasicBlock* block);
    bool TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex);

    BitVecTraits LoopBlockTraits();

public:
    BasicBlock* GetHeader() const
    {
        return m_header;
    }

    const FlowGraphDfsTree* GetDfsTree() const
    {
        return m_tree;
    }

    FlowGraphNaturalLoop* GetParent() const
    {
        return m_parent;
    }

    unsigned GetIndex() const
    {
        return m_index;
    }

    // The edge accessors only hand out read-only views, so they are
    // const-qualified like the other getters and can be used through a
    // pointer or reference to a const loop.
    const jitstd::vector<FlowEdge*>& BackEdges() const
    {
        return m_backEdges;
    }

    const jitstd::vector<FlowEdge*>& EntryEdges() const
    {
        return m_entryEdges;
    }

    const jitstd::vector<FlowEdge*>& ExitEdges() const
    {
        return m_exitEdges;
    }

    bool ContainsBlock(BasicBlock* block);

    // Visitors over the blocks of the loop; defined out of line.
    template <typename TFunc>
    BasicBlockVisit VisitLoopBlocksReversePostOrder(TFunc func);

    template <typename TFunc>
    BasicBlockVisit VisitLoopBlocksPostOrder(TFunc func);

    template <typename TFunc>
    BasicBlockVisit VisitLoopBlocks(TFunc func);
};

// Collection of the natural loops found for a DFS tree of the flow graph.
// Instances are obtained through the static Find factory, since the
// constructor is private.
class FlowGraphNaturalLoops
{
    // DFS tree passed at construction.
    const FlowGraphDfsTree* m_dfs;
    // The loops; iterated forwards by InReversePostOrder and backwards by InPostOrder.
    jitstd::vector<FlowGraphNaturalLoop*> m_loops;
    // Counter backing HaveNonNaturalLoopCycles.
    unsigned m_improperLoopHeaders = 0;

    FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs);

    static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list<BasicBlock*>& worklist);

public:
    // Number of loops in the collection. Read-only, hence const-qualified.
    size_t NumLoops() const
    {
        return m_loops.size();
    }

    // True if at least one improper loop header was recorded. Read-only,
    // hence const-qualified.
    bool HaveNonNaturalLoopCycles() const
    {
        return m_improperLoopHeaders > 0;
    }

    FlowGraphNaturalLoop* GetLoopFromHeader(BasicBlock* header);

    bool IsLoopBackEdge(FlowEdge* edge);
    bool IsLoopExitEdge(FlowEdge* edge);

    // Range adapter that walks the loop vector from back to front.
    class LoopsPostOrderIter
    {
        jitstd::vector<FlowGraphNaturalLoop*>* m_loops;

    public:
        LoopsPostOrderIter(jitstd::vector<FlowGraphNaturalLoop*>* loops)
            : m_loops(loops)
        {
        }

        jitstd::vector<FlowGraphNaturalLoop*>::reverse_iterator begin()
        {
            return m_loops->rbegin();
        }

        jitstd::vector<FlowGraphNaturalLoop*>::reverse_iterator end()
        {
            return m_loops->rend();
        }
    };

    // Range adapter that walks the loop vector from front to back.
    class LoopsReversePostOrderIter
    {
        jitstd::vector<FlowGraphNaturalLoop*>* m_loops;

    public:
        LoopsReversePostOrderIter(jitstd::vector<FlowGraphNaturalLoop*>* loops)
            : m_loops(loops)
        {
        }

        jitstd::vector<FlowGraphNaturalLoop*>::iterator begin()
        {
            return m_loops->begin();
        }

        jitstd::vector<FlowGraphNaturalLoop*>::iterator end()
        {
            return m_loops->end();
        }
    };

    // Iterate the loops in post order (child loops before parent loops)
    LoopsPostOrderIter InPostOrder()
    {
        return LoopsPostOrderIter(&m_loops);
    }

    // Iterate the loops in reverse post order (parent loops before child loops)
    LoopsReversePostOrderIter InReversePostOrder()
    {
        return LoopsReversePostOrderIter(&m_loops);
    }

    // Factory for the collection; defined out of line.
    static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs);
};

// The following holds information about instr offsets in terms of generated code.

enum class IPmappingDscKind
Expand Down Expand Up @@ -2052,6 +2243,7 @@ class Compiler
friend class LocalsUseVisitor;
friend class Promotion;
friend class ReplaceVisitor;
friend class FlowGraphNaturalLoop;

#ifdef FEATURE_HW_INTRINSICS
friend struct HWIntrinsicInfo;
Expand Down Expand Up @@ -4493,8 +4685,7 @@ class Compiler
unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks
unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information
BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder
BasicBlock** fgSSAPostOrder; // Blocks in postorder, computed during SSA
unsigned fgSSAPostOrderCount; // Number of blocks in fgSSAPostOrder
FlowGraphDfsTree* m_dfs;

// After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute
// dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and
Expand Down Expand Up @@ -5588,6 +5779,8 @@ class Compiler

PhaseStatus fgSetBlockOrder();

FlowGraphDfsTree* fgComputeDfs();

void fgRemoveReturnBlock(BasicBlock* block);

void fgConvertBBToThrowBB(BasicBlock* block);
Expand Down
Loading

0 comments on commit f106d7e

Please sign in to comment.