Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MachineOutliner] Leaf Descendants #90275

Merged
merged 10 commits into from
Jun 18, 2024
34 changes: 31 additions & 3 deletions llvm/include/llvm/Support/SuffixTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class SuffixTree {
/// Each element is an integer representing an instruction in the module.
ArrayRef<unsigned> Str;

/// Whether to consider leaf descendants or only leaf children.
bool OutlinerLeafDescendants;

/// A repeated substring in the tree.
struct RepeatedSubstring {
/// The length of the string.
Expand Down Expand Up @@ -130,11 +133,27 @@ class SuffixTree {
/// this step.
unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd);

/// This vector contains all leaf nodes of this suffix tree. These leaf nodes
/// are identified using post-order depth-first traversal, so that the order
/// of these leaf nodes in the vector matches the order of the leaves in the
/// tree from left to right if one were to draw the tree on paper.
std::vector<SuffixTreeLeafNode *> LeafNodes;

/// Perform a post-order depth-first traversal of the tree and perform two
/// tasks during the traversal. The first is to populate LeafNodes, adding
/// nodes in order of the traversal. The second is to keep track of the leaf
/// descendants of every internal node by assigning values to the LeftLeafIdx
/// and RightLeafIdx fields of SuffixTreeNode for all internal nodes.
void setLeafNodes();

public:
/// Construct a suffix tree from a sequence of unsigned integers.
///
/// \param Str The string to construct the suffix tree for.
SuffixTree(const ArrayRef<unsigned> &Str);
/// \param OutlinerLeafDescendants Whether to consider leaf descendants or
/// only leaf children (used by Machine Outliner).
SuffixTree(const ArrayRef<unsigned> &Str,
bool OutlinerLeafDescendants = false);

/// Iterator for finding all repeated substrings in the suffix tree.
struct RepeatedSubstringIterator {
Expand All @@ -154,6 +173,12 @@ class SuffixTree {
/// instruction lengths.
const unsigned MinLength = 2;

/// Vector of leaf nodes of the suffix tree.
const std::vector<SuffixTreeLeafNode *> &LeafNodes;

/// Whether to consider leaf descendants or only leaf children.
bool OutlinerLeafDescendants = !LeafNodes.empty();

/// Move the iterator to the next repeated substring.
void advance();

Expand All @@ -179,7 +204,10 @@ class SuffixTree {
return !(*this == Other);
}

RepeatedSubstringIterator(SuffixTreeInternalNode *N) : N(N) {
RepeatedSubstringIterator(
SuffixTreeInternalNode *N,
const std::vector<SuffixTreeLeafNode *> &LeafNodes = {})
: N(N), LeafNodes(LeafNodes) {
// Do we have a non-null node?
if (!N)
return;
Expand All @@ -191,7 +219,7 @@ class SuffixTree {
};

typedef RepeatedSubstringIterator iterator;
iterator begin() { return iterator(Root); }
iterator begin() { return iterator(Root, LeafNodes); }
iterator end() { return iterator(nullptr); }
};

Expand Down
25 changes: 24 additions & 1 deletion llvm/include/llvm/Support/SuffixTreeNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ struct SuffixTreeNode {
/// the root to this node.
unsigned ConcatLen = 0;

/// These two indices give a range of indices for its leaf descendants.
/// Imagine drawing a tree on paper and assigning a unique index to each leaf
/// node in monotonically increasing order from left to right. This way of
/// numbering the leaf nodes allows us to associate a continuous range of
/// indices with each internal node. For example, if a node has leaf
/// descendants with indices i, i+1, ..., j, then its LeftLeafIdx is i and
/// its RightLeafIdx is j. These indices are for LeafNodes in the SuffixTree
/// class, which is constructed using post-order depth-first traversal.
unsigned LeftLeafIdx = EmptyIdx;
unsigned RightLeafIdx = EmptyIdx;

public:
// LLVM RTTI boilerplate.
NodeKind getKind() const { return Kind; }
Expand All @@ -56,6 +67,18 @@ struct SuffixTreeNode {
/// \returns the end index of this node.
virtual unsigned getEndIdx() const = 0;

/// \return the index of this node's left most leaf node.
unsigned getLeftLeafIdx() const;

/// \return the index of this node's right most leaf node.
unsigned getRightLeafIdx() const;

/// Set the index of the left most leaf node of this node to \p Idx.
void setLeftLeafIdx(unsigned Idx);

/// Set the index of the right most leaf node of this node to \p Idx.
void setRightLeafIdx(unsigned Idx);

/// Advance this node's StartIdx by \p Inc.
void incrementStartIdx(unsigned Inc);

Expand Down Expand Up @@ -168,4 +191,4 @@ struct SuffixTreeLeafNode : SuffixTreeNode {
virtual ~SuffixTreeLeafNode() = default;
};
} // namespace llvm
#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
8 changes: 7 additions & 1 deletion llvm/lib/CodeGen/MachineOutliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold(
cl::desc(
"The minimum size in bytes before an outlining candidate is accepted"));

// Hidden command-line flag "-outliner-leaf-descendants"; it is initialized to
// true. Its value is forwarded to the SuffixTree built in
// MachineOutliner::findCandidates and selects whether the leaf descendants of
// an internal node, or only its leaf children, are considered.
static cl::opt<bool> OutlinerLeafDescendants(
"outliner-leaf-descendants", cl::init(true), cl::Hidden,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a strong preference, but wonder if we want a disable flag instead, like -disable-outliner-leaf-descendants and we can drop cl::init(false), or completely delete a flag while updating all tests. I'm all open for either option.

cl::desc("Consider all leaf descendants of internal nodes of the suffix "
"tree as candidates for outlining (if false, only leaf children "
"are considered)"));

namespace {

/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
Expand Down Expand Up @@ -576,7 +582,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
void MachineOutliner::findCandidates(
InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
SuffixTree ST(Mapper.UnsignedVec);
SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);

// First, find all of the repeated substrings in the tree of minimum length
// 2.
Expand Down
107 changes: 92 additions & 15 deletions llvm/lib/Support/SuffixTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
//===----------------------------------------------------------------------===//

#include "llvm/Support/SuffixTree.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/SuffixTreeNode.h"
#include <stack>
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved

using namespace llvm;

Expand All @@ -26,7 +28,9 @@ static size_t numElementsInSubstring(const SuffixTreeNode *N) {
return N->getEndIdx() - N->getStartIdx() + 1;
}

SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
bool OutlinerLeafDescendants)
: Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
Root = insertRoot();
Active.Node = Root;

Expand All @@ -46,6 +50,11 @@ SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
// Set the suffix indices of each leaf.
assert(Root && "Root node can't be nullptr!");
setSuffixIndices();

// Collect all leaf nodes of the suffix tree. And for each internal node,
// record the range of leaf nodes that are descendants of it.
if (OutlinerLeafDescendants)
setLeafNodes();
}

SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
Expand Down Expand Up @@ -105,6 +114,68 @@ void SuffixTree::setSuffixIndices() {
}
}

// Populate LeafNodes and record, on every node reached from Root, the range
// [LeftLeafIdx, RightLeafIdx] of LeafNodes indices covered by that node's leaf
// descendants. The traversal is iterative: an explicit stack replaces
// recursion, and each internal node is popped twice (once to schedule its
// children, once after they have all been processed).
void SuffixTree::setLeafNodes() {
// A stack that keeps track of nodes to visit for post-order DFS traversal.
std::stack<SuffixTreeNode *> ToVisit;
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
ToVisit.push(Root);

// This keeps track of the index of the next leaf node to be added to
// the LeafNodes vector of the suffix tree.
unsigned LeafCounter = 0;

// This keeps track of nodes whose children have been added to the stack
// during the post-order depth-first traversal of the tree. Membership in
// this set is what distinguishes the second visit of an internal node from
// the first.
llvm::SmallPtrSet<SuffixTreeInternalNode *, 32> ChildrenAddedToStack;

// Traverse the tree in post-order.
while (!ToVisit.empty()) {
SuffixTreeNode *CurrNode = ToVisit.top();
ToVisit.pop();
if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
// The current node is an internal node.
if (ChildrenAddedToStack.find(CurrInternalNode) !=
ChildrenAddedToStack.end()) {
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
// If the children of the current node have been added to the stack,
// then this is the second time we visit this node and at this point,
// all of its children have already been processed. Now, we can
// set its LeftLeafIdx and RightLeafIdx.
auto it = CurrInternalNode->Children.begin();
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
// An internal node without children is left with whatever indices it
// already had.
if (it != CurrInternalNode->Children.end()) {
// Get the first child to use its RightLeafIdx. The RightLeafIdx is
// used as the first child is the initial one added to the stack, so
// it's the last one to be processed. This implies that the leaf
// descendants of the first child are assigned the largest index
// numbers.
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
CurrNode->setRightLeafIdx(it->second->getRightLeafIdx());
// Get the last child to use its LeftLeafIdx. It was the final one
// pushed onto the stack, hence the first one processed, so its leaf
// descendants carry the smallest index numbers.
while (std::next(it) != CurrInternalNode->Children.end())
it = std::next(it);
CurrNode->setLeftLeafIdx(it->second->getLeftLeafIdx());
assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
"LeftLeafIdx should not be larger than RightLeafIdx");
}
} else {
// This is the first time we visit this node. This means that its
// children have not been added to the stack yet. Hence, we will add
// the current node back to the stack and add its children to the
// stack for processing. The node is pushed before its children so that
// it is popped again only after all of them.
ToVisit.push(CurrNode);
for (auto &ChildPair : CurrInternalNode->Children)
ToVisit.push(ChildPair.second);
ChildrenAddedToStack.insert(CurrInternalNode);
}
} else {
// The current node is a leaf node.
// We can simply set its LeftLeafIdx and RightLeafIdx: a leaf covers
// exactly one index, the one it is about to occupy in LeafNodes.
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
CurrNode->setLeftLeafIdx(LeafCounter);
CurrNode->setRightLeafIdx(LeafCounter);
LeafCounter++;
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
LeafNodes.push_back(CurrLeafNode);
}
}
}

unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
SuffixTreeInternalNode *NeedsLink = nullptr;

Expand Down Expand Up @@ -230,6 +301,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() {

// Each leaf node represents a repeat of a string.
SmallVector<unsigned> RepeatedSubstringStarts;
SmallVector<SuffixTreeLeafNode *> LeafDescendants;

// Continue visiting nodes until we find one which repeats more than once.
while (!InternalNodesToVisit.empty()) {
Expand All @@ -241,30 +313,35 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
// it's too short, we'll quit.
unsigned Length = Curr->getConcatLen();

// Iterate over each child, saving internal nodes for visiting, and
// leaf nodes' SuffixIdx in RepeatedSubstringStarts. Internal nodes
// represent individual strings, which may repeat.
for (auto &ChildPair : Curr->Children) {
// Iterate over each child, saving internal nodes for visiting.
// Internal nodes represent individual strings, which may repeat.
for (auto &ChildPair : Curr->Children)
// Save all of this node's children for processing.
if (auto *InternalChild =
dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
InternalNodesToVisit.push_back(InternalChild);
continue;
}

if (Length < MinLength)
continue;

// Have an occurrence of a potentially repeated string. Save it.
auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
}
// If length of repeated substring is below threshold, then skip it.
if (Length < MinLength)
continue;

// The root never represents a repeated substring. If we're looking at
// that, then skip it.
if (Curr->isRoot())
continue;

// Collect either the leaf children or all leaf descendants of the current
// node, depending on OutlinerLeafDescendants.
if (!OutlinerLeafDescendants) {
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
for (auto &ChildPair : Curr->Children)
if (auto *Leaf = dyn_cast<SuffixTreeLeafNode>(ChildPair.second))
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
} else {
LeafDescendants.assign(LeafNodes.begin() + Curr->getLeftLeafIdx(),
xuanzhang816 marked this conversation as resolved.
Show resolved Hide resolved
LeafNodes.begin() + Curr->getRightLeafIdx() + 1);
for (SuffixTreeLeafNode *Leaf : LeafDescendants)
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
}

// Do we have any repeated substrings?
if (RepeatedSubstringStarts.size() < 2)
continue;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Support/SuffixTreeNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ unsigned SuffixTreeLeafNode::getEndIdx() const {

unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; }
void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; }

// Accessors for the left end of this node's leaf-index range.
unsigned SuffixTreeNode::getLeftLeafIdx() const {
  return LeftLeafIdx;
}
void SuffixTreeNode::setLeftLeafIdx(unsigned Idx) {
  LeftLeafIdx = Idx;
}

// Accessors for the right end of this node's leaf-index range.
unsigned SuffixTreeNode::getRightLeafIdx() const {
  return RightLeafIdx;
}
void SuffixTreeNode::setRightLeafIdx(unsigned Idx) {
  RightLeafIdx = Idx;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs -outliner-leaf-descendants=false %s -o - | FileCheck %s

# Outlining CFI instructions is unsafe if we cannot outline all of the CFI
# instructions from a function. This shows that we choose not to outline the
Expand Down
Loading
Loading