Skip to content

Commit

Permalink
consider leaf descendants to include more candidates for outlining
Browse files Browse the repository at this point in the history
  • Loading branch information
xuanzhang816 committed Apr 26, 2024
1 parent 5b8fb07 commit 75c515f
Show file tree
Hide file tree
Showing 15 changed files with 441 additions and 20 deletions.
34 changes: 31 additions & 3 deletions llvm/include/llvm/Support/SuffixTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class SuffixTree {
/// Each element is an integer representing an instruction in the module.
ArrayRef<unsigned> Str;

/// Whether to consider leaf descendants or only leaf children.
bool OutlinerLeafDescendants;

/// A repeated substring in the tree.
struct RepeatedSubstring {
/// The length of the string.
Expand Down Expand Up @@ -130,11 +133,27 @@ class SuffixTree {
/// this step.
unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd);

/// This vector contains all leaf nodes of this suffix tree. These leaf nodes
/// are identified using post-order depth-first traversal, so that the order
/// of these leaf nodes in the vector matches the order of the leaves in the
/// tree from left to right if one were to draw the tree on paper.
std::vector<SuffixTreeLeafNode *> LeafNodes;

/// Perform a post-order depth-first traversal of the tree and perform two
/// tasks during the traversal. The first is to populate LeafNodes, adding
/// nodes in order of the traversal. The second is to keep track of the leaf
/// descendants of every node by assigning values to the LeftLeafIdx and
/// RightLeafIdx fields of SuffixTreeNode.
void setLeafNodes();

public:
/// Construct a suffix tree from a sequence of unsigned integers.
///
/// \param Str The string to construct the suffix tree for.
SuffixTree(const ArrayRef<unsigned> &Str);
/// \param OutlinerLeafDescendants Whether to consider leaf descendants or
/// only leaf children (used by Machine Outliner).
SuffixTree(const ArrayRef<unsigned> &Str,
bool OutlinerLeafDescendants = false);

/// Iterator for finding all repeated substrings in the suffix tree.
struct RepeatedSubstringIterator {
Expand All @@ -154,6 +173,12 @@ class SuffixTree {
/// instruction lengths.
const unsigned MinLength = 2;

/// Vector of leaf nodes of the suffix tree.
const std::vector<SuffixTreeLeafNode *> &LeafNodes;

/// Whether to consider leaf descendants or only leaf children.
bool OutlinerLeafDescendants = !LeafNodes.empty();

/// Move the iterator to the next repeated substring.
void advance();

Expand All @@ -179,7 +204,10 @@ class SuffixTree {
return !(*this == Other);
}

RepeatedSubstringIterator(SuffixTreeInternalNode *N) : N(N) {
RepeatedSubstringIterator(
SuffixTreeInternalNode *N,
const std::vector<SuffixTreeLeafNode *> &LeafNodes = {})
: N(N), LeafNodes(LeafNodes) {
// Do we have a non-null node?
if (!N)
return;
Expand All @@ -191,7 +219,7 @@ class SuffixTree {
};

typedef RepeatedSubstringIterator iterator;
iterator begin() { return iterator(Root); }
iterator begin() { return iterator(Root, LeafNodes); }
iterator end() { return iterator(nullptr); }
};

Expand Down
25 changes: 24 additions & 1 deletion llvm/include/llvm/Support/SuffixTreeNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ struct SuffixTreeNode {
/// the root to this node.
unsigned ConcatLen = 0;

/// These two indices give a range of indices for its leaf descendants.
/// Imagine drawing a tree on paper and assigning a unique index to each leaf
/// node in monotonically increasing order from left to right. This way of
/// numbering the leaf nodes allows us to associate a continuous range of
/// indices with each internal node. For example, if a node has leaf
/// descendants with indices i, i+1, ..., j, then its LeftLeafIdx is i and
/// its RightLeafIdx is j. These indices are for LeafNodes in the SuffixTree
/// class, which is constructed using post-order depth-first traversal.
unsigned LeftLeafIdx = EmptyIdx;
unsigned RightLeafIdx = EmptyIdx;

public:
// LLVM RTTI boilerplate.
NodeKind getKind() const { return Kind; }
Expand All @@ -56,6 +67,18 @@ struct SuffixTreeNode {
/// \returns the end index of this node.
virtual unsigned getEndIdx() const = 0;

/// \returns the index of this node's leftmost leaf descendant.
unsigned getLeftLeafIdx() const;

/// \returns the index of this node's rightmost leaf descendant.
unsigned getRightLeafIdx() const;

/// Set the index of the left most leaf node of this node to \p Idx.
void setLeftLeafIdx(unsigned Idx);

/// Set the index of the right most leaf node of this node to \p Idx.
void setRightLeafIdx(unsigned Idx);

/// Advance this node's StartIdx by \p Inc.
void incrementStartIdx(unsigned Inc);

Expand Down Expand Up @@ -168,4 +191,4 @@ struct SuffixTreeLeafNode : SuffixTreeNode {
virtual ~SuffixTreeLeafNode() = default;
};
} // namespace llvm
#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
8 changes: 7 additions & 1 deletion llvm/lib/CodeGen/MachineOutliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold(
cl::desc(
"The minimum size in bytes before an outlining candidate is accepted"));

// Hidden command-line switch (-outliner-leaf-descendants), on by default.
// Its value is forwarded to the SuffixTree constructor in findCandidates to
// choose whether repeated substrings are collected from all leaf descendants
// of an internal node or only from its direct leaf children.
static cl::opt<bool> OutlinerLeafDescendants(
"outliner-leaf-descendants", cl::init(true), cl::Hidden,
cl::desc("Consider all leaf descendants of internal nodes of the suffix "
"tree as candidates for outlining (if false, only leaf children "
"are considered)"));

namespace {

/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
Expand Down Expand Up @@ -576,7 +582,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
void MachineOutliner::findCandidates(
InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
SuffixTree ST(Mapper.UnsignedVec);
SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);

// First, find all of the repeated substrings in the tree of minimum length
// 2.
Expand Down
83 changes: 81 additions & 2 deletions llvm/lib/Support/SuffixTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
//===----------------------------------------------------------------------===//

#include "llvm/Support/SuffixTree.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/SuffixTreeNode.h"
#include <stack>

using namespace llvm;

Expand All @@ -26,7 +28,9 @@ static size_t numElementsInSubstring(const SuffixTreeNode *N) {
return N->getEndIdx() - N->getStartIdx() + 1;
}

SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
bool OutlinerLeafDescendants)
: Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
Root = insertRoot();
Active.Node = Root;

Expand All @@ -46,6 +50,11 @@ SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
// Set the suffix indices of each leaf.
assert(Root && "Root node can't be nullptr!");
setSuffixIndices();

// Collect all leaf nodes of the suffix tree. And for each internal node,
// record the range of leaf nodes that are descendants of it.
if (OutlinerLeafDescendants)
setLeafNodes();
}

SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
Expand Down Expand Up @@ -105,6 +114,68 @@ void SuffixTree::setSuffixIndices() {
}
}

// Number every leaf of the tree and record, on every node, the range of leaf
// numbers that lies beneath it.
//
// The tree is walked with an explicit stack in post-order. Each leaf receives
// the next value of a running counter as both its LeftLeafIdx and
// RightLeafIdx, and is appended to LeafNodes, so LeafNodes[i] is the leaf
// numbered i. Each internal node is visited twice: the first visit schedules
// its children, the second (after the whole subtree has been popped) derives
// its [LeftLeafIdx, RightLeafIdx] range from its children.
void SuffixTree::setLeafNodes() {
  // Nodes still to be processed by the post-order depth-first traversal.
  std::stack<SuffixTreeNode *> ToVisit;
  ToVisit.push(Root);

  // Index that the next leaf encountered will receive in LeafNodes.
  unsigned LeafCounter = 0;

  // Internal nodes whose children have already been pushed onto the stack.
  llvm::SmallPtrSet<SuffixTreeInternalNode *, 32> ChildrenAddedToStack;

  while (!ToVisit.empty()) {
    SuffixTreeNode *CurrNode = ToVisit.top();
    ToVisit.pop();

    auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode);
    if (!CurrInternalNode) {
      // Leaf node: it covers exactly one leaf index, its own.
      CurrNode->setLeftLeafIdx(LeafCounter);
      CurrNode->setRightLeafIdx(LeafCounter);
      ++LeafCounter;
      LeafNodes.push_back(cast<SuffixTreeLeafNode>(CurrNode));
      continue;
    }

    // insert() reports whether the node was newly added, which tells the
    // first visit from the second with a single set lookup.
    if (ChildrenAddedToStack.insert(CurrInternalNode).second) {
      // First visit: the children have not been scheduled yet. Put the node
      // back underneath them so it is popped again only after every child
      // (and everything below the children) has been processed.
      ToVisit.push(CurrNode);
      for (auto &ChildPair : CurrInternalNode->Children)
        ToVisit.push(ChildPair.second);
      continue;
    }

    // Second visit: every child already carries its final leaf range. The
    // leaves of this subtree were numbered consecutively, so the node's range
    // runs from the smallest LeftLeafIdx to the largest RightLeafIdx among
    // its children. Taking min/max in one pass does not depend on the order
    // in which the child map is iterated.
    bool SeenChild = false;
    unsigned Left = 0;
    unsigned Right = 0;
    for (auto &ChildPair : CurrInternalNode->Children) {
      const unsigned ChildLeft = ChildPair.second->getLeftLeafIdx();
      const unsigned ChildRight = ChildPair.second->getRightLeafIdx();
      if (!SeenChild || ChildLeft < Left)
        Left = ChildLeft;
      if (!SeenChild || ChildRight > Right)
        Right = ChildRight;
      SeenChild = true;
    }

    // A node without children keeps its default (unset) indices.
    if (!SeenChild)
      continue;

    CurrNode->setLeftLeafIdx(Left);
    CurrNode->setRightLeafIdx(Right);
    assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
           "LeftLeafIdx should not be larger than RightLeafIdx");
  }
}

unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
SuffixTreeInternalNode *NeedsLink = nullptr;

Expand Down Expand Up @@ -230,6 +301,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() {

// Each leaf node represents a repeat of a string.
SmallVector<unsigned> RepeatedSubstringStarts;
SmallVector<SuffixTreeLeafNode *> LeafDescendants;

// Continue visiting nodes until we find one which repeats more than once.
while (!InternalNodesToVisit.empty()) {
Expand All @@ -252,14 +324,21 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
continue;
}

if (Length < MinLength)
if (Length < MinLength || OutlinerLeafDescendants)
continue;

// Have an occurrence of a potentially repeated string. Save it.
auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
}

if (OutlinerLeafDescendants && Length >= MinLength) {
LeafDescendants.assign(LeafNodes.begin() + Curr->getLeftLeafIdx(),
LeafNodes.begin() + Curr->getRightLeafIdx() + 1);
for (SuffixTreeLeafNode *Leaf : LeafDescendants)
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
}

// The root never represents a repeated substring. If we're looking at
// that, then skip it.
if (Curr->isRoot())
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Support/SuffixTreeNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ unsigned SuffixTreeLeafNode::getEndIdx() const {

unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; }
void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; }

unsigned SuffixTreeNode::getLeftLeafIdx() const { return LeftLeafIdx; }
unsigned SuffixTreeNode::getRightLeafIdx() const { return RightLeafIdx; }
void SuffixTreeNode::setLeftLeafIdx(unsigned Idx) { LeftLeafIdx = Idx; }
void SuffixTreeNode::setRightLeafIdx(unsigned Idx) { RightLeafIdx = Idx; }
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs -outliner-leaf-descendants=false %s -o - | FileCheck %s

# Outlining CFI instructions is unsafe if we cannot outline all of the CFI
# instructions from a function. This shows that we choose not to outline the
Expand Down
Loading

0 comments on commit 75c515f

Please sign in to comment.