Skip to content

Commit

Permalink
Update CFGWalker to generate consolidated exit blocks (WebAssembly#6079)
Browse files Browse the repository at this point in the history
Previously CFGWalker designated a particular block as the "exit" block, but it
was just the block that happened to appear at the end of the function that
returned values by implicitly flowing them out. That exit block was not tied in
any way to other blocks that might end in returns, so analyses that needed to
perform some action at the end of the function would have had to perform that
action at the end of the designated exit block but also separately at any return
instruction.

Update CFGWalker to make the exit block a synthetic empty block that is a
successor of all other blocks tthat implicitly or explicitly return from the
function in case there are multiple such blocks, or to make the exit block the
single returning block if there is only one. This means that analyses will only
perform their end-of-function actions at the end of the exit block rather than
additionally at every return instruction.
  • Loading branch information
tlively authored and radekdoulik committed Jul 12, 2024
1 parent c246150 commit e3e47b5
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 23 deletions.
14 changes: 14 additions & 0 deletions src/analysis/cfg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ CFG CFG::fromFunction(Function* func) {
}
}

assert(!cfg.blocks.empty());
cfg.blocks[0].entry = true;
if (builder.exit) {
oldToNewBlocks.at(builder.exit)->exit = true;
}

// Move-construct a new CFG to get mandatory copy elision, preserving basic
// block addresses through the return.
return CFG(std::move(cfg));
Expand Down Expand Up @@ -96,6 +102,14 @@ void BasicBlock::print(std::ostream& os, Module* wasm, size_t start) const {
}
os << "]\n";

if (isEntry()) {
os << ";; entry\n";
}

if (isExit()) {
os << ";; exit\n";
}

os << index << ":\n";
size_t instIndex = start;
for (auto* inst : *this) {
Expand Down
5 changes: 5 additions & 0 deletions src/analysis/cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,13 @@ struct BasicBlock {

Index getIndex() const { return index; }

bool isEntry() const { return entry; }
bool isExit() const { return exit; }

private:
Index index;
bool entry = false;
bool exit = false;
std::vector<Expression*> insts;
std::vector<const BasicBlock*> predecessors;
std::vector<const BasicBlock*> successors;
Expand Down
95 changes: 74 additions & 21 deletions src/cfg/cfg-traversal.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,18 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
std::vector<BasicBlock*> out, in;
};

// The entry block at the function's start. This always exists.
BasicBlock* entry;

// The exit block at the end, where control flow reaches the end of the
// function. If the function has a control flow path that flows out (that is,
// exits the function without a return or an unreachable or such), then that
// control flow path ends in this block. In particular, if the function flows
// out a value (as opposed to only returning values using an explicit return)
// then that value would be at the end of this block.
// Put another way, this block has a control flow edge out of the function,
// and that edge is not because of a return. (Hence an analysis that cares
// about reaching the end of the function must not only look for returns, but
// also the end of this block.)
// Note: It is possible for this block to not exist, if the function ends in
// a return or an unreachable.
BasicBlock* exit;
// The entry block at the function's start. This always exists, although it
// might be empty if the function is empty.
BasicBlock* entry = nullptr;

// The exit block for the function: either the single block that returns or
// flows values out of the function, or an empty synthetic block that is a
// successor of all such blocks. This block may not exist if a function
// traps, infinitely loops, throws, or otherwise never exits normally.
//
// Analyses that care about reaching the end of the function can just look at
// this block instead of all the individual returns.
BasicBlock* exit = nullptr;

// override this with code to create a BasicBlock if necessary
BasicBlock* makeBasicBlock() { return new BasicBlock(); }
Expand Down Expand Up @@ -161,6 +157,32 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
self->branches.erase(curr->name);
}

// Whether we have created a synthetic, empty exit block for multiple other
// exit blocks to flow to.
bool hasSyntheticExit = false;

static void doEndReturn(SubType* self, Expression** currp) {
auto* last = self->currBasicBlock;
self->startUnreachableBlock();
if (!self->exit) {
// This is our first exit block and may be our only exit block, so just
// set it.
self->exit = last;
} else if (!self->hasSyntheticExit) {
// We now have multiple exit blocks, so we need to create a synthetic one.
// It will be added to the list of basic blocks at the end of the
// function.
auto* lastExit = self->exit;
self->exit = self->makeBasicBlock();
self->link(lastExit, self->exit);
self->link(last, self->exit);
self->hasSyntheticExit = true;
} else {
// We already have a synthetic exit block. Just link it up.
self->link(last, self->exit);
}
}

static void doStartIfTrue(SubType* self, Expression** currp) {
auto* last = self->currBasicBlock;
self->link(last, self->startBasicBlock()); // ifTrue
Expand Down Expand Up @@ -387,6 +409,19 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
self->startUnreachableBlock();
}

static bool isReturnCall(Expression* curr) {
switch (curr->_id) {
case Expression::Id::CallId:
return curr->cast<Call>()->isReturn;
case Expression::Id::CallIndirectId:
return curr->cast<CallIndirect>()->isReturn;
case Expression::Id::CallRefId:
return curr->cast<CallRef>()->isReturn;
default:
WASM_UNREACHABLE("not a call");
}
}

static void scan(SubType* self, Expression** currp) {
Expression* curr = *currp;

Expand Down Expand Up @@ -414,13 +449,20 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
case Expression::Id::CallId:
case Expression::Id::CallIndirectId:
case Expression::Id::CallRefId: {
auto* module = self->getModule();
if (!module || module->features.hasExceptionHandling()) {
// This call might throw, so run the code to handle that.
self->pushTask(SubType::doEndCall, currp);
if (isReturnCall(curr)) {
self->pushTask(SubType::doEndReturn, currp);
} else {
auto* module = self->getModule();
if (!module || module->features.hasExceptionHandling()) {
// This call might throw, so run the code to handle that.
self->pushTask(SubType::doEndCall, currp);
}
}
break;
}
case Expression::Id::ReturnId:
self->pushTask(SubType::doEndReturn, currp);
break;
case Expression::Id::TryId: {
self->pushTask(SubType::doEndTry, currp);
auto& catchBodies = curr->cast<Try>()->catchBodies;
Expand Down Expand Up @@ -466,7 +508,18 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
startBasicBlock();
entry = currBasicBlock;
PostWalker<SubType, VisitorType>::doWalkFunction(func);
exit = currBasicBlock;

// The last block, if it exists, implicitly returns.
if (currBasicBlock) {
auto* self = static_cast<SubType*>(this);
self->doEndReturn(self, nullptr);
}

// If we have a synthetic exit block, add it to the list of basic blocks
// here so it always comes at the end.
if (hasSyntheticExit) {
basicBlocks.push_back(std::unique_ptr<BasicBlock>(exit));
}

assert(branches.size() == 0);
assert(ifStack.size() == 0);
Expand Down
59 changes: 57 additions & 2 deletions test/gtest/cfg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ TEST_F(CFGTest, Print) {
)wasm";

auto cfgText = R"cfg(;; preds: [], succs: [1, 5]
;; entry
0:
0: i32.const 0
1: drop
Expand All @@ -66,15 +67,19 @@ TEST_F(CFGTest, Print) {
6: i32.const 3
7: block
;; preds: [0], succs: []
;; preds: [0], succs: [7]
5:
8: i32.const 4
9: return
;; preds: [4], succs: []
;; preds: [4], succs: [7]
6:
10: drop
11: block
;; preds: [5, 6], succs: []
;; exit
7:
)cfg";

Module wasm;
Expand Down Expand Up @@ -110,12 +115,14 @@ TEST_F(CFGTest, CallBlock) {
)wasm";

auto cfgText = R"cfg(;; preds: [], succs: [1]
;; entry
0:
0: i32.const 0
1: drop
2: call $bar
;; preds: [0], succs: []
;; exit
1:
3: i32.const 1
4: drop
Expand All @@ -133,6 +140,54 @@ TEST_F(CFGTest, CallBlock) {
EXPECT_EQ(ss.str(), cfgText);
}

TEST_F(CFGTest, Empty) {
// Check that we create a correct CFG for an empty function.
auto moduleText = R"wasm(
(module (func $foo))
)wasm";

auto cfgText = R"cfg(;; preds: [], succs: []
;; entry
;; exit
0:
0: nop
)cfg";

Module wasm;
parseWast(wasm, moduleText);

CFG cfg = CFG::fromFunction(wasm.getFunction("foo"));

std::stringstream ss;
cfg.print(ss);

EXPECT_EQ(ss.str(), cfgText);
}

TEST_F(CFGTest, Unreachable) {
// Check that we create a correct CFG for a function that does not return. In
// particular, it should not have an exit block.
auto moduleText = R"wasm(
(module (func $foo (unreachable)))
)wasm";

auto cfgText = R"cfg(;; preds: [], succs: []
;; entry
0:
0: unreachable
)cfg";

Module wasm;
parseWast(wasm, moduleText);

CFG cfg = CFG::fromFunction(wasm.getFunction("foo"));

std::stringstream ss;
cfg.print(ss);

EXPECT_EQ(ss.str(), cfgText);
}

TEST_F(CFGTest, FinitePowersetLatticeFunctioning) {

std::vector<std::string> initialSet = {"a", "b", "c", "d", "e", "f"};
Expand Down

0 comments on commit e3e47b5

Please sign in to comment.