diff --git a/src/analysis/cfg.cpp b/src/analysis/cfg.cpp index ba638ec5ae5..5d939f7a622 100644 --- a/src/analysis/cfg.cpp +++ b/src/analysis/cfg.cpp @@ -62,6 +62,12 @@ CFG CFG::fromFunction(Function* func) { } } + assert(!cfg.blocks.empty()); + cfg.blocks[0].entry = true; + if (builder.exit) { + oldToNewBlocks.at(builder.exit)->exit = true; + } + // Move-construct a new CFG to get mandatory copy elision, preserving basic // block addresses through the return. return CFG(std::move(cfg)); @@ -96,6 +102,14 @@ void BasicBlock::print(std::ostream& os, Module* wasm, size_t start) const { } os << "]\n"; + if (isEntry()) { + os << ";; entry\n"; + } + + if (isExit()) { + os << ";; exit\n"; + } + os << index << ":\n"; size_t instIndex = start; for (auto* inst : *this) { diff --git a/src/analysis/cfg.h b/src/analysis/cfg.h index baabe959187..b05f3c47c11 100644 --- a/src/analysis/cfg.h +++ b/src/analysis/cfg.h @@ -51,8 +51,13 @@ struct BasicBlock { Index getIndex() const { return index; } + bool isEntry() const { return entry; } + bool isExit() const { return exit; } + private: Index index; + bool entry = false; + bool exit = false; std::vector insts; std::vector predecessors; std::vector successors; diff --git a/src/cfg/cfg-traversal.h b/src/cfg/cfg-traversal.h index 4ae07504989..ca270685719 100644 --- a/src/cfg/cfg-traversal.h +++ b/src/cfg/cfg-traversal.h @@ -46,22 +46,18 @@ struct CFGWalker : public PostWalker { std::vector out, in; }; - // The entry block at the function's start. This always exists. - BasicBlock* entry; - - // The exit block at the end, where control flow reaches the end of the - // function. If the function has a control flow path that flows out (that is, - // exits the function without a return or an unreachable or such), then that - // control flow path ends in this block. In particular, if the function flows - // out a value (as opposed to only returning values using an explicit return) - // then that value would be at the end of this block. - // Put another way, this block has a control flow edge out of the function, - // and that edge is not because of a return. (Hence an analysis that cares - // about reaching the end of the function must not only look for returns, but - // also the end of this block.) - // Note: It is possible for this block to not exist, if the function ends in - // a return or an unreachable. - BasicBlock* exit; + // The entry block at the function's start. This always exists, although it + // might be empty if the function is empty. + BasicBlock* entry = nullptr; + + // The exit block for the function: either the single block that returns or + // flows values out of the function, or an empty synthetic block that is a + // successor of all such blocks. This block may not exist if a function + // traps, infinitely loops, throws, or otherwise never exits normally. + // + // Analyses that care about reaching the end of the function can just look at + // this block instead of all the individual returns. + BasicBlock* exit = nullptr; // override this with code to create a BasicBlock if necessary BasicBlock* makeBasicBlock() { return new BasicBlock(); } @@ -161,6 +157,32 @@ struct CFGWalker : public PostWalker { self->branches.erase(curr->name); } + // Whether we have created a synthetic, empty exit block for multiple other + // exit blocks to flow to. + bool hasSyntheticExit = false; + + static void doEndReturn(SubType* self, Expression** currp) { + auto* last = self->currBasicBlock; + self->startUnreachableBlock(); + if (!self->exit) { + // This is our first exit block and may be our only exit block, so just + // set it. + self->exit = last; + } else if (!self->hasSyntheticExit) { + // We now have multiple exit blocks, so we need to create a synthetic one. + // It will be added to the list of basic blocks at the end of the + // function. + auto* lastExit = self->exit; + self->exit = self->makeBasicBlock(); + self->link(lastExit, self->exit); + self->link(last, self->exit); + self->hasSyntheticExit = true; + } else { + // We already have a synthetic exit block. Just link it up. + self->link(last, self->exit); + } + } + static void doStartIfTrue(SubType* self, Expression** currp) { auto* last = self->currBasicBlock; self->link(last, self->startBasicBlock()); // ifTrue @@ -387,6 +409,19 @@ struct CFGWalker : public PostWalker { self->startUnreachableBlock(); } + static bool isReturnCall(Expression* curr) { + switch (curr->_id) { + case Expression::Id::CallId: + return curr->cast()->isReturn; + case Expression::Id::CallIndirectId: + return curr->cast()->isReturn; + case Expression::Id::CallRefId: + return curr->cast()->isReturn; + default: + WASM_UNREACHABLE("not a call"); + } + } + static void scan(SubType* self, Expression** currp) { Expression* curr = *currp; @@ -414,13 +449,20 @@ struct CFGWalker : public PostWalker { case Expression::Id::CallId: case Expression::Id::CallIndirectId: case Expression::Id::CallRefId: { - auto* module = self->getModule(); - if (!module || module->features.hasExceptionHandling()) { - // This call might throw, so run the code to handle that. - self->pushTask(SubType::doEndCall, currp); + if (isReturnCall(curr)) { + self->pushTask(SubType::doEndReturn, currp); + } else { + auto* module = self->getModule(); + if (!module || module->features.hasExceptionHandling()) { + // This call might throw, so run the code to handle that. + self->pushTask(SubType::doEndCall, currp); + } } break; } + case Expression::Id::ReturnId: + self->pushTask(SubType::doEndReturn, currp); + break; case Expression::Id::TryId: { self->pushTask(SubType::doEndTry, currp); auto& catchBodies = curr->cast()->catchBodies; @@ -466,7 +508,18 @@ struct CFGWalker : public PostWalker { startBasicBlock(); entry = currBasicBlock; PostWalker::doWalkFunction(func); - exit = currBasicBlock; + + // The last block, if it exists, implicitly returns. + if (currBasicBlock) { + auto* self = static_cast(this); + self->doEndReturn(self, nullptr); + } + + // If we have a synthetic exit block, add it to the list of basic blocks + // here so it always comes at the end. + if (hasSyntheticExit) { + basicBlocks.push_back(std::unique_ptr(exit)); + } assert(branches.size() == 0); assert(ifStack.size() == 0); diff --git a/test/gtest/cfg.cpp b/test/gtest/cfg.cpp index 583c8c4f0d2..bf3742bc13d 100644 --- a/test/gtest/cfg.cpp +++ b/test/gtest/cfg.cpp @@ -44,6 +44,7 @@ TEST_F(CFGTest, Print) { )wasm"; auto cfgText = R"cfg(;; preds: [], succs: [1, 5] +;; entry 0: 0: i32.const 0 1: drop @@ -66,15 +67,19 @@ TEST_F(CFGTest, Print) { 6: i32.const 3 7: block -;; preds: [0], succs: [] +;; preds: [0], succs: [7] 5: 8: i32.const 4 9: return -;; preds: [4], succs: [] +;; preds: [4], succs: [7] 6: 10: drop 11: block + +;; preds: [5, 6], succs: [] +;; exit +7: )cfg"; Module wasm; @@ -110,12 +115,14 @@ TEST_F(CFGTest, CallBlock) { )wasm"; auto cfgText = R"cfg(;; preds: [], succs: [1] +;; entry 0: 0: i32.const 0 1: drop 2: call $bar ;; preds: [0], succs: [] +;; exit 1: 3: i32.const 1 4: drop @@ -133,6 +140,54 @@ TEST_F(CFGTest, CallBlock) { EXPECT_EQ(ss.str(), cfgText); } +TEST_F(CFGTest, Empty) { + // Check that we create a correct CFG for an empty function. + auto moduleText = R"wasm( + (module (func $foo)) + )wasm"; + + auto cfgText = R"cfg(;; preds: [], succs: [] +;; entry +;; exit +0: + 0: nop +)cfg"; + + Module wasm; + parseWast(wasm, moduleText); + + CFG cfg = CFG::fromFunction(wasm.getFunction("foo")); + + std::stringstream ss; + cfg.print(ss); + + EXPECT_EQ(ss.str(), cfgText); +} + +TEST_F(CFGTest, Unreachable) { + // Check that we create a correct CFG for a function that does not return. In + // particular, it should not have an exit block. + auto moduleText = R"wasm( + (module (func $foo (unreachable))) + )wasm"; + + auto cfgText = R"cfg(;; preds: [], succs: [] +;; entry +0: + 0: unreachable +)cfg"; + + Module wasm; + parseWast(wasm, moduleText); + + CFG cfg = CFG::fromFunction(wasm.getFunction("foo")); + + std::stringstream ss; + cfg.print(ss); + + EXPECT_EQ(ss.str(), cfgText); +} + TEST_F(CFGTest, FinitePowersetLatticeFunctioning) { std::vector initialSet = {"a", "b", "c", "d", "e", "f"};