From 01eb2d7b687e8a4148a90b3a4984fc7630f2b44d Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Thu, 2 Nov 2023 16:47:42 -0700
Subject: [PATCH 1/2] Update CFGWalker to generate consolidated exit blocks

Previously CFGWalker designated a particular block as the "exit" block, but it
was just the block that happened to appear at the end of the function that
returned values by implicitly flowing them out. That exit block was not tied in
any way to other blocks that might end in returns, so analyses that needed to
perform some action at the end of the function would have had to perform that
action at the end of the designated exit block but also separately at any return
instruction.

Update CFGWalker to make the exit block a synthetic empty block that is a
successor of all other blocks tthat implicitly or explicitly return from the
function in case there are multiple such blocks, or to make the exit block the
single returning block if there is only one. This means that analyses will only
perform their end-of-function actions at the end of the exit block rather than
additionally at every return instruction.
---
 src/analysis/cfg.cpp    | 14 ++++++
 src/analysis/cfg.h      |  5 +++
 src/cfg/cfg-traversal.h | 95 ++++++++++++++++++++++++++++++++---------
 test/gtest/cfg.cpp      | 59 ++++++++++++++++++++++++-
 4 files changed, 150 insertions(+), 23 deletions(-)

diff --git a/src/analysis/cfg.cpp b/src/analysis/cfg.cpp
index ba638ec5ae5..5d939f7a622 100644
--- a/src/analysis/cfg.cpp
+++ b/src/analysis/cfg.cpp
@@ -62,6 +62,12 @@ CFG CFG::fromFunction(Function* func) {
     }
   }
 
+  assert(!cfg.blocks.empty());
+  cfg.blocks[0].entry = true;
+  if (builder.exit) {
+    oldToNewBlocks.at(builder.exit)->exit = true;
+  }
+
   // Move-construct a new CFG to get mandatory copy elision, preserving basic
   // block addresses through the return.
   return CFG(std::move(cfg));
@@ -96,6 +102,14 @@ void BasicBlock::print(std::ostream& os, Module* wasm, size_t start) const {
   }
   os << "]\n";
 
+  if (isEntry()) {
+    os << ";; entry\n";
+  }
+
+  if (isExit()) {
+    os << ";; exit\n";
+  }
+
   os << index << ":\n";
   size_t instIndex = start;
   for (auto* inst : *this) {
diff --git a/src/analysis/cfg.h b/src/analysis/cfg.h
index baabe959187..b05f3c47c11 100644
--- a/src/analysis/cfg.h
+++ b/src/analysis/cfg.h
@@ -51,8 +51,13 @@ struct BasicBlock {
 
   Index getIndex() const { return index; }
 
+  bool isEntry() const { return entry; }
+  bool isExit() const { return exit; }
+
 private:
   Index index;
+  bool entry = false;
+  bool exit = false;
   std::vector<Expression*> insts;
   std::vector<const BasicBlock*> predecessors;
   std::vector<const BasicBlock*> successors;
diff --git a/src/cfg/cfg-traversal.h b/src/cfg/cfg-traversal.h
index 4ae07504989..7a189988b7d 100644
--- a/src/cfg/cfg-traversal.h
+++ b/src/cfg/cfg-traversal.h
@@ -46,22 +46,18 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
     std::vector<BasicBlock*> out, in;
   };
 
-  // The entry block at the function's start. This always exists.
-  BasicBlock* entry;
-
-  // The exit block at the end, where control flow reaches the end of the
-  // function. If the function has a control flow path that flows out (that is,
-  // exits the function without a return or an unreachable or such), then that
-  // control flow path ends in this block. In particular, if the function flows
-  // out a value (as opposed to only returning values using an explicit return)
-  // then that value would be at the end of this block.
-  // Put another way, this block has a control flow edge out of the function,
-  // and that edge is not because of a return. (Hence an analysis that cares
-  // about reaching the end of the function must not only look for returns, but
-  // also the end of this block.)
-  // Note: It is possible for this block to not exist, if the function ends in
-  // a return or an unreachable.
-  BasicBlock* exit;
+  // The entry block at the function's start. This always exists, although it
+  // might be empty if the function is empty.
+  BasicBlock* entry = nullptr;
+
+  // The exit block for the function: either the single block that returns or
+  // flows values out of the function, or an empty synthetic block that is a
+  // successor of all such blocks. This block may not exist if a function
+  // traps or infinitely loops and therefore never exits.
+  //
+  // Analyses that care about reaching the end of the function can just look at
+  // this block instead of all the individual returns.
+  BasicBlock* exit = nullptr;
 
   // override this with code to create a BasicBlock if necessary
   BasicBlock* makeBasicBlock() { return new BasicBlock(); }
@@ -161,6 +157,32 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
     self->branches.erase(curr->name);
   }
 
+  // Whether we have created a synthetic, empty exit block for multiple other
+  // exit blocks to flow to.
+  bool hasSyntheticExit = false;
+
+  static void doEndReturn(SubType* self, Expression** currp) {
+    auto* last = self->currBasicBlock;
+    self->startUnreachableBlock();
+    if (!self->exit) {
+      // This is our first exit block and may be our only exit block, so just
+      // set it.
+      self->exit = last;
+    } else if (!self->hasSyntheticExit) {
+      // We now have multiple exit blocks, so we need to create a synthetic one.
+      // It will be added to the list of basic blocks at the end of the
+      // function.
+      auto* lastExit = self->exit;
+      self->exit = self->makeBasicBlock();
+      self->link(lastExit, self->exit);
+      self->link(last, self->exit);
+      self->hasSyntheticExit = true;
+    } else {
+      // We already have a synthetic exit block. Just link it up.
+      self->link(last, self->exit);
+    }
+  }
+
   static void doStartIfTrue(SubType* self, Expression** currp) {
     auto* last = self->currBasicBlock;
     self->link(last, self->startBasicBlock()); // ifTrue
@@ -387,6 +409,19 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
     self->startUnreachableBlock();
   }
 
+  static bool isReturnCall(Expression* curr) {
+    switch (curr->_id) {
+      case Expression::Id::CallId:
+        return curr->cast<Call>()->isReturn;
+      case Expression::Id::CallIndirectId:
+        return curr->cast<CallIndirect>()->isReturn;
+      case Expression::Id::CallRefId:
+        return curr->cast<CallRef>()->isReturn;
+      default:
+        WASM_UNREACHABLE("not a call");
+    }
+  }
+
   static void scan(SubType* self, Expression** currp) {
     Expression* curr = *currp;
 
@@ -414,13 +449,20 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
       case Expression::Id::CallId:
       case Expression::Id::CallIndirectId:
       case Expression::Id::CallRefId: {
-        auto* module = self->getModule();
-        if (!module || module->features.hasExceptionHandling()) {
-          // This call might throw, so run the code to handle that.
-          self->pushTask(SubType::doEndCall, currp);
+        if (isReturnCall(curr)) {
+          self->pushTask(SubType::doEndReturn, currp);
+        } else {
+          auto* module = self->getModule();
+          if (!module || module->features.hasExceptionHandling()) {
+            // This call might throw, so run the code to handle that.
+            self->pushTask(SubType::doEndCall, currp);
+          }
         }
         break;
       }
+      case Expression::Id::ReturnId:
+        self->pushTask(SubType::doEndReturn, currp);
+        break;
       case Expression::Id::TryId: {
         self->pushTask(SubType::doEndTry, currp);
         auto& catchBodies = curr->cast<Try>()->catchBodies;
@@ -466,7 +508,18 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
     startBasicBlock();
     entry = currBasicBlock;
     PostWalker<SubType, VisitorType>::doWalkFunction(func);
-    exit = currBasicBlock;
+
+    // The last block, if it exists, implicitly returns.
+    if (currBasicBlock) {
+      auto* self = static_cast<SubType*>(this);
+      self->doEndReturn(self, nullptr);
+    }
+
+    // If we have a synthetic exit block, add it to the list of basic blocks
+    // here so it always comes at the end.
+    if (hasSyntheticExit) {
+      basicBlocks.push_back(std::unique_ptr<BasicBlock>(exit));
+    }
 
     assert(branches.size() == 0);
     assert(ifStack.size() == 0);
diff --git a/test/gtest/cfg.cpp b/test/gtest/cfg.cpp
index 583c8c4f0d2..bf3742bc13d 100644
--- a/test/gtest/cfg.cpp
+++ b/test/gtest/cfg.cpp
@@ -44,6 +44,7 @@ TEST_F(CFGTest, Print) {
   )wasm";
 
   auto cfgText = R"cfg(;; preds: [], succs: [1, 5]
+;; entry
 0:
   0: i32.const 0
   1: drop
@@ -66,15 +67,19 @@ TEST_F(CFGTest, Print) {
   6: i32.const 3
   7: block
 
-;; preds: [0], succs: []
+;; preds: [0], succs: [7]
 5:
   8: i32.const 4
   9: return
 
-;; preds: [4], succs: []
+;; preds: [4], succs: [7]
 6:
   10: drop
   11: block
+
+;; preds: [5, 6], succs: []
+;; exit
+7:
 )cfg";
 
   Module wasm;
@@ -110,12 +115,14 @@ TEST_F(CFGTest, CallBlock) {
   )wasm";
 
   auto cfgText = R"cfg(;; preds: [], succs: [1]
+;; entry
 0:
   0: i32.const 0
   1: drop
   2: call $bar
 
 ;; preds: [0], succs: []
+;; exit
 1:
   3: i32.const 1
   4: drop
@@ -133,6 +140,54 @@ TEST_F(CFGTest, CallBlock) {
   EXPECT_EQ(ss.str(), cfgText);
 }
 
+TEST_F(CFGTest, Empty) {
+  // Check that we create a correct CFG for an empty function.
+  auto moduleText = R"wasm(
+    (module (func $foo))
+  )wasm";
+
+  auto cfgText = R"cfg(;; preds: [], succs: []
+;; entry
+;; exit
+0:
+  0: nop
+)cfg";
+
+  Module wasm;
+  parseWast(wasm, moduleText);
+
+  CFG cfg = CFG::fromFunction(wasm.getFunction("foo"));
+
+  std::stringstream ss;
+  cfg.print(ss);
+
+  EXPECT_EQ(ss.str(), cfgText);
+}
+
+TEST_F(CFGTest, Unreachable) {
+  // Check that we create a correct CFG for a function that does not return. In
+  // particular, it should not have an exit block.
+  auto moduleText = R"wasm(
+    (module (func $foo (unreachable)))
+  )wasm";
+
+  auto cfgText = R"cfg(;; preds: [], succs: []
+;; entry
+0:
+  0: unreachable
+)cfg";
+
+  Module wasm;
+  parseWast(wasm, moduleText);
+
+  CFG cfg = CFG::fromFunction(wasm.getFunction("foo"));
+
+  std::stringstream ss;
+  cfg.print(ss);
+
+  EXPECT_EQ(ss.str(), cfgText);
+}
+
 TEST_F(CFGTest, FinitePowersetLatticeFunctioning) {
 
   std::vector<std::string> initialSet = {"a", "b", "c", "d", "e", "f"};

From fac1218b6a461150dd42406128065ed947629e26 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Mon, 6 Nov 2023 13:37:04 -0800
Subject: [PATCH 2/2] mention throwing in comment

---
 src/cfg/cfg-traversal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cfg/cfg-traversal.h b/src/cfg/cfg-traversal.h
index 7a189988b7d..ca270685719 100644
--- a/src/cfg/cfg-traversal.h
+++ b/src/cfg/cfg-traversal.h
@@ -53,7 +53,7 @@ struct CFGWalker : public PostWalker<SubType, VisitorType> {
   // The exit block for the function: either the single block that returns or
   // flows values out of the function, or an empty synthetic block that is a
   // successor of all such blocks. This block may not exist if a function
-  // traps or infinitely loops and therefore never exits.
+  // traps, infinitely loops, throws, or otherwise never exits normally.
   //
   // Analyses that care about reaching the end of the function can just look at
   // this block instead of all the individual returns.