From 8543f1651e626630cdce02609c8e5e8717f789e8 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Wed, 18 Sep 2024 18:37:27 -0700
Subject: [PATCH 1/5] [NFC-ish] Eagerly create Functions in binary parser

In preparation for using IRBuilder in the binary parser, eagerly create
Functions when parsing the function section so that they are already
created once we parse the code section. IRBuilder will require the
functions to exist when parsing calls so it can figure out what type
each call should have, even when there is a call to a function whose
body has not been parsed yet.

NFC except that some error messages change to include the new empty
functions.
---
 src/wasm-binary.h                     |  5 +++++
 src/wasm/wasm-binary.cpp              | 25 ++++++++++++++-----------
 test/lit/binary/debug-bad-binary.test |  6 ++++++
 3 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index fe740d56a76..5021b6a297f 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1532,6 +1532,11 @@ class WasmBinaryReader {
   // reconstructing the HeapTypes from the Signatures is expensive.
   std::vector<HeapType> functionTypes;
 
+  // Used to make sure the number of imported functions, signatures, and
+  // declared functions all match up.
+  Index numFuncImports = 0;
+  Index numFuncBodies = 0;
+
   void readFunctionSignatures();
   HeapType getTypeByIndex(Index index);
   HeapType getTypeByFunctionIndex(Index index);
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index d8a310103da..96e9317094b 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -2578,6 +2578,7 @@ void WasmBinaryReader::readImports() {
       }
     }
   }
+  numFuncImports = wasm.functions.size();
 }
 
 Name WasmBinaryReader::getNextLabel() {
@@ -2595,9 +2596,15 @@ void WasmBinaryReader::readFunctionSignatures() {
   size_t num = getU32LEB();
   for (size_t i = 0; i < num; i++) {
     auto index = getU32LEB();
-    functionTypes.push_back(getTypeByIndex(index));
+    HeapType type = getTypeByIndex(index);
+    functionTypes.push_back(type);
     // Check that the type is a signature.
     getSignatureByTypeIndex(index);
+    // Create the function with a placeholder body so we don't trip over it if
+    // we have to print it as part of an error message.
+    Builder builder(wasm);
+    wasm.addFunction(builder.makeFunction(
+      makeName("", i), type, {}, builder.makeUnreachable()));
   }
 }
 
@@ -2633,12 +2640,11 @@ Signature WasmBinaryReader::getSignatureByFunctionIndex(Index index) {
 }
 
 void WasmBinaryReader::readFunctions() {
-  auto numImports = wasm.functions.size();
-  size_t total = getU32LEB();
-  if (total != functionTypes.size() - numImports) {
+  numFuncBodies = getU32LEB();
+  if (numFuncBodies + numFuncImports != wasm.functions.size()) {
     throwError("invalid function section size, must equal types");
   }
-  for (size_t i = 0; i < total; i++) {
+  for (size_t i = 0; i < numFuncBodies; i++) {
     auto sizePos = pos;
     size_t size = getU32LEB();
     if (size == 0) {
@@ -2646,9 +2652,7 @@ void WasmBinaryReader::readFunctions() {
     }
     endOfFunction = pos + size;
 
-    auto func = std::make_unique<Function>();
-    func->name = makeName("", i);
-    func->type = getTypeByFunctionIndex(numImports + i);
+    auto& func = wasm.functions[numFuncImports + i];
     currFunction = func.get();
 
     if (DWARF) {
@@ -2715,7 +2719,6 @@ void WasmBinaryReader::readFunctions() {
     std::swap(func->epilogLocation, debugLocation);
     currFunction = nullptr;
     debugLocation.clear();
-    wasm.addFunction(std::move(func));
   }
 }
 
@@ -3192,8 +3195,8 @@ void WasmBinaryReader::validateBinary() {
     throwError("Number of segments does not agree with DataCount section");
   }
 
-  if (functionTypes.size() != wasm.functions.size()) {
-    throwError("function section without code section");
+  if (functionTypes.size() != numFuncImports + numFuncBodies) {
+    throwError("function and code sections have inconsistent lengths");
   }
 }
 
diff --git a/test/lit/binary/debug-bad-binary.test b/test/lit/binary/debug-bad-binary.test
index 63a2ed2ccac..634146a7653 100644
--- a/test/lit/binary/debug-bad-binary.test
+++ b/test/lit/binary/debug-bad-binary.test
@@ -38,4 +38,10 @@ RUN: not wasm-opt --debug %s.wasm 2>&1 | filecheck %s
 ;; CHECK-NEXT:    (i32.const 10)
 ;; CHECK-NEXT:   )
 ;; CHECK-NEXT:  )
+;; CHECK-NEXT:  (func $1
+;; CHECK-NEXT:   (unreachable)
+;; CHECK-NEXT:  )
+;; CHECK-NEXT:  (func $2
+;; CHECK-NEXT:   (unreachable)
+;; CHECK-NEXT:  )
 ;; CHECK-NEXT: )

From b83bec4c82bbd1cb725b219473386c3902935a94 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Wed, 18 Sep 2024 21:17:55 -0700
Subject: [PATCH 2/5] [NFC] Eagerly create segments when parsing datacount

The purpose of the datacount section is to pre-declare how many data
segments there will be so that engines can allocate space for them
and not have to back patch subsequent instructions in the code section
that refer to them. Once we use IRBuilder in the binary parser, we will
have to have the data segments available by the time we parse
instructions that use them, so eagerly construct the data segments when
parsing the datacount section.
---
 src/wasm/wasm-binary.cpp | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 96e9317094b..11ee8fe2f46 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -3276,18 +3276,37 @@ void WasmBinaryReader::processNames() {
 void WasmBinaryReader::readDataSegmentCount() {
   hasDataCount = true;
   dataCount = getU32LEB();
+  // Eagerly create the data segments so they are available during parsing of
+  // the code section.
+  for (size_t i = 0; i < dataCount; ++i) {
+    auto curr = Builder::makeDataSegment();
+    curr->setName(Name::fromInt(i), false);
+    wasm.addDataSegment(std::move(curr));
+  }
 }
 
 void WasmBinaryReader::readDataSegments() {
   auto num = getU32LEB();
+  if (hasDataCount) {
+    if (num != dataCount) {
+      throwError("data count and data sections disagree on size");
+    }
+  } else {
+    // We haven't already created the data segments, so create them now.
+    for (size_t i = 0; i < num; ++i) {
+      auto curr = Builder::makeDataSegment();
+      curr->setName(Name::fromInt(i), false);
+      wasm.addDataSegment(std::move(curr));
+    }
+  }
+  assert(wasm.dataSegments.size() == num);
   for (size_t i = 0; i < num; i++) {
-    auto curr = Builder::makeDataSegment();
+    auto& curr = wasm.dataSegments[i];
     uint32_t flags = getU32LEB();
     if (flags > 2) {
       throwError("bad segment flags, must be 0, 1, or 2, not " +
                  std::to_string(flags));
     }
-    curr->setName(Name::fromInt(i), false);
     curr->isPassive = flags & BinaryConsts::IsPassive;
     if (curr->isPassive) {
       curr->memory = Name();
@@ -3303,7 +3322,6 @@ void WasmBinaryReader::readDataSegments() {
     auto size = getU32LEB();
     auto data = getByteView(size);
     curr->data = {data.begin(), data.end()};
-    wasm.addDataSegment(std::move(curr));
   }
 }
 

From a124292894c6ef0c2fa9b59256706da473869d56 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Wed, 18 Sep 2024 21:08:10 -0700
Subject: [PATCH 3/5] [NFC] Walk module to update names in binary parser

The binary parser generally does not know the final names of module
elements when it parses them, or even when it parses instructions that
refer to them, since the name section comes at the end of a binary. The
parser previously kept a list of pointers to locations where each module
element's name would have to be used, then it patched those locations
after parsing the names section to discover the final names.

When the binary parser starts using IRBuilder, the parsed expressions
will be constructed and managed by IRBuilder rather than by the parser
itself. This means that the parser will no longer be able to collect
pointers to places where module element names are used; it won't have
access to the instructions at all.

Since the strategy of collecting locations to patch will no longer work,
switch to a strategy of traversing the module to find and update names
instead. This is generally less efficient because the locations have to
be found before they can be updated, but on the other hand it only
happens when preserving debug info and it is parallelizable anyway.
---
 src/wasm-binary.h        |  32 +---
 src/wasm/wasm-binary.cpp | 388 +++++++++++++++++++--------------------
 2 files changed, 194 insertions(+), 226 deletions(-)

diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index 5021b6a297f..c394c400855 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1547,36 +1547,24 @@ class WasmBinaryReader {
 
   Name getNextLabel();
 
-  // We read functions and globals before we know their names, so we need to
-  // backpatch the names later
+  // We read functions, globals, etc. before we know their final names, so we
+  // need to backpatch the names later. Map the original names to their indices
+  // so we can find the final names based on index.
+  std::unordered_map<Name, Index> functionIndices;
+  std::unordered_map<Name, Index> tableIndices;
+  std::unordered_map<Name, Index> memoryIndices;
+  std::unordered_map<Name, Index> globalIndices;
+  std::unordered_map<Name, Index> tagIndices;
+  std::unordered_map<Name, Index> dataIndices;
+  std::unordered_map<Name, Index> elemIndices;
 
-  // at index i we have all refs to the function i
-  std::map<Index, std::vector<Name*>> functionRefs;
   Function* currFunction = nullptr;
   // before we see a function (like global init expressions), there is no end of
   // function to check
   Index endOfFunction = -1;
 
-  // at index i we have all references to the table i
-  std::map<Index, std::vector<Name*>> tableRefs;
-
   std::map<Index, Name> elemTables;
 
-  // at index i we have all references to the memory i
-  std::map<Index, std::vector<wasm::Name*>> memoryRefs;
-
-  // at index i we have all refs to the global i
-  std::map<Index, std::vector<Name*>> globalRefs;
-
-  // at index i we have all refs to the tag i
-  std::map<Index, std::vector<Name*>> tagRefs;
-
-  // at index i we have all refs to the data segment i
-  std::map<Index, std::vector<Name*>> dataRefs;
-
-  // at index i we have all refs to the element segment i
-  std::map<Index, std::vector<Name*>> elemRefs;
-
   // Throws a parsing error if we are not in a function context
   void requireFunctionContext(const char* error);
 
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 11ee8fe2f46..f146e04abaa 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -23,6 +23,7 @@
 #include "ir/names.h"
 #include "ir/table-utils.h"
 #include "ir/type-updating.h"
+#include "pass.h"
 #include "support/bits.h"
 #include "support/debug.h"
 #include "support/stdckdint.h"
@@ -2222,6 +2223,7 @@ void WasmBinaryReader::readMemories() {
                        memory->shared,
                        memory->indexType,
                        Memory::kUnlimitedSize);
+    memoryIndices[memory->name] = wasm.memories.size();
     wasm.addMemory(std::move(memory));
   }
 }
@@ -2487,11 +2489,6 @@ void WasmBinaryReader::getResizableLimits(Address& initial,
 void WasmBinaryReader::readImports() {
   size_t num = getU32LEB();
   Builder builder(wasm);
-  size_t tableCounter = 0;
-  size_t memoryCounter = 0;
-  size_t functionCounter = 0;
-  size_t globalCounter = 0;
-  size_t tagCounter = 0;
   for (size_t i = 0; i < num; i++) {
     auto module = getInlineString();
     auto base = getInlineString();
@@ -2501,7 +2498,7 @@ void WasmBinaryReader::readImports() {
     // could occur later due to the names section.
     switch (kind) {
       case ExternalKind::Function: {
-        Name name = makeName("fimport$", functionCounter++);
+        Name name = makeName("fimport$", wasm.functions.size());
         auto index = getU32LEB();
         functionTypes.push_back(getTypeByIndex(index));
         auto type = getTypeByIndex(index);
@@ -2513,11 +2510,13 @@ void WasmBinaryReader::readImports() {
         auto curr = builder.makeFunction(name, type, {});
         curr->module = module;
         curr->base = base;
+        functionIndices[name] = wasm.functions.size();
         wasm.addFunction(std::move(curr));
         break;
       }
       case ExternalKind::Table: {
-        auto table = builder.makeTable(makeName("timport$", tableCounter++));
+        Name name = makeName("timport$", wasm.tables.size());
+        auto table = builder.makeTable(name);
         table->module = module;
         table->base = base;
         table->type = getType();
@@ -2531,12 +2530,13 @@ void WasmBinaryReader::readImports() {
         if (is_shared) {
           throwError("Tables may not be shared");
         }
-
+        tableIndices[name] = wasm.tables.size();
         wasm.addTable(std::move(table));
         break;
       }
       case ExternalKind::Memory: {
-        auto memory = builder.makeMemory(makeName("mimport$", memoryCounter++));
+        Name name = makeName("mimport$", wasm.memories.size());
+        auto memory = builder.makeMemory(name);
         memory->module = module;
         memory->base = base;
         getResizableLimits(memory->initial,
@@ -2544,32 +2544,36 @@ void WasmBinaryReader::readImports() {
                            memory->shared,
                            memory->indexType,
                            Memory::kUnlimitedSize);
+        memoryIndices[name] = wasm.memories.size();
         wasm.addMemory(std::move(memory));
         break;
       }
       case ExternalKind::Global: {
+        Name name = makeName("gimport$", wasm.globals.size());
         auto type = getConcreteType();
         auto mutable_ = getU32LEB();
         if (mutable_ & ~1) {
           throwError("Global mutability must be 0 or 1");
         }
         auto curr =
-          builder.makeGlobal(makeName("gimport$", globalCounter++),
+          builder.makeGlobal(name,
                              type,
                              nullptr,
                              mutable_ ? Builder::Mutable : Builder::Immutable);
         curr->module = module;
         curr->base = base;
+        globalIndices[name] = wasm.globals.size();
         wasm.addGlobal(std::move(curr));
         break;
       }
       case ExternalKind::Tag: {
-        Name name = makeName("eimport$", tagCounter++);
+        Name name = makeName("eimport$", wasm.tags.size());
         getInt8(); // Reserved 'attribute' field
         auto index = getU32LEB();
         auto curr = builder.makeTag(name, getSignatureByTypeIndex(index));
         curr->module = module;
         curr->base = base;
+        tagIndices[name] = wasm.tags.size();
         wasm.addTag(std::move(curr));
         break;
       }
@@ -2603,8 +2607,10 @@ void WasmBinaryReader::readFunctionSignatures() {
     // Create the function with a placeholder body so we don't trip over it if
     // we have to print it as part of an error message.
     Builder builder(wasm);
-    wasm.addFunction(builder.makeFunction(
-      makeName("", i), type, {}, builder.makeUnreachable()));
+    auto name = makeName("", i);
+    functionIndices[name] = wasm.functions.size();
+    wasm.addFunction(
+      builder.makeFunction(name, type, {}, builder.makeUnreachable()));
   }
 }
 
@@ -3004,11 +3010,13 @@ void WasmBinaryReader::readGlobals() {
       throwError("Global mutability must be 0 or 1");
     }
     auto* init = readExpression();
-    wasm.addGlobal(
+    auto global =
       Builder::makeGlobal(makeName("global$", i),
                           type,
                           init,
-                          mutable_ ? Builder::Mutable : Builder::Immutable));
+                          mutable_ ? Builder::Mutable : Builder::Immutable);
+    globalIndices[global->name] = wasm.globals.size();
+    wasm.addGlobal(std::move(global));
   }
 }
 
@@ -3201,7 +3209,112 @@ void WasmBinaryReader::validateBinary() {
 }
 
 void WasmBinaryReader::processNames() {
-  // now that we have names, apply things
+  // Now that we have final names for module elements, update the names used in
+  // expressions and anywhere else.
+  struct NameUpdater
+    : WalkerPass<
+        PostWalker<NameUpdater, UnifiedExpressionVisitor<NameUpdater>>> {
+    WasmBinaryReader& parent;
+    NameUpdater(WasmBinaryReader& parent) : parent(parent) {}
+    bool isFunctionParallel() override { return true; }
+    std::unique_ptr<Pass> create() override {
+      return std::make_unique<NameUpdater>(parent);
+    }
+
+    void visitExpression(Expression* curr) {
+      auto& wasm = *getModule();
+
+#define DELEGATE_ID curr->_id
+#define DELEGATE_START(id) [[maybe_unused]] auto* cast = curr->cast<id>();
+#define DELEGATE_GET_FIELD(id, field) cast->field
+#define DELEGATE_FIELD_TYPE(id, field)
+#define DELEGATE_FIELD_HEAPTYPE(id, field)
+#define DELEGATE_FIELD_CHILD(id, field)
+#define DELEGATE_FIELD_OPTIONAL_CHILD(id, field)
+#define DELEGATE_FIELD_INT(id, field)
+#define DELEGATE_FIELD_LITERAL(id, field)
+#define DELEGATE_FIELD_NAME(id, field)
+#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field)
+#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field)
+#define DELEGATE_FIELD_ADDRESS(id, field)
+
+#define DELEGATE_FIELD_NAME_KIND(id, field, kind)                              \
+  switch (kind) {                                                              \
+    case ModuleItemKind::Function:                                             \
+      if (auto it = parent.functionIndices.find(cast->field);                  \
+          it != parent.functionIndices.end()) {                                \
+        cast->field = wasm.functions[it->second]->name;                        \
+      }                                                                        \
+      break;                                                                   \
+    case ModuleItemKind::Table:                                                \
+      if (auto it = parent.tableIndices.find(cast->field);                     \
+          it != parent.tableIndices.end()) {                                   \
+        cast->field = wasm.tables[it->second]->name;                           \
+      }                                                                        \
+      break;                                                                   \
+    case ModuleItemKind::Memory:                                               \
+      if (auto it = parent.memoryIndices.find(cast->field);                    \
+          it != parent.memoryIndices.end()) {                                  \
+        cast->field = wasm.memories[it->second]->name;                         \
+      }                                                                        \
+      break;                                                                   \
+    case ModuleItemKind::Global:                                               \
+      if (auto it = parent.globalIndices.find(cast->field);                    \
+          it != parent.globalIndices.end()) {                                  \
+        cast->field = wasm.globals[it->second]->name;                          \
+      }                                                                        \
+      break;                                                                   \
+    case ModuleItemKind::Tag:                                                  \
+      if (auto it = parent.tagIndices.find(cast->field);                       \
+          it != parent.tagIndices.end()) {                                     \
+        cast->field = wasm.tags[it->second]->name;                             \
+      }                                                                        \
+      break;                                                                   \
+    case ModuleItemKind::DataSegment:                                          \
+      if (auto it = parent.dataIndices.find(cast->field);                      \
+          it != parent.dataIndices.end()) {                                    \
+        cast->field = wasm.dataSegments[it->second]->name;                     \
+      }                                                                        \
+      break;                                                                   \
+    case ModuleItemKind::ElementSegment:                                       \
+      if (auto it = parent.elemIndices.find(cast->field);                      \
+          it != parent.elemIndices.end()) {                                    \
+        cast->field = wasm.elementSegments[it->second]->name;                  \
+      }                                                                        \
+      break;                                                                   \
+    case ModuleItemKind::Invalid:                                              \
+      WASM_UNREACHABLE("unexpected kind");                                     \
+  }
+#include "wasm-delegations-fields.def"
+    }
+  };
+
+  if (debugInfo) {
+    // Perform the replacement. Mark the runner nested to avoid unnecessary
+    // validation.
+    PassRunner runner(&wasm);
+    runner.setIsNested(true);
+    runner.add(std::make_unique<NameUpdater>(*this));
+    runner.run();
+    NameUpdater(*this).runOnModuleCode(&runner, &wasm);
+
+    for (auto& seg : wasm.elementSegments) {
+      if (seg->table) {
+        if (auto it = tableIndices.find(seg->table); it != tableIndices.end()) {
+          seg->table = wasm.tables[it->second]->name;
+        }
+      }
+    }
+
+    for (auto& seg : wasm.dataSegments) {
+      if (seg->memory) {
+        if (auto it = memoryIndices.find(seg->memory);
+            it != memoryIndices.end()) {
+          seg->memory = wasm.memories[it->second]->name;
+        }
+      }
+    }
+  }
 
   if (startIndex != static_cast<Index>(-1)) {
     wasm.start = getFunctionName(startIndex);
@@ -3232,44 +3345,7 @@ void WasmBinaryReader::processNames() {
     wasm.addExport(std::move(curr));
   }
 
-  for (auto& [index, refs] : functionRefs) {
-    for (auto* ref : refs) {
-      *ref = getFunctionName(index);
-    }
-  }
-  for (auto& [index, refs] : tableRefs) {
-    for (auto* ref : refs) {
-      *ref = getTableName(index);
-    }
-  }
-  for (auto& [index, refs] : memoryRefs) {
-    for (auto ref : refs) {
-      *ref = getMemoryName(index);
-    }
-  }
-  for (auto& [index, refs] : globalRefs) {
-    for (auto* ref : refs) {
-      *ref = getGlobalName(index);
-    }
-  }
-  for (auto& [index, refs] : tagRefs) {
-    for (auto* ref : refs) {
-      *ref = getTagName(index);
-    }
-  }
-  for (auto& [index, refs] : dataRefs) {
-    for (auto* ref : refs) {
-      *ref = getDataName(index);
-    }
-  }
-  for (auto& [index, refs] : elemRefs) {
-    for (auto* ref : refs) {
-      *ref = getElemName(index);
-    }
-  }
-
   // Everything now has its proper name.
-
   wasm.updateMaps();
 }
 
@@ -3281,6 +3357,7 @@ void WasmBinaryReader::readDataSegmentCount() {
   for (size_t i = 0; i < dataCount; ++i) {
     auto curr = Builder::makeDataSegment();
     curr->setName(Name::fromInt(i), false);
+    dataIndices[curr->name] = wasm.dataSegments.size();
     wasm.addDataSegment(std::move(curr));
   }
 }
@@ -3296,6 +3373,7 @@ void WasmBinaryReader::readDataSegments() {
     for (size_t i = 0; i < num; ++i) {
       auto curr = Builder::makeDataSegment();
       curr->setName(Name::fromInt(i), false);
+      dataIndices[curr->name] = wasm.dataSegments.size();
       wasm.addDataSegment(std::move(curr));
     }
   }
@@ -3316,7 +3394,7 @@ void WasmBinaryReader::readDataSegments() {
       if (flags & BinaryConsts::HasIndex) {
         memIdx = getU32LEB();
       }
-      memoryRefs[memIdx].push_back(&curr->memory);
+      curr->memory = getMemoryName(memIdx);
       curr->offset = readExpression();
     }
     auto size = getU32LEB();
@@ -3343,6 +3421,7 @@ void WasmBinaryReader::readTableDeclarations() {
     if (is_shared) {
       throwError("Tables may not be shared");
     }
+    tableIndices[table->name] = wasm.tables.size();
     wasm.addTable(std::move(table));
   }
 }
@@ -3413,13 +3492,12 @@ void WasmBinaryReader::readElementSegments() {
       for (Index j = 0; j < size; j++) {
         Index index = getU32LEB();
         auto sig = getTypeByFunctionIndex(index);
-        // Use a placeholder name for now
-        auto* refFunc = Builder(wasm).makeRefFunc(Name::fromInt(index), sig);
-        functionRefs[index].push_back(&refFunc->func);
+        auto* refFunc = Builder(wasm).makeRefFunc(getFunctionName(index), sig);
         segmentData.push_back(refFunc);
       }
     }
 
+    elemIndices[segment->name] = wasm.elementSegments.size();
     wasm.addElementSegment(std::move(segment));
   }
 }
@@ -3429,8 +3507,10 @@ void WasmBinaryReader::readTags() {
   for (size_t i = 0; i < numTags; i++) {
     getInt8(); // Reserved 'attribute' field
     auto typeIndex = getU32LEB();
-    wasm.addTag(Builder::makeTag(makeName("tag$", i),
-                                 getSignatureByTypeIndex(typeIndex)));
+    auto tag =
+      Builder::makeTag(makeName("tag$", i), getSignatureByTypeIndex(typeIndex));
+    tagIndices[tag->name] = wasm.tags.size();
+    wasm.addTag(std::move(tag));
   }
 }
 
@@ -4557,21 +4637,20 @@ void WasmBinaryReader::visitSwitch(Switch* curr) {
 void WasmBinaryReader::visitCall(Call* curr) {
   auto index = getU32LEB();
   auto sig = getSignatureByFunctionIndex(index);
+  curr->target = getFunctionName(index);
   auto num = sig.params.size();
   curr->operands.resize(num);
   for (size_t i = 0; i < num; i++) {
     curr->operands[num - i - 1] = popNonVoidExpression();
   }
   curr->type = sig.results;
-  // We don't know function names yet.
-  functionRefs[index].push_back(&curr->target);
   curr->finalize();
 }
 
 void WasmBinaryReader::visitCallIndirect(CallIndirect* curr) {
   auto index = getU32LEB();
   curr->heapType = getTypeByIndex(index);
-  Index tableIdx = getU32LEB();
+  curr->table = getTableName(getU32LEB());
   // TODO: Handle error cases where `heapType` is not a signature?
   auto num = curr->heapType.getSignature().params.size();
   curr->operands.resize(num);
@@ -4579,8 +4658,6 @@ void WasmBinaryReader::visitCallIndirect(CallIndirect* curr) {
   for (size_t i = 0; i < num; i++) {
     curr->operands[num - i - 1] = popNonVoidExpression();
   }
-  // Defer setting the table name for later, when we know it.
-  tableRefs[tableIdx].push_back(&curr->table);
   curr->finalize();
 }
 
@@ -4617,7 +4694,6 @@ void WasmBinaryReader::visitGlobalGet(GlobalGet* curr) {
   auto* global = wasm.globals[index].get();
   curr->name = global->name;
   curr->type = global->type;
-  globalRefs[index].push_back(&curr->name); // we don't know the final name yet
 }
 
 void WasmBinaryReader::visitGlobalSet(GlobalSet* curr) {
@@ -4627,7 +4703,6 @@ void WasmBinaryReader::visitGlobalSet(GlobalSet* curr) {
   }
   curr->name = wasm.globals[index]->name;
   curr->value = popNonVoidExpression();
-  globalRefs[index].push_back(&curr->name); // we don't know the final name yet
   curr->finalize();
 }
 
@@ -4802,7 +4877,7 @@ bool WasmBinaryReader::maybeVisitLoad(
 
   curr->isAtomic = prefix == BinaryConsts::AtomicPrefix;
   Index memIdx = readMemoryAccess(curr->align, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   curr->ptr = popNonVoidExpression();
   curr->finalize();
   out = curr;
@@ -4920,7 +4995,7 @@ bool WasmBinaryReader::maybeVisitStore(
 
   curr->isAtomic = prefix == BinaryConsts::AtomicPrefix;
   Index memIdx = readMemoryAccess(curr->align, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   curr->value = popNonVoidExpression();
   curr->ptr = popNonVoidExpression();
   curr->finalize();
@@ -4980,7 +5055,7 @@ bool WasmBinaryReader::maybeVisitAtomicRMW(Expression*& out, uint8_t code) {
 
   Address readAlign;
   Index memIdx = readMemoryAccess(readAlign, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   if (readAlign != curr->bytes) {
     throwError("Align of AtomicRMW must match size");
   }
@@ -5031,7 +5106,7 @@ bool WasmBinaryReader::maybeVisitAtomicCmpxchg(Expression*& out, uint8_t code) {
 
   Address readAlign;
   Index memIdx = readMemoryAccess(readAlign, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   if (readAlign != curr->bytes) {
     throwError("Align of AtomicCpxchg must match size");
   }
@@ -5066,7 +5141,7 @@ bool WasmBinaryReader::maybeVisitAtomicWait(Expression*& out, uint8_t code) {
   curr->ptr = popNonVoidExpression();
   Address readAlign;
   Index memIdx = readMemoryAccess(readAlign, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   if (readAlign != curr->expectedType.getByteSize()) {
     throwError("Align of AtomicWait must match size");
   }
@@ -5086,7 +5161,7 @@ bool WasmBinaryReader::maybeVisitAtomicNotify(Expression*& out, uint8_t code) {
   curr->ptr = popNonVoidExpression();
   Address readAlign;
   Index memIdx = readMemoryAccess(readAlign, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   if (readAlign != curr->type.getByteSize()) {
     throwError("Align of AtomicNotify must match size");
   }
@@ -5412,10 +5487,8 @@ bool WasmBinaryReader::maybeVisitMemoryInit(Expression*& out, uint32_t code) {
   curr->size = popNonVoidExpression();
   curr->offset = popNonVoidExpression();
   curr->dest = popNonVoidExpression();
-  Index segIdx = getU32LEB();
-  dataRefs[segIdx].push_back(&curr->segment);
-  Index memIdx = getU32LEB();
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->segment = getDataName(getU32LEB());
+  curr->memory = getMemoryName(getU32LEB());
   curr->finalize();
   out = curr;
   return true;
@@ -5426,8 +5499,7 @@ bool WasmBinaryReader::maybeVisitDataDrop(Expression*& out, uint32_t code) {
     return false;
   }
   auto* curr = allocator.alloc<DataDrop>();
-  Index segIdx = getU32LEB();
-  dataRefs[segIdx].push_back(&curr->segment);
+  curr->segment = getDataName(getU32LEB());
   curr->finalize();
   out = curr;
   return true;
@@ -5441,11 +5513,9 @@ bool WasmBinaryReader::maybeVisitMemoryCopy(Expression*& out, uint32_t code) {
   curr->size = popNonVoidExpression();
   curr->source = popNonVoidExpression();
   curr->dest = popNonVoidExpression();
-  Index destIdx = getU32LEB();
-  Index sourceIdx = getU32LEB();
+  curr->destMemory = getMemoryName(getU32LEB());
+  curr->sourceMemory = getMemoryName(getU32LEB());
   curr->finalize();
-  memoryRefs[destIdx].push_back(&curr->destMemory);
-  memoryRefs[sourceIdx].push_back(&curr->sourceMemory);
   out = curr;
   return true;
 }
@@ -5458,9 +5528,8 @@ bool WasmBinaryReader::maybeVisitMemoryFill(Expression*& out, uint32_t code) {
   curr->size = popNonVoidExpression();
   curr->value = popNonVoidExpression();
   curr->dest = popNonVoidExpression();
-  Index memIdx = getU32LEB();
+  curr->memory = getMemoryName(getU32LEB());
   curr->finalize();
-  memoryRefs[memIdx].push_back(&curr->memory);
   out = curr;
   return true;
 }
@@ -5469,17 +5538,13 @@ bool WasmBinaryReader::maybeVisitTableSize(Expression*& out, uint32_t code) {
   if (code != BinaryConsts::TableSize) {
     return false;
   }
-  Index tableIdx = getU32LEB();
-  if (tableIdx >= wasm.tables.size()) {
-    throwError("bad table index");
-  }
   auto* curr = allocator.alloc<TableSize>();
+  Index tableIdx = getU32LEB();
+  curr->table = getTableName(tableIdx);
   if (getTable(tableIdx)->is64()) {
     curr->type = Type::i64;
   }
   curr->finalize();
-  // Defer setting the table name for later, when we know it.
-  tableRefs[tableIdx].push_back(&curr->table);
   out = curr;
   return true;
 }
@@ -5488,19 +5553,15 @@ bool WasmBinaryReader::maybeVisitTableGrow(Expression*& out, uint32_t code) {
   if (code != BinaryConsts::TableGrow) {
     return false;
   }
-  Index tableIdx = getU32LEB();
-  if (tableIdx >= wasm.tables.size()) {
-    throwError("bad table index");
-  }
   auto* curr = allocator.alloc<TableGrow>();
+  Index tableIdx = getU32LEB();
+  curr->table = getTableName(tableIdx);
   curr->delta = popNonVoidExpression();
   curr->value = popNonVoidExpression();
   if (getTable(tableIdx)->is64()) {
     curr->type = Type::i64;
   }
   curr->finalize();
-  // Defer setting the table name for later, when we know it.
-  tableRefs[tableIdx].push_back(&curr->table);
   out = curr;
   return true;
 }
@@ -5509,16 +5570,11 @@ bool WasmBinaryReader::maybeVisitTableFill(Expression*& out, uint32_t code) {
   if (code != BinaryConsts::TableFill) {
     return false;
   }
-  Index tableIdx = getU32LEB();
-  if (tableIdx >= wasm.tables.size()) {
-    throwError("bad table index");
-  }
+  auto table = getTableName(getU32LEB());
   auto* size = popNonVoidExpression();
   auto* value = popNonVoidExpression();
   auto* dest = popNonVoidExpression();
-  auto* ret = Builder(wasm).makeTableFill(Name(), dest, value, size);
-  tableRefs[tableIdx].push_back(&ret->table);
-  out = ret;
+  out = Builder(wasm).makeTableFill(table, dest, value, size);
   return true;
 }
 
@@ -5526,21 +5582,12 @@ bool WasmBinaryReader::maybeVisitTableCopy(Expression*& out, uint32_t code) {
   if (code != BinaryConsts::TableCopy) {
     return false;
   }
-  Index destTableIdx = getU32LEB();
-  if (destTableIdx >= wasm.tables.size()) {
-    throwError("bad table index");
-  }
-  Index sourceTableIdx = getU32LEB();
-  if (sourceTableIdx >= wasm.tables.size()) {
-    throwError("bad table index");
-  }
+  auto destTable = getTableName(getU32LEB());
+  auto srcTable = getTableName(getU32LEB());
   auto* size = popNonVoidExpression();
   auto* source = popNonVoidExpression();
   auto* dest = popNonVoidExpression();
-  auto* ret = Builder(wasm).makeTableCopy(dest, source, size, Name(), Name());
-  tableRefs[destTableIdx].push_back(&ret->destTable);
-  tableRefs[sourceTableIdx].push_back(&ret->sourceTable);
-  out = ret;
+  out = Builder(wasm).makeTableCopy(dest, source, size, destTable, srcTable);
   return true;
 }
 
@@ -5552,10 +5599,8 @@ bool WasmBinaryReader::maybeVisitTableInit(Expression*& out, uint32_t code) {
   curr->size = popNonVoidExpression();
   curr->offset = popNonVoidExpression();
   curr->dest = popNonVoidExpression();
-  Index segIdx = getU32LEB();
-  elemRefs[segIdx].push_back(&curr->segment);
-  Index memIdx = getU32LEB();
-  tableRefs[memIdx].push_back(&curr->table);
+  curr->segment = getElemName(getU32LEB());
+  curr->table = getTableName(getU32LEB());
   curr->finalize();
   out = curr;
   return true;
@@ -6553,7 +6598,7 @@ bool WasmBinaryReader::maybeVisitSIMDStore(Expression*& out, uint32_t code) {
   curr->bytes = 16;
   curr->valueType = Type::v128;
   Index memIdx = readMemoryAccess(curr->align, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   curr->isAtomic = false;
   curr->value = popNonVoidExpression();
   curr->ptr = popNonVoidExpression();
@@ -6811,7 +6856,7 @@ bool WasmBinaryReader::maybeVisitSIMDLoad(Expression*& out, uint32_t code) {
     curr->type = Type::v128;
     curr->bytes = 16;
     Index memIdx = readMemoryAccess(curr->align, curr->offset);
-    memoryRefs[memIdx].push_back(&curr->memory);
+    curr->memory = getMemoryName(memIdx);
     curr->isAtomic = false;
     curr->ptr = popNonVoidExpression();
     curr->finalize();
@@ -6872,7 +6917,7 @@ bool WasmBinaryReader::maybeVisitSIMDLoad(Expression*& out, uint32_t code) {
       return false;
   }
   Index memIdx = readMemoryAccess(curr->align, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   curr->ptr = popNonVoidExpression();
   curr->finalize();
   out = curr;
@@ -6922,7 +6967,7 @@ bool WasmBinaryReader::maybeVisitSIMDLoadStoreLane(Expression*& out,
   auto* curr = allocator.alloc<SIMDLoadStoreLane>();
   curr->op = op;
   Index memIdx = readMemoryAccess(curr->align, curr->offset);
-  memoryRefs[memIdx].push_back(&curr->memory);
+  curr->memory = getMemoryName(memIdx);
   curr->index = getLaneIndex(lanes);
   curr->vec = popNonVoidExpression();
   curr->ptr = popNonVoidExpression();
@@ -6965,20 +7010,21 @@ void WasmBinaryReader::visitReturn(Return* curr) {
 
 void WasmBinaryReader::visitMemorySize(MemorySize* curr) {
   Index index = getU32LEB();
+  curr->memory = getMemoryName(index);
   if (getMemory(index)->is64()) {
     curr->type = Type::i64;
   }
   curr->finalize();
-  memoryRefs[index].push_back(&curr->memory);
 }
 
 void WasmBinaryReader::visitMemoryGrow(MemoryGrow* curr) {
-  curr->delta = popNonVoidExpression();
   Index index = getU32LEB();
+  curr->memory = getMemoryName(index);
   if (getMemory(index)->is64()) {
     curr->type = Type::i64;
   }
-  memoryRefs[index].push_back(&curr->memory);
+  curr->delta = popNonVoidExpression();
+  curr->finalize();
 }
 
 void WasmBinaryReader::visitNop(Nop* curr) {}
@@ -7001,14 +7047,9 @@ void WasmBinaryReader::visitRefIsNull(RefIsNull* curr) {
 
 void WasmBinaryReader::visitRefFunc(RefFunc* curr) {
   Index index = getU32LEB();
-  // We don't know function names yet, so record this use to be updated later.
-  // Note that we do not need to check that 'index' is in bounds, as that will
-  // be verified in the next line. (Also, note that functionRefs[index] may
-  // write to an odd place in the functionRefs map if index is invalid, but that
-  // is harmless.)
-  functionRefs[index].push_back(&curr->func);
   // To support typed function refs, we give the reference not just a general
   // funcref, but a specific subtype with the actual signature.
+  curr->func = getFunctionName(index);
   curr->finalize(Type(getTypeByFunctionIndex(index), NonNullable));
 }
 
@@ -7020,26 +7061,18 @@ void WasmBinaryReader::visitRefEq(RefEq* curr) {
 
 void WasmBinaryReader::visitTableGet(TableGet* curr) {
   Index tableIdx = getU32LEB();
-  if (tableIdx >= wasm.tables.size()) {
-    throwError("bad table index");
-  }
+  curr->table = getTableName(tableIdx);
   curr->index = popNonVoidExpression();
   curr->type = wasm.tables[tableIdx]->type;
   curr->finalize();
-  // Defer setting the table name for later, when we know it.
-  tableRefs[tableIdx].push_back(&curr->table);
 }
 
 void WasmBinaryReader::visitTableSet(TableSet* curr) {
   Index tableIdx = getU32LEB();
-  if (tableIdx >= wasm.tables.size()) {
-    throwError("bad table index");
-  }
+  curr->table = getTableName(tableIdx);
   curr->value = popNonVoidExpression();
   curr->index = popNonVoidExpression();
   curr->finalize();
-  // Defer setting the table name for later, when we know it.
-  tableRefs[tableIdx].push_back(&curr->table);
 }
 
 void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) {
@@ -7076,11 +7109,6 @@ void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) {
     }
   };
 
-  // We cannot immediately update tagRefs in the loop below, as catchTags is
-  // being grown, an so references would get invalidated. Store the indexes
-  // here, then do that later.
-  std::vector<Index> tagIndexes;
-
   while (lastSeparator == BinaryConsts::Catch_Legacy ||
          lastSeparator == BinaryConsts::CatchAll_Legacy) {
     if (lastSeparator == BinaryConsts::Catch_Legacy) {
@@ -7088,7 +7116,6 @@ void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) {
       if (index >= wasm.tags.size()) {
         throwError("bad tag index");
       }
-      tagIndexes.push_back(index);
       auto* tag = wasm.tags[index].get();
       curr->catchTags.push_back(tag->name);
       readCatchBody(tag->sig.params);
@@ -7101,11 +7128,6 @@ void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) {
   }
   breakStack.pop_back();
 
-  for (Index i = 0; i < tagIndexes.size(); i++) {
-    // We don't know the final name yet.
-    tagRefs[tagIndexes[i]].push_back(&curr->catchTags[i]);
-  }
-
   if (lastSeparator == BinaryConsts::Delegate) {
     curr->delegateTarget = getExceptionTargetName(getU32LEB());
   }
@@ -7200,10 +7222,6 @@ void WasmBinaryReader::visitTryTable(TryTable* curr) {
   // within each try-body, and let branches target those inner blocks instead.
   curr->type = getType();
   auto numCatches = getU32LEB();
-  // We cannot immediately update tagRefs in the loop below, as catchTags is
-  // being grown, an so references would get invalidated. Store the indexes
-  // here, then do that later.
-  std::vector<Index> tagIndexes;
 
   for (size_t i = 0; i < numCatches; i++) {
     uint8_t code = getInt8();
@@ -7212,11 +7230,9 @@ void WasmBinaryReader::visitTryTable(TryTable* curr) {
       if (index >= wasm.tags.size()) {
         throwError("bad tag index");
       }
-      tagIndexes.push_back(index);
       auto* tag = wasm.tags[index].get();
       curr->catchTags.push_back(tag->name);
     } else {
-      tagIndexes.push_back(-1); // unused
       curr->catchTags.push_back(Name());
     }
     curr->catchDests.push_back(getBreakTarget(getU32LEB()).name);
@@ -7224,13 +7240,6 @@ void WasmBinaryReader::visitTryTable(TryTable* curr) {
                               code == BinaryConsts::CatchAllRef);
   }
 
-  for (Index i = 0; i < tagIndexes.size(); i++) {
-    if (curr->catchTags[i]) {
-      // We don't know the final name yet.
-      tagRefs[tagIndexes[i]].push_back(&curr->catchTags[i]);
-    }
-  }
-
   // catch_*** clauses should refer to block labels without entering the try
   // scope. So we do this after reading catch clauses.
   startControlFlow(curr);
@@ -7245,7 +7254,6 @@ void WasmBinaryReader::visitThrow(Throw* curr) {
   }
   auto* tag = wasm.tags[index].get();
   curr->tag = tag->name;
-  tagRefs[index].push_back(&curr->tag); // we don't know the final name yet
   size_t num = tag->sig.params.size();
   curr->operands.resize(num);
   for (size_t i = 0; i < num; i++) {
@@ -7493,18 +7501,13 @@ bool WasmBinaryReader::maybeVisitArrayNewElem(Expression*& out, uint32_t code) {
       throwError("Expected array heaptype");
     }
     auto segIdx = getU32LEB();
+    auto segment = isData ? getDataName(segIdx) : getElemName(segIdx);
     auto* size = popNonVoidExpression();
     auto* offset = popNonVoidExpression();
     if (isData) {
-      auto* curr =
-        Builder(wasm).makeArrayNewData(heapType, Name(), offset, size);
-      dataRefs[segIdx].push_back(&curr->segment);
-      out = curr;
+      out = Builder(wasm).makeArrayNewData(heapType, segment, offset, size);
     } else {
-      auto* curr =
-        Builder(wasm).makeArrayNewElem(heapType, Name(), offset, size);
-      elemRefs[segIdx].push_back(&curr->segment);
-      out = curr;
+      out = Builder(wasm).makeArrayNewElem(heapType, segment, offset, size);
     }
     return true;
   }
@@ -7635,21 +7638,16 @@ bool WasmBinaryReader::maybeVisitArrayInit(Expression*& out, uint32_t code) {
     throwError("Expected array heaptype");
   }
   Index segIdx = getU32LEB();
+  auto segment = isData ? getDataName(segIdx) : getElemName(segIdx);
   auto* size = popNonVoidExpression();
   auto* offset = popNonVoidExpression();
   auto* index = popNonVoidExpression();
   auto* ref = popNonVoidExpression();
   validateHeapTypeUsingChild(ref, heapType);
   if (isData) {
-    auto* curr =
-      Builder(wasm).makeArrayInitData(Name(), ref, index, offset, size);
-    dataRefs[segIdx].push_back(&curr->segment);
-    out = curr;
+    out = Builder(wasm).makeArrayInitData(segment, ref, index, offset, size);
   } else {
-    auto* curr =
-      Builder(wasm).makeArrayInitElem(Name(), ref, index, offset, size);
-    elemRefs[segIdx].push_back(&curr->segment);
-    out = curr;
+    out = Builder(wasm).makeArrayInitElem(segment, ref, index, offset, size);
   }
   return true;
 }
@@ -7858,25 +7856,12 @@ void WasmBinaryReader::visitResume(Resume* curr) {
   }
 
   auto numHandlers = getU32LEB();
-
-  // We *must* bring the handlerTags vector to an appropriate size to ensure
-  // that we do not invalidate the pointers we add to tagRefs. They need to stay
-  // valid until processNames ran.
   curr->handlerTags.resize(numHandlers);
   curr->handlerBlocks.resize(numHandlers);
 
   for (size_t i = 0; i < numHandlers; i++) {
-    auto tagIndex = getU32LEB();
-    auto tag = getTagName(tagIndex);
-
-    auto handlerIndex = getU32LEB();
-    auto handler = getBreakTarget(handlerIndex).name;
-
-    curr->handlerTags[i] = tag;
-    curr->handlerBlocks[i] = handler;
-
-    // We don't know the final name yet
-    tagRefs[tagIndex].push_back(&curr->handlerTags[i]);
+    curr->handlerTags[i] = getTagName(getU32LEB());
+    curr->handlerBlocks[i] = getBreakTarget(getU32LEB()).name;
   }
 
   curr->cont = popNonVoidExpression();
@@ -7892,14 +7877,9 @@ void WasmBinaryReader::visitResume(Resume* curr) {
 }
 
 void WasmBinaryReader::visitSuspend(Suspend* curr) {
-
   auto tagIndex = getU32LEB();
-  if (tagIndex >= wasm.tags.size()) {
-    throwError("bad tag index");
-  }
+  curr->tag = getTagName(tagIndex);
   auto* tag = wasm.tags[tagIndex].get();
-  curr->tag = tag->name;
-  tagRefs[tagIndex].push_back(&curr->tag);
 
   auto numArgs = tag->sig.params.size();
   curr->operands.resize(numArgs);

From 8f4ca9f5e87c4921ce46eb316b0f281ba80c64f6 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Thu, 19 Sep 2024 14:18:22 -0700
Subject: [PATCH 4/5] skip unparsed functions when printing

---
 src/ir/module-utils.cpp               | 5 ++++-
 src/passes/Print.cpp                  | 3 +++
 src/wasm/wasm-binary.cpp              | 7 ++-----
 test/lit/binary/debug-bad-binary.test | 6 ------
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp
index 1efbe490a07..490955866b0 100644
--- a/src/ir/module-utils.cpp
+++ b/src/ir/module-utils.cpp
@@ -448,7 +448,10 @@ InsertOrderedMap<HeapType, HeapTypeInfo> collectHeapTypeInfo(
       for (auto type : func->vars) {
         info.note(type);
       }
-      if (!func->imported()) {
+      // Don't just use `func->imported()` here because we also might be
+      // printing an error message on a partially parsed module whose declared
+      // function bodies have not all been parsed yet.
+      if (func->body) {
         CodeScanner(wasm, info, inclusion).walk(func->body);
       }
     });
diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 46d519e9eda..d49bc297472 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -2980,6 +2980,9 @@ void PrintSExpression::visitDefinedGlobal(Global* curr) {
 void PrintSExpression::visitFunction(Function* curr) {
   if (curr->imported()) {
     visitImportedFunction(curr);
+  } else if (curr->body == nullptr) {
+    // We are in the middle of parsing the module and have not parsed this
+    // function's code yet. Skip it.
   } else {
     visitDefinedFunction(curr);
   }
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 96e9317094b..d999141fd4a 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -2600,11 +2600,8 @@ void WasmBinaryReader::readFunctionSignatures() {
     functionTypes.push_back(type);
     // Check that the type is a signature.
     getSignatureByTypeIndex(index);
-    // Create the function with a placeholder body so we don't trip over it if
-    // we have to print it as part of an error message.
-    Builder builder(wasm);
-    wasm.addFunction(builder.makeFunction(
-      makeName("", i), type, {}, builder.makeUnreachable()));
+    wasm.addFunction(
+      Builder(wasm).makeFunction(makeName("", i), type, {}, nullptr));
   }
 }
 
diff --git a/test/lit/binary/debug-bad-binary.test b/test/lit/binary/debug-bad-binary.test
index 634146a7653..63a2ed2ccac 100644
--- a/test/lit/binary/debug-bad-binary.test
+++ b/test/lit/binary/debug-bad-binary.test
@@ -38,10 +38,4 @@ RUN: not wasm-opt --debug %s.wasm 2>&1 | filecheck %s
 ;; CHECK-NEXT:    (i32.const 10)
 ;; CHECK-NEXT:   )
 ;; CHECK-NEXT:  )
-;; CHECK-NEXT:  (func $1
-;; CHECK-NEXT:   (unreachable)
-;; CHECK-NEXT:  )
-;; CHECK-NEXT:  (func $2
-;; CHECK-NEXT:   (unreachable)
-;; CHECK-NEXT:  )
 ;; CHECK-NEXT: )

From 8d8eb1397e6c6c0b108f2895c7a6007ee8438b0d Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Thu, 19 Sep 2024 15:19:48 -0700
Subject: [PATCH 5/5] update and fix test of datacount error

---
 src/passes/Print.cpp                             |   5 +++++
 test/lit/binary/bad-datacount.test               |   9 +++++++++
 .../binary/bad-datacount.test.wasm}              | Bin
 test/unit/test_datacount.py                      |  15 ---------------
 4 files changed, 14 insertions(+), 15 deletions(-)
 create mode 100644 test/lit/binary/bad-datacount.test
 rename test/{unit/input/bulkmem_bad_datacount.wasm => lit/binary/bad-datacount.test.wasm} (100%)
 delete mode 100644 test/unit/test_datacount.py

diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp
index 46d519e9eda..2fbdb62798f 100644
--- a/src/passes/Print.cpp
+++ b/src/passes/Print.cpp
@@ -3235,6 +3235,11 @@ void PrintSExpression::visitMemory(Memory* curr) {
 }
 
 void PrintSExpression::visitDataSegment(DataSegment* curr) {
+  if (!curr->isPassive && !curr->offset) {
+    // This data segment must have been created from the datacount section but
+    // not parsed yet. Skip it.
+    return;
+  }
   doIndent(o, indent);
   o << '(';
   printMajor(o, "data ");
diff --git a/test/lit/binary/bad-datacount.test b/test/lit/binary/bad-datacount.test
new file mode 100644
index 00000000000..27e0c72a398
--- /dev/null
+++ b/test/lit/binary/bad-datacount.test
@@ -0,0 +1,9 @@
+RUN: not wasm-opt --debug %s.wasm 2>&1 | filecheck %s
+
+;; Check that we get the expected error.
+
+;; CHECK: data count and data sections disagree on size
+
+;; Check that we print out the module rather than hitting an assertion error.
+
+;; CHECK: (module
diff --git a/test/unit/input/bulkmem_bad_datacount.wasm b/test/lit/binary/bad-datacount.test.wasm
similarity index 100%
rename from test/unit/input/bulkmem_bad_datacount.wasm
rename to test/lit/binary/bad-datacount.test.wasm
diff --git a/test/unit/test_datacount.py b/test/unit/test_datacount.py
deleted file mode 100644
index b996865a53c..00000000000
--- a/test/unit/test_datacount.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from scripts.test import shared
-from . import utils
-
-
-class DataCountTest(utils.BinaryenTestCase):
-    def test_datacount(self):
-        self.roundtrip('bulkmem_data.wasm')
-
-    def test_bad_datacount(self):
-        path = self.input_path('bulkmem_bad_datacount.wasm')
-        p = shared.run_process(shared.WASM_OPT + ['-g', '-o', '-', path],
-                               check=False, capture_output=True)
-        self.assertNotEqual(p.returncode, 0)
-        self.assertIn('Number of segments does not agree with DataCount section',
-                      p.stderr)