From 8543f1651e626630cdce02609c8e5e8717f789e8 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 18 Sep 2024 18:37:27 -0700 Subject: [PATCH 1/5] [NFC-ish] Eagerly create Functions in binary parser In preparation for using IRBuilder in the binary parser, eagerly create Functions when parsing the function section so that they are already created once we parse the code section. IRBuilder will require the functions to exist when parsing calls so it can figure out what type each call should have, even when there is a call to a function whose body has not been parsed yet. NFC except that some error messages change to include the new empty functions. --- src/wasm-binary.h | 5 +++++ src/wasm/wasm-binary.cpp | 25 ++++++++++++++----------- test/lit/binary/debug-bad-binary.test | 6 ++++++ 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index fe740d56a76..5021b6a297f 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1532,6 +1532,11 @@ class WasmBinaryReader { // reconstructing the HeapTypes from the Signatures is expensive. std::vector functionTypes; + // Used to make sure the number of imported functions, signatures, and + // declared functions all match up. + Index numFuncImports = 0; + Index numFuncBodies = 0; + void readFunctionSignatures(); HeapType getTypeByIndex(Index index); HeapType getTypeByFunctionIndex(Index index); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index d8a310103da..96e9317094b 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2578,6 +2578,7 @@ void WasmBinaryReader::readImports() { } } } + numFuncImports = wasm.functions.size(); } Name WasmBinaryReader::getNextLabel() { @@ -2595,9 +2596,15 @@ void WasmBinaryReader::readFunctionSignatures() { size_t num = getU32LEB(); for (size_t i = 0; i < num; i++) { auto index = getU32LEB(); - functionTypes.push_back(getTypeByIndex(index)); + HeapType type = getTypeByIndex(index); + functionTypes.push_back(type); // Check that the type is a signature. getSignatureByTypeIndex(index); + // Create the function with a placeholder body so we don't trip over it if + // we have to print it as part of an error message. + Builder builder(wasm); + wasm.addFunction(builder.makeFunction( + makeName("", i), type, {}, builder.makeUnreachable())); } } @@ -2633,12 +2640,11 @@ Signature WasmBinaryReader::getSignatureByFunctionIndex(Index index) { } void WasmBinaryReader::readFunctions() { - auto numImports = wasm.functions.size(); - size_t total = getU32LEB(); - if (total != functionTypes.size() - numImports) { + numFuncBodies = getU32LEB(); + if (numFuncBodies + numFuncImports != wasm.functions.size()) { throwError("invalid function section size, must equal types"); } - for (size_t i = 0; i < total; i++) { + for (size_t i = 0; i < numFuncBodies; i++) { auto sizePos = pos; size_t size = getU32LEB(); if (size == 0) { @@ -2646,9 +2652,7 @@ void WasmBinaryReader::readFunctions() { } endOfFunction = pos + size; - auto func = std::make_unique(); - func->name = makeName("", i); - func->type = getTypeByFunctionIndex(numImports + i); + auto& func = wasm.functions[numFuncImports + i]; currFunction = func.get(); if (DWARF) { @@ -2715,7 +2719,6 @@ void WasmBinaryReader::readFunctions() { std::swap(func->epilogLocation, debugLocation); currFunction = nullptr; debugLocation.clear(); - wasm.addFunction(std::move(func)); } } @@ -3192,8 +3195,8 @@ void WasmBinaryReader::validateBinary() { throwError("Number of segments does not agree with DataCount section"); } - if (functionTypes.size() != wasm.functions.size()) { - throwError("function section without code section"); + if (functionTypes.size() != numFuncImports + numFuncBodies) { + throwError("function and code sections have inconsistent lengths"); } } diff --git a/test/lit/binary/debug-bad-binary.test b/test/lit/binary/debug-bad-binary.test index 63a2ed2ccac..634146a7653 100644 --- a/test/lit/binary/debug-bad-binary.test +++ b/test/lit/binary/debug-bad-binary.test @@ -38,4 +38,10 @@ RUN: not wasm-opt --debug %s.wasm 2>&1 | filecheck %s ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) +;; CHECK-NEXT: (func $1 +;; CHECK-NEXT: (unreachable) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (func $2 +;; CHECK-NEXT: (unreachable) +;; CHECK-NEXT: ) ;; CHECK-NEXT: ) From b83bec4c82bbd1cb725b219473386c3902935a94 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 18 Sep 2024 21:17:55 -0700 Subject: [PATCH 2/5] [NFC] Eagerly create segments when parsing datacount The purpose of the datacount section is to pre-declare how many data segments there will be so that engines can allocate space for them and not have to back patch subsequent instructions in the code section that refer to them. Once we use IRBuilder in the binary parser, we will have to have the data segments available by the time we parse instructions that use them, so eagerly construct the data segments when parsing the datacount section. --- src/wasm/wasm-binary.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 96e9317094b..11ee8fe2f46 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -3276,18 +3276,37 @@ void WasmBinaryReader::processNames() { void WasmBinaryReader::readDataSegmentCount() { hasDataCount = true; dataCount = getU32LEB(); + // Eagerly create the data segments so they are available during parsing of + // the code section. + for (size_t i = 0; i < dataCount; ++i) { + auto curr = Builder::makeDataSegment(); + curr->setName(Name::fromInt(i), false); + wasm.addDataSegment(std::move(curr)); + } } void WasmBinaryReader::readDataSegments() { auto num = getU32LEB(); + if (hasDataCount) { + if (num != dataCount) { + throwError("data count and data sections disagree on size"); + } + } else { + // We haven't already created the data segments, so create them now. + for (size_t i = 0; i < num; ++i) { + auto curr = Builder::makeDataSegment(); + curr->setName(Name::fromInt(i), false); + wasm.addDataSegment(std::move(curr)); + } + } + assert(wasm.dataSegments.size() == num); for (size_t i = 0; i < num; i++) { - auto curr = Builder::makeDataSegment(); + auto& curr = wasm.dataSegments[i]; uint32_t flags = getU32LEB(); if (flags > 2) { throwError("bad segment flags, must be 0, 1, or 2, not " + std::to_string(flags)); } - curr->setName(Name::fromInt(i), false); curr->isPassive = flags & BinaryConsts::IsPassive; if (curr->isPassive) { curr->memory = Name(); @@ -3303,7 +3322,6 @@ void WasmBinaryReader::readDataSegments() { auto size = getU32LEB(); auto data = getByteView(size); curr->data = {data.begin(), data.end()}; - wasm.addDataSegment(std::move(curr)); } } From a124292894c6ef0c2fa9b59256706da473869d56 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 18 Sep 2024 21:08:10 -0700 Subject: [PATCH 3/5] [NFC] Walk module to update names in binary parser The binary parser generally does not know the final names of module elements when it parses them, or even when it parses instructions that refer to them, since the name section comes at the end of a binary. The parser previously kept a list of pointers to locations where each module element's name would have to be used, then it patched those locations after parsing the names section to discover the final names. When the binary parser starts using IRBuilder, the parsed expressions will be constructed and managed by IRBuilder rather than by the parser itself. This means that the parser will no longer be able to collect pointers to places where module element names are used; it won't have access to the instructions at all. Since the strategy of collecting locations to patch will no longer work, switch to a strategy of traversing the module to find and update names instead. This is generally less efficient because the locations have to be found before they can be updated, but on the other hand it only happens when preserving debug info and it is parallelizable anyway. --- src/wasm-binary.h | 32 +--- src/wasm/wasm-binary.cpp | 388 +++++++++++++++++++-------------------- 2 files changed, 194 insertions(+), 226 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 5021b6a297f..c394c400855 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1547,36 +1547,24 @@ class WasmBinaryReader { Name getNextLabel(); - // We read functions and globals before we know their names, so we need to - // backpatch the names later + // We read functions, globals, etc. before we know their final names, so we + // need to backpatch the names later. Map the original names to their indices + // so we can find the final names based on index. + std::unordered_map functionIndices; + std::unordered_map tableIndices; + std::unordered_map memoryIndices; + std::unordered_map globalIndices; + std::unordered_map tagIndices; + std::unordered_map dataIndices; + std::unordered_map elemIndices; - // at index i we have all refs to the function i - std::map> functionRefs; Function* currFunction = nullptr; // before we see a function (like global init expressions), there is no end of // function to check Index endOfFunction = -1; - // at index i we have all references to the table i - std::map> tableRefs; - std::map elemTables; - // at index i we have all references to the memory i - std::map> memoryRefs; - - // at index i we have all refs to the global i - std::map> globalRefs; - - // at index i we have all refs to the tag i - std::map> tagRefs; - - // at index i we have all refs to the data segment i - std::map> dataRefs; - - // at index i we have all refs to the element segment i - std::map> elemRefs; - // Throws a parsing error if we are not in a function context void requireFunctionContext(const char* error); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 11ee8fe2f46..f146e04abaa 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -23,6 +23,7 @@ #include "ir/names.h" #include "ir/table-utils.h" #include "ir/type-updating.h" +#include "pass.h" #include "support/bits.h" #include "support/debug.h" #include "support/stdckdint.h" @@ -2222,6 +2223,7 @@ void WasmBinaryReader::readMemories() { memory->shared, memory->indexType, Memory::kUnlimitedSize); + memoryIndices[memory->name] = wasm.memories.size(); wasm.addMemory(std::move(memory)); } } @@ -2487,11 +2489,6 @@ void WasmBinaryReader::getResizableLimits(Address& initial, void WasmBinaryReader::readImports() { size_t num = getU32LEB(); Builder builder(wasm); - size_t tableCounter = 0; - size_t memoryCounter = 0; - size_t functionCounter = 0; - size_t globalCounter = 0; - size_t tagCounter = 0; for (size_t i = 0; i < num; i++) { auto module = getInlineString(); auto base = getInlineString(); @@ -2501,7 +2498,7 @@ void WasmBinaryReader::readImports() { // could occur later due to the names section. switch (kind) { case ExternalKind::Function: { - Name name = makeName("fimport$", functionCounter++); + Name name = makeName("fimport$", wasm.functions.size()); auto index = getU32LEB(); functionTypes.push_back(getTypeByIndex(index)); auto type = getTypeByIndex(index); @@ -2513,11 +2510,13 @@ void WasmBinaryReader::readImports() { auto curr = builder.makeFunction(name, type, {}); curr->module = module; curr->base = base; + functionIndices[name] = wasm.functions.size(); wasm.addFunction(std::move(curr)); break; } case ExternalKind::Table: { - auto table = builder.makeTable(makeName("timport$", tableCounter++)); + Name name = makeName("timport$", wasm.tables.size()); + auto table = builder.makeTable(name); table->module = module; table->base = base; table->type = getType(); @@ -2531,12 +2530,13 @@ void WasmBinaryReader::readImports() { if (is_shared) { throwError("Tables may not be shared"); } - + tableIndices[name] = wasm.tables.size(); wasm.addTable(std::move(table)); break; } case ExternalKind::Memory: { - auto memory = builder.makeMemory(makeName("mimport$", memoryCounter++)); + Name name = makeName("mimport$", wasm.memories.size()); + auto memory = builder.makeMemory(name); memory->module = module; memory->base = base; getResizableLimits(memory->initial, @@ -2544,32 +2544,36 @@ void WasmBinaryReader::readImports() { memory->shared, memory->indexType, Memory::kUnlimitedSize); + memoryIndices[name] = wasm.memories.size(); wasm.addMemory(std::move(memory)); break; } case ExternalKind::Global: { + Name name = makeName("gimport$", wasm.globals.size()); auto type = getConcreteType(); auto mutable_ = getU32LEB(); if (mutable_ & ~1) { throwError("Global mutability must be 0 or 1"); } auto curr = - builder.makeGlobal(makeName("gimport$", globalCounter++), + builder.makeGlobal(name, type, nullptr, mutable_ ? Builder::Mutable : Builder::Immutable); curr->module = module; curr->base = base; + globalIndices[name] = wasm.globals.size(); wasm.addGlobal(std::move(curr)); break; } case ExternalKind::Tag: { - Name name = makeName("eimport$", tagCounter++); + Name name = makeName("eimport$", wasm.tags.size()); getInt8(); // Reserved 'attribute' field auto index = getU32LEB(); auto curr = builder.makeTag(name, getSignatureByTypeIndex(index)); curr->module = module; curr->base = base; + tagIndices[name] = wasm.tags.size(); wasm.addTag(std::move(curr)); break; } @@ -2603,8 +2607,10 @@ void WasmBinaryReader::readFunctionSignatures() { // Create the function with a placeholder body so we don't trip over it if // we have to print it as part of an error message. Builder builder(wasm); - wasm.addFunction(builder.makeFunction( - makeName("", i), type, {}, builder.makeUnreachable())); + auto name = makeName("", i); + functionIndices[name] = wasm.functions.size(); + wasm.addFunction( + builder.makeFunction(name, type, {}, builder.makeUnreachable())); } } @@ -3004,11 +3010,13 @@ void WasmBinaryReader::readGlobals() { throwError("Global mutability must be 0 or 1"); } auto* init = readExpression(); - wasm.addGlobal( + auto global = Builder::makeGlobal(makeName("global$", i), type, init, - mutable_ ? Builder::Mutable : Builder::Immutable)); + mutable_ ? Builder::Mutable : Builder::Immutable); + globalIndices[global->name] = wasm.globals.size(); + wasm.addGlobal(std::move(global)); } } @@ -3201,7 +3209,112 @@ void WasmBinaryReader::validateBinary() { } void WasmBinaryReader::processNames() { - // now that we have names, apply things + // Now that we have final names for module elements, update the names used in + // expressions and anywhere else. + struct NameUpdater + : WalkerPass< + PostWalker>> { + WasmBinaryReader& parent; + NameUpdater(WasmBinaryReader& parent) : parent(parent) {} + bool isFunctionParallel() override { return true; } + std::unique_ptr create() override { + return std::make_unique(parent); + } + + void visitExpression(Expression* curr) { + auto& wasm = *getModule(); + +#define DELEGATE_ID curr->_id +#define DELEGATE_START(id) [[maybe_unused]] auto* cast = curr->cast(); +#define DELEGATE_GET_FIELD(id, field) cast->field +#define DELEGATE_FIELD_TYPE(id, field) +#define DELEGATE_FIELD_HEAPTYPE(id, field) +#define DELEGATE_FIELD_CHILD(id, field) +#define DELEGATE_FIELD_OPTIONAL_CHILD(id, field) +#define DELEGATE_FIELD_INT(id, field) +#define DELEGATE_FIELD_LITERAL(id, field) +#define DELEGATE_FIELD_NAME(id, field) +#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field) +#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field) +#define DELEGATE_FIELD_ADDRESS(id, field) + +#define DELEGATE_FIELD_NAME_KIND(id, field, kind) \ + switch (kind) { \ + case ModuleItemKind::Function: \ + if (auto it = parent.functionIndices.find(cast->field); \ + it != parent.functionIndices.end()) { \ + cast->field = wasm.functions[it->second]->name; \ + } \ + break; \ + case ModuleItemKind::Table: \ + if (auto it = parent.tableIndices.find(cast->field); \ + it != parent.tableIndices.end()) { \ + cast->field = wasm.tables[it->second]->name; \ + } \ + break; \ + case ModuleItemKind::Memory: \ + if (auto it = parent.memoryIndices.find(cast->field); \ + it != parent.memoryIndices.end()) { \ + cast->field = wasm.memories[it->second]->name; \ + } \ + break; \ + case ModuleItemKind::Global: \ + if (auto it = parent.globalIndices.find(cast->field); \ + it != parent.globalIndices.end()) { \ + cast->field = wasm.globals[it->second]->name; \ + } \ + break; \ + case ModuleItemKind::Tag: \ + if (auto it = parent.tagIndices.find(cast->field); \ + it != parent.tagIndices.end()) { \ + cast->field = wasm.tags[it->second]->name; \ + } \ + break; \ + case ModuleItemKind::DataSegment: \ + if (auto it = parent.dataIndices.find(cast->field); \ + it != parent.dataIndices.end()) { \ + cast->field = wasm.dataSegments[it->second]->name; \ + } \ + break; \ + case ModuleItemKind::ElementSegment: \ + if (auto it = parent.elemIndices.find(cast->field); \ + it != parent.elemIndices.end()) { \ + cast->field = wasm.elementSegments[it->second]->name; \ + } \ + break; \ + case ModuleItemKind::Invalid: \ + WASM_UNREACHABLE("unexpected kind"); \ + } +#include "wasm-delegations-fields.def" + } + }; + + if (debugInfo) { + // Perform the replacement. Mark the runner nested to avoid unnecessary + // validation. + PassRunner runner(&wasm); + runner.setIsNested(true); + runner.add(std::make_unique(*this)); + runner.run(); + NameUpdater(*this).runOnModuleCode(&runner, &wasm); + + for (auto& seg : wasm.elementSegments) { + if (seg->table) { + if (auto it = tableIndices.find(seg->table); it != tableIndices.end()) { + seg->table = wasm.tables[it->second]->name; + } + } + } + + for (auto& seg : wasm.dataSegments) { + if (seg->memory) { + if (auto it = memoryIndices.find(seg->memory); + it != memoryIndices.end()) { + seg->memory = wasm.memories[it->second]->name; + } + } + } + } if (startIndex != static_cast(-1)) { wasm.start = getFunctionName(startIndex); @@ -3232,44 +3345,7 @@ void WasmBinaryReader::processNames() { wasm.addExport(std::move(curr)); } - for (auto& [index, refs] : functionRefs) { - for (auto* ref : refs) { - *ref = getFunctionName(index); - } - } - for (auto& [index, refs] : tableRefs) { - for (auto* ref : refs) { - *ref = getTableName(index); - } - } - for (auto& [index, refs] : memoryRefs) { - for (auto ref : refs) { - *ref = getMemoryName(index); - } - } - for (auto& [index, refs] : globalRefs) { - for (auto* ref : refs) { - *ref = getGlobalName(index); - } - } - for (auto& [index, refs] : tagRefs) { - for (auto* ref : refs) { - *ref = getTagName(index); - } - } - for (auto& [index, refs] : dataRefs) { - for (auto* ref : refs) { - *ref = getDataName(index); - } - } - for (auto& [index, refs] : elemRefs) { - for (auto* ref : refs) { - *ref = getElemName(index); - } - } - // Everything now has its proper name. - wasm.updateMaps(); } @@ -3281,6 +3357,7 @@ void WasmBinaryReader::readDataSegmentCount() { for (size_t i = 0; i < dataCount; ++i) { auto curr = Builder::makeDataSegment(); curr->setName(Name::fromInt(i), false); + dataIndices[curr->name] = wasm.dataSegments.size(); wasm.addDataSegment(std::move(curr)); } } @@ -3296,6 +3373,7 @@ void WasmBinaryReader::readDataSegments() { for (size_t i = 0; i < num; ++i) { auto curr = Builder::makeDataSegment(); curr->setName(Name::fromInt(i), false); + dataIndices[curr->name] = wasm.dataSegments.size(); wasm.addDataSegment(std::move(curr)); } } @@ -3316,7 +3394,7 @@ void WasmBinaryReader::readDataSegments() { if (flags & BinaryConsts::HasIndex) { memIdx = getU32LEB(); } - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); curr->offset = readExpression(); } auto size = getU32LEB(); @@ -3343,6 +3421,7 @@ void WasmBinaryReader::readTableDeclarations() { if (is_shared) { throwError("Tables may not be shared"); } + tableIndices[table->name] = wasm.tables.size(); wasm.addTable(std::move(table)); } } @@ -3413,13 +3492,12 @@ void WasmBinaryReader::readElementSegments() { for (Index j = 0; j < size; j++) { Index index = getU32LEB(); auto sig = getTypeByFunctionIndex(index); - // Use a placeholder name for now - auto* refFunc = Builder(wasm).makeRefFunc(Name::fromInt(index), sig); - functionRefs[index].push_back(&refFunc->func); + auto* refFunc = Builder(wasm).makeRefFunc(getFunctionName(index), sig); segmentData.push_back(refFunc); } } + elemIndices[segment->name] = wasm.elementSegments.size(); wasm.addElementSegment(std::move(segment)); } } @@ -3429,8 +3507,10 @@ void WasmBinaryReader::readTags() { for (size_t i = 0; i < numTags; i++) { getInt8(); // Reserved 'attribute' field auto typeIndex = getU32LEB(); - wasm.addTag(Builder::makeTag(makeName("tag$", i), - getSignatureByTypeIndex(typeIndex))); + auto tag = + Builder::makeTag(makeName("tag$", i), getSignatureByTypeIndex(typeIndex)); + tagIndices[tag->name] = wasm.tags.size(); + wasm.addTag(std::move(tag)); } } @@ -4557,21 +4637,20 @@ void WasmBinaryReader::visitSwitch(Switch* curr) { void WasmBinaryReader::visitCall(Call* curr) { auto index = getU32LEB(); auto sig = getSignatureByFunctionIndex(index); + curr->target = getFunctionName(index); auto num = sig.params.size(); curr->operands.resize(num); for (size_t i = 0; i < num; i++) { curr->operands[num - i - 1] = popNonVoidExpression(); } curr->type = sig.results; - // We don't know function names yet. - functionRefs[index].push_back(&curr->target); curr->finalize(); } void WasmBinaryReader::visitCallIndirect(CallIndirect* curr) { auto index = getU32LEB(); curr->heapType = getTypeByIndex(index); - Index tableIdx = getU32LEB(); + curr->table = getTableName(getU32LEB()); // TODO: Handle error cases where `heapType` is not a signature? auto num = curr->heapType.getSignature().params.size(); curr->operands.resize(num); @@ -4579,8 +4658,6 @@ void WasmBinaryReader::visitCallIndirect(CallIndirect* curr) { for (size_t i = 0; i < num; i++) { curr->operands[num - i - 1] = popNonVoidExpression(); } - // Defer setting the table name for later, when we know it. - tableRefs[tableIdx].push_back(&curr->table); curr->finalize(); } @@ -4617,7 +4694,6 @@ void WasmBinaryReader::visitGlobalGet(GlobalGet* curr) { auto* global = wasm.globals[index].get(); curr->name = global->name; curr->type = global->type; - globalRefs[index].push_back(&curr->name); // we don't know the final name yet } void WasmBinaryReader::visitGlobalSet(GlobalSet* curr) { @@ -4627,7 +4703,6 @@ void WasmBinaryReader::visitGlobalSet(GlobalSet* curr) { } curr->name = wasm.globals[index]->name; curr->value = popNonVoidExpression(); - globalRefs[index].push_back(&curr->name); // we don't know the final name yet curr->finalize(); } @@ -4802,7 +4877,7 @@ bool WasmBinaryReader::maybeVisitLoad( curr->isAtomic = prefix == BinaryConsts::AtomicPrefix; Index memIdx = readMemoryAccess(curr->align, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); curr->ptr = popNonVoidExpression(); curr->finalize(); out = curr; @@ -4920,7 +4995,7 @@ bool WasmBinaryReader::maybeVisitStore( curr->isAtomic = prefix == BinaryConsts::AtomicPrefix; Index memIdx = readMemoryAccess(curr->align, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); curr->value = popNonVoidExpression(); curr->ptr = popNonVoidExpression(); curr->finalize(); @@ -4980,7 +5055,7 @@ bool WasmBinaryReader::maybeVisitAtomicRMW(Expression*& out, uint8_t code) { Address readAlign; Index memIdx = readMemoryAccess(readAlign, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); if (readAlign != curr->bytes) { throwError("Align of AtomicRMW must match size"); } @@ -5031,7 +5106,7 @@ bool WasmBinaryReader::maybeVisitAtomicCmpxchg(Expression*& out, uint8_t code) { Address readAlign; Index memIdx = readMemoryAccess(readAlign, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); if (readAlign != curr->bytes) { throwError("Align of AtomicCpxchg must match size"); } @@ -5066,7 +5141,7 @@ bool WasmBinaryReader::maybeVisitAtomicWait(Expression*& out, uint8_t code) { curr->ptr = popNonVoidExpression(); Address readAlign; Index memIdx = readMemoryAccess(readAlign, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); if (readAlign != curr->expectedType.getByteSize()) { throwError("Align of AtomicWait must match size"); } @@ -5086,7 +5161,7 @@ bool WasmBinaryReader::maybeVisitAtomicNotify(Expression*& out, uint8_t code) { curr->ptr = popNonVoidExpression(); Address readAlign; Index memIdx = readMemoryAccess(readAlign, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); if (readAlign != curr->type.getByteSize()) { throwError("Align of AtomicNotify must match size"); } @@ -5412,10 +5487,8 @@ bool WasmBinaryReader::maybeVisitMemoryInit(Expression*& out, uint32_t code) { curr->size = popNonVoidExpression(); curr->offset = popNonVoidExpression(); curr->dest = popNonVoidExpression(); - Index segIdx = getU32LEB(); - dataRefs[segIdx].push_back(&curr->segment); - Index memIdx = getU32LEB(); - memoryRefs[memIdx].push_back(&curr->memory); + curr->segment = getDataName(getU32LEB()); + curr->memory = getMemoryName(getU32LEB()); curr->finalize(); out = curr; return true; @@ -5426,8 +5499,7 @@ bool WasmBinaryReader::maybeVisitDataDrop(Expression*& out, uint32_t code) { return false; } auto* curr = allocator.alloc(); - Index segIdx = getU32LEB(); - dataRefs[segIdx].push_back(&curr->segment); + curr->segment = getDataName(getU32LEB()); curr->finalize(); out = curr; return true; @@ -5441,11 +5513,9 @@ bool WasmBinaryReader::maybeVisitMemoryCopy(Expression*& out, uint32_t code) { curr->size = popNonVoidExpression(); curr->source = popNonVoidExpression(); curr->dest = popNonVoidExpression(); - Index destIdx = getU32LEB(); - Index sourceIdx = getU32LEB(); + curr->destMemory = getMemoryName(getU32LEB()); + curr->sourceMemory = getMemoryName(getU32LEB()); curr->finalize(); - memoryRefs[destIdx].push_back(&curr->destMemory); - memoryRefs[sourceIdx].push_back(&curr->sourceMemory); out = curr; return true; } @@ -5458,9 +5528,8 @@ bool WasmBinaryReader::maybeVisitMemoryFill(Expression*& out, uint32_t code) { curr->size = popNonVoidExpression(); curr->value = popNonVoidExpression(); curr->dest = popNonVoidExpression(); - Index memIdx = getU32LEB(); + curr->memory = getMemoryName(getU32LEB()); curr->finalize(); - memoryRefs[memIdx].push_back(&curr->memory); out = curr; return true; } @@ -5469,17 +5538,13 @@ bool WasmBinaryReader::maybeVisitTableSize(Expression*& out, uint32_t code) { if (code != BinaryConsts::TableSize) { return false; } - Index tableIdx = getU32LEB(); - if (tableIdx >= wasm.tables.size()) { - throwError("bad table index"); - } auto* curr = allocator.alloc(); + Index tableIdx = getU32LEB(); + curr->table = getTableName(tableIdx); if (getTable(tableIdx)->is64()) { curr->type = Type::i64; } curr->finalize(); - // Defer setting the table name for later, when we know it. - tableRefs[tableIdx].push_back(&curr->table); out = curr; return true; } @@ -5488,19 +5553,15 @@ bool WasmBinaryReader::maybeVisitTableGrow(Expression*& out, uint32_t code) { if (code != BinaryConsts::TableGrow) { return false; } - Index tableIdx = getU32LEB(); - if (tableIdx >= wasm.tables.size()) { - throwError("bad table index"); - } auto* curr = allocator.alloc(); + Index tableIdx = getU32LEB(); + curr->table = getTableName(tableIdx); curr->delta = popNonVoidExpression(); curr->value = popNonVoidExpression(); if (getTable(tableIdx)->is64()) { curr->type = Type::i64; } curr->finalize(); - // Defer setting the table name for later, when we know it. - tableRefs[tableIdx].push_back(&curr->table); out = curr; return true; } @@ -5509,16 +5570,11 @@ bool WasmBinaryReader::maybeVisitTableFill(Expression*& out, uint32_t code) { if (code != BinaryConsts::TableFill) { return false; } - Index tableIdx = getU32LEB(); - if (tableIdx >= wasm.tables.size()) { - throwError("bad table index"); - } + auto table = getTableName(getU32LEB()); auto* size = popNonVoidExpression(); auto* value = popNonVoidExpression(); auto* dest = popNonVoidExpression(); - auto* ret = Builder(wasm).makeTableFill(Name(), dest, value, size); - tableRefs[tableIdx].push_back(&ret->table); - out = ret; + out = Builder(wasm).makeTableFill(table, dest, value, size); return true; } @@ -5526,21 +5582,12 @@ bool WasmBinaryReader::maybeVisitTableCopy(Expression*& out, uint32_t code) { if (code != BinaryConsts::TableCopy) { return false; } - Index destTableIdx = getU32LEB(); - if (destTableIdx >= wasm.tables.size()) { - throwError("bad table index"); - } - Index sourceTableIdx = getU32LEB(); - if (sourceTableIdx >= wasm.tables.size()) { - throwError("bad table index"); - } + auto destTable = getTableName(getU32LEB()); + auto srcTable = getTableName(getU32LEB()); auto* size = popNonVoidExpression(); auto* source = popNonVoidExpression(); auto* dest = popNonVoidExpression(); - auto* ret = Builder(wasm).makeTableCopy(dest, source, size, Name(), Name()); - tableRefs[destTableIdx].push_back(&ret->destTable); - tableRefs[sourceTableIdx].push_back(&ret->sourceTable); - out = ret; + out = Builder(wasm).makeTableCopy(dest, source, size, destTable, srcTable); return true; } @@ -5552,10 +5599,8 @@ bool WasmBinaryReader::maybeVisitTableInit(Expression*& out, uint32_t code) { curr->size = popNonVoidExpression(); curr->offset = popNonVoidExpression(); curr->dest = popNonVoidExpression(); - Index segIdx = getU32LEB(); - elemRefs[segIdx].push_back(&curr->segment); - Index memIdx = getU32LEB(); - tableRefs[memIdx].push_back(&curr->table); + curr->segment = getElemName(getU32LEB()); + curr->table = getTableName(getU32LEB()); curr->finalize(); out = curr; return true; @@ -6553,7 +6598,7 @@ bool WasmBinaryReader::maybeVisitSIMDStore(Expression*& out, uint32_t code) { curr->bytes = 16; curr->valueType = Type::v128; Index memIdx = readMemoryAccess(curr->align, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); curr->isAtomic = false; curr->value = popNonVoidExpression(); curr->ptr = popNonVoidExpression(); @@ -6811,7 +6856,7 @@ bool WasmBinaryReader::maybeVisitSIMDLoad(Expression*& out, uint32_t code) { curr->type = Type::v128; curr->bytes = 16; Index memIdx = readMemoryAccess(curr->align, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); curr->isAtomic = false; curr->ptr = popNonVoidExpression(); curr->finalize(); @@ -6872,7 +6917,7 @@ bool WasmBinaryReader::maybeVisitSIMDLoad(Expression*& out, uint32_t code) { return false; } Index memIdx = readMemoryAccess(curr->align, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); curr->ptr = popNonVoidExpression(); curr->finalize(); out = curr; @@ -6922,7 +6967,7 @@ bool WasmBinaryReader::maybeVisitSIMDLoadStoreLane(Expression*& out, auto* curr = allocator.alloc(); curr->op = op; Index memIdx = readMemoryAccess(curr->align, curr->offset); - memoryRefs[memIdx].push_back(&curr->memory); + curr->memory = getMemoryName(memIdx); curr->index = getLaneIndex(lanes); curr->vec = popNonVoidExpression(); curr->ptr = popNonVoidExpression(); @@ -6965,20 +7010,21 @@ void WasmBinaryReader::visitReturn(Return* curr) { void WasmBinaryReader::visitMemorySize(MemorySize* curr) { Index index = getU32LEB(); + curr->memory = getMemoryName(index); if (getMemory(index)->is64()) { curr->type = Type::i64; } curr->finalize(); - memoryRefs[index].push_back(&curr->memory); } void WasmBinaryReader::visitMemoryGrow(MemoryGrow* curr) { - curr->delta = popNonVoidExpression(); Index index = getU32LEB(); + curr->memory = getMemoryName(index); if (getMemory(index)->is64()) { curr->type = Type::i64; } - memoryRefs[index].push_back(&curr->memory); + curr->delta = popNonVoidExpression(); + curr->finalize(); } void WasmBinaryReader::visitNop(Nop* curr) {} @@ -7001,14 +7047,9 @@ void WasmBinaryReader::visitRefIsNull(RefIsNull* curr) { void WasmBinaryReader::visitRefFunc(RefFunc* curr) { Index index = getU32LEB(); - // We don't know function names yet, so record this use to be updated later. - // Note that we do not need to check that 'index' is in bounds, as that will - // be verified in the next line. (Also, note that functionRefs[index] may - // write to an odd place in the functionRefs map if index is invalid, but that - // is harmless.) - functionRefs[index].push_back(&curr->func); // To support typed function refs, we give the reference not just a general // funcref, but a specific subtype with the actual signature. + curr->func = getFunctionName(index); curr->finalize(Type(getTypeByFunctionIndex(index), NonNullable)); } @@ -7020,26 +7061,18 @@ void WasmBinaryReader::visitRefEq(RefEq* curr) { void WasmBinaryReader::visitTableGet(TableGet* curr) { Index tableIdx = getU32LEB(); - if (tableIdx >= wasm.tables.size()) { - throwError("bad table index"); - } + curr->table = getTableName(tableIdx); curr->index = popNonVoidExpression(); curr->type = wasm.tables[tableIdx]->type; curr->finalize(); - // Defer setting the table name for later, when we know it. - tableRefs[tableIdx].push_back(&curr->table); } void WasmBinaryReader::visitTableSet(TableSet* curr) { Index tableIdx = getU32LEB(); - if (tableIdx >= wasm.tables.size()) { - throwError("bad table index"); - } + curr->table = getTableName(tableIdx); curr->value = popNonVoidExpression(); curr->index = popNonVoidExpression(); curr->finalize(); - // Defer setting the table name for later, when we know it. - tableRefs[tableIdx].push_back(&curr->table); } void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) { @@ -7076,11 +7109,6 @@ void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) { } }; - // We cannot immediately update tagRefs in the loop below, as catchTags is - // being grown, an so references would get invalidated. Store the indexes - // here, then do that later. - std::vector tagIndexes; - while (lastSeparator == BinaryConsts::Catch_Legacy || lastSeparator == BinaryConsts::CatchAll_Legacy) { if (lastSeparator == BinaryConsts::Catch_Legacy) { @@ -7088,7 +7116,6 @@ void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) { if (index >= wasm.tags.size()) { throwError("bad tag index"); } - tagIndexes.push_back(index); auto* tag = wasm.tags[index].get(); curr->catchTags.push_back(tag->name); readCatchBody(tag->sig.params); @@ -7101,11 +7128,6 @@ void WasmBinaryReader::visitTryOrTryInBlock(Expression*& out) { } breakStack.pop_back(); - for (Index i = 0; i < tagIndexes.size(); i++) { - // We don't know the final name yet. - tagRefs[tagIndexes[i]].push_back(&curr->catchTags[i]); - } - if (lastSeparator == BinaryConsts::Delegate) { curr->delegateTarget = getExceptionTargetName(getU32LEB()); } @@ -7200,10 +7222,6 @@ void WasmBinaryReader::visitTryTable(TryTable* curr) { // within each try-body, and let branches target those inner blocks instead. curr->type = getType(); auto numCatches = getU32LEB(); - // We cannot immediately update tagRefs in the loop below, as catchTags is - // being grown, an so references would get invalidated. Store the indexes - // here, then do that later. - std::vector tagIndexes; for (size_t i = 0; i < numCatches; i++) { uint8_t code = getInt8(); @@ -7212,11 +7230,9 @@ void WasmBinaryReader::visitTryTable(TryTable* curr) { if (index >= wasm.tags.size()) { throwError("bad tag index"); } - tagIndexes.push_back(index); auto* tag = wasm.tags[index].get(); curr->catchTags.push_back(tag->name); } else { - tagIndexes.push_back(-1); // unused curr->catchTags.push_back(Name()); } curr->catchDests.push_back(getBreakTarget(getU32LEB()).name); @@ -7224,13 +7240,6 @@ void WasmBinaryReader::visitTryTable(TryTable* curr) { code == BinaryConsts::CatchAllRef); } - for (Index i = 0; i < tagIndexes.size(); i++) { - if (curr->catchTags[i]) { - // We don't know the final name yet. - tagRefs[tagIndexes[i]].push_back(&curr->catchTags[i]); - } - } - // catch_*** clauses should refer to block labels without entering the try // scope. So we do this after reading catch clauses. startControlFlow(curr); @@ -7245,7 +7254,6 @@ void WasmBinaryReader::visitThrow(Throw* curr) { } auto* tag = wasm.tags[index].get(); curr->tag = tag->name; - tagRefs[index].push_back(&curr->tag); // we don't know the final name yet size_t num = tag->sig.params.size(); curr->operands.resize(num); for (size_t i = 0; i < num; i++) { @@ -7493,18 +7501,13 @@ bool WasmBinaryReader::maybeVisitArrayNewElem(Expression*& out, uint32_t code) { throwError("Expected array heaptype"); } auto segIdx = getU32LEB(); + auto segment = isData ? getDataName(segIdx) : getElemName(segIdx); auto* size = popNonVoidExpression(); auto* offset = popNonVoidExpression(); if (isData) { - auto* curr = - Builder(wasm).makeArrayNewData(heapType, Name(), offset, size); - dataRefs[segIdx].push_back(&curr->segment); - out = curr; + out = Builder(wasm).makeArrayNewData(heapType, segment, offset, size); } else { - auto* curr = - Builder(wasm).makeArrayNewElem(heapType, Name(), offset, size); - elemRefs[segIdx].push_back(&curr->segment); - out = curr; + out = Builder(wasm).makeArrayNewElem(heapType, segment, offset, size); } return true; } @@ -7635,21 +7638,16 @@ bool WasmBinaryReader::maybeVisitArrayInit(Expression*& out, uint32_t code) { throwError("Expected array heaptype"); } Index segIdx = getU32LEB(); + auto segment = isData ? getDataName(segIdx) : getElemName(segIdx); auto* size = popNonVoidExpression(); auto* offset = popNonVoidExpression(); auto* index = popNonVoidExpression(); auto* ref = popNonVoidExpression(); validateHeapTypeUsingChild(ref, heapType); if (isData) { - auto* curr = - Builder(wasm).makeArrayInitData(Name(), ref, index, offset, size); - dataRefs[segIdx].push_back(&curr->segment); - out = curr; + out = Builder(wasm).makeArrayInitData(segment, ref, index, offset, size); } else { - auto* curr = - Builder(wasm).makeArrayInitElem(Name(), ref, index, offset, size); - elemRefs[segIdx].push_back(&curr->segment); - out = curr; + out = Builder(wasm).makeArrayInitElem(segment, ref, index, offset, size); } return true; } @@ -7858,25 +7856,12 @@ void WasmBinaryReader::visitResume(Resume* curr) { } auto numHandlers = getU32LEB(); - - // We *must* bring the handlerTags vector to an appropriate size to ensure - // that we do not invalidate the pointers we add to tagRefs. They need to stay - // valid until processNames ran. curr->handlerTags.resize(numHandlers); curr->handlerBlocks.resize(numHandlers); for (size_t i = 0; i < numHandlers; i++) { - auto tagIndex = getU32LEB(); - auto tag = getTagName(tagIndex); - - auto handlerIndex = getU32LEB(); - auto handler = getBreakTarget(handlerIndex).name; - - curr->handlerTags[i] = tag; - curr->handlerBlocks[i] = handler; - - // We don't know the final name yet - tagRefs[tagIndex].push_back(&curr->handlerTags[i]); + curr->handlerTags[i] = getTagName(getU32LEB()); + curr->handlerBlocks[i] = getBreakTarget(getU32LEB()).name; } curr->cont = popNonVoidExpression(); @@ -7892,14 +7877,9 @@ void WasmBinaryReader::visitResume(Resume* curr) { } void WasmBinaryReader::visitSuspend(Suspend* curr) { - auto tagIndex = getU32LEB(); - if (tagIndex >= wasm.tags.size()) { - throwError("bad tag index"); - } + curr->tag = getTagName(tagIndex); auto* tag = wasm.tags[tagIndex].get(); - curr->tag = tag->name; - tagRefs[tagIndex].push_back(&curr->tag); auto numArgs = tag->sig.params.size(); curr->operands.resize(numArgs); From 8f4ca9f5e87c4921ce46eb316b0f281ba80c64f6 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 19 Sep 2024 14:18:22 -0700 Subject: [PATCH 4/5] skip unparsed functions when printing --- src/ir/module-utils.cpp | 5 ++++- src/passes/Print.cpp | 3 +++ src/wasm/wasm-binary.cpp | 7 ++----- test/lit/binary/debug-bad-binary.test | 6 ------ 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/ir/module-utils.cpp b/src/ir/module-utils.cpp index 1efbe490a07..490955866b0 100644 --- a/src/ir/module-utils.cpp +++ b/src/ir/module-utils.cpp @@ -448,7 +448,10 @@ InsertOrderedMap collectHeapTypeInfo( for (auto type : func->vars) { info.note(type); } - if (!func->imported()) { + // Don't just use `func->imported()` here because we also might be + // printing an error message on a partially parsed module whose declared + // function bodies have not all been parsed yet. + if (func->body) { CodeScanner(wasm, info, inclusion).walk(func->body); } }); diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 46d519e9eda..d49bc297472 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2980,6 +2980,9 @@ void PrintSExpression::visitDefinedGlobal(Global* curr) { void PrintSExpression::visitFunction(Function* curr) { if (curr->imported()) { visitImportedFunction(curr); + } else if (curr->body == nullptr) { + // We are in the middle of parsing the module and have not parsed this + // function's code yet. Skip it. } else { visitDefinedFunction(curr); } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 96e9317094b..d999141fd4a 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2600,11 +2600,8 @@ void WasmBinaryReader::readFunctionSignatures() { functionTypes.push_back(type); // Check that the type is a signature. getSignatureByTypeIndex(index); - // Create the function with a placeholder body so we don't trip over it if - // we have to print it as part of an error message. - Builder builder(wasm); - wasm.addFunction(builder.makeFunction( - makeName("", i), type, {}, builder.makeUnreachable())); + wasm.addFunction( + Builder(wasm).makeFunction(makeName("", i), type, {}, nullptr)); } } diff --git a/test/lit/binary/debug-bad-binary.test b/test/lit/binary/debug-bad-binary.test index 634146a7653..63a2ed2ccac 100644 --- a/test/lit/binary/debug-bad-binary.test +++ b/test/lit/binary/debug-bad-binary.test @@ -38,10 +38,4 @@ RUN: not wasm-opt --debug %s.wasm 2>&1 | filecheck %s ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) -;; CHECK-NEXT: (func $1 -;; CHECK-NEXT: (unreachable) -;; CHECK-NEXT: ) -;; CHECK-NEXT: (func $2 -;; CHECK-NEXT: (unreachable) -;; CHECK-NEXT: ) ;; CHECK-NEXT: ) From 8d8eb1397e6c6c0b108f2895c7a6007ee8438b0d Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 19 Sep 2024 15:19:48 -0700 Subject: [PATCH 5/5] update and fix test of datacount error --- src/passes/Print.cpp | 5 +++++ test/lit/binary/bad-datacount.test | 9 +++++++++ .../binary/bad-datacount.test.wasm} | Bin test/unit/test_datacount.py | 15 --------------- 4 files changed, 14 insertions(+), 15 deletions(-) create mode 100644 test/lit/binary/bad-datacount.test rename test/{unit/input/bulkmem_bad_datacount.wasm => lit/binary/bad-datacount.test.wasm} (100%) delete mode 100644 test/unit/test_datacount.py diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 46d519e9eda..2fbdb62798f 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -3235,6 +3235,11 @@ void PrintSExpression::visitMemory(Memory* curr) { } void PrintSExpression::visitDataSegment(DataSegment* curr) { + if (!curr->isPassive && !curr->offset) { + // This data segment must have been created from the datacount section but + // not parsed yet. Skip it. + return; + } doIndent(o, indent); o << '('; printMajor(o, "data "); diff --git a/test/lit/binary/bad-datacount.test b/test/lit/binary/bad-datacount.test new file mode 100644 index 00000000000..27e0c72a398 --- /dev/null +++ b/test/lit/binary/bad-datacount.test @@ -0,0 +1,9 @@ +RUN: not wasm-opt --debug %s.wasm 2>&1 | filecheck %s + +;; Check that we get the expected error. + +;; CHECK: data count and data sections disagree on size + +;; Check that we print out the module rather than hitting an assertion error. + +;; CHECK: (module diff --git a/test/unit/input/bulkmem_bad_datacount.wasm b/test/lit/binary/bad-datacount.test.wasm similarity index 100% rename from test/unit/input/bulkmem_bad_datacount.wasm rename to test/lit/binary/bad-datacount.test.wasm diff --git a/test/unit/test_datacount.py b/test/unit/test_datacount.py deleted file mode 100644 index b996865a53c..00000000000 --- a/test/unit/test_datacount.py +++ /dev/null @@ -1,15 +0,0 @@ -from scripts.test import shared -from . import utils - - -class DataCountTest(utils.BinaryenTestCase): - def test_datacount(self): - self.roundtrip('bulkmem_data.wasm') - - def test_bad_datacount(self): - path = self.input_path('bulkmem_bad_datacount.wasm') - p = shared.run_process(shared.WASM_OPT + ['-g', '-o', '-', path], - check=False, capture_output=True) - self.assertNotEqual(p.returncode, 0) - self.assertIn('Number of segments does not agree with DataCount section', - p.stderr)