diff --git a/src/wasm-binary.h b/src/wasm-binary.h index a384d2cd5f9..0931a818005 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1490,8 +1490,11 @@ class WasmBinaryWriter { MixedArena allocator; - // storage of source map locations until the section is placed at its final - // location (shrinking LEBs may cause changes there) + // Storage of source map locations until the section is placed at its final + // location (shrinking LEBs may cause changes there). + // + // A null DebugLocation* indicates we have no debug information for that + // location. std::vector> sourceMapLocations; size_t sourceMapLocationsSizeAtSectionStart; @@ -1522,13 +1525,26 @@ class WasmBinaryReader { Module& wasm; MixedArena& allocator; const std::vector& input; + std::istream* sourceMap; + struct NextDebugLocation { uint32_t availablePos; uint32_t previousPos; Function::DebugLocation next; - }; - NextDebugLocation nextDebugLocation; + } nextDebugLocation; + + // Whether debug info is present next or not in the next debug location. A + // debug location can contain debug info (file:line:col) or it might not. We + // need to track this boolean alongside |nextDebugLocation| - that is, we + // can't just do something like std::optional or such - as we + // still need to track the values in |next|, as later positions are relative + // to them. That is, if we have line number 100, then no debug info, and then + // line number 500, then when we get to 500 we will see "+400" which is + // relative to the last existing line number (we "skip" over the place without + // debug info). + bool nextDebugLocationHasDebugInfo; + bool debugInfo = true; bool DWARF = false; bool skipFunctionBodies = false; diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 556f52157d8..d6eb3b23504 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1186,12 +1186,16 @@ void WasmBinaryWriter::writeSourceMapEpilog() { *sourceMap << ","; } writeBase64VLQ(*sourceMap, int32_t(offset - lastOffset)); - writeBase64VLQ(*sourceMap, int32_t(loc->fileIndex - lastLoc.fileIndex)); - writeBase64VLQ(*sourceMap, int32_t(loc->lineNumber - lastLoc.lineNumber)); - writeBase64VLQ(*sourceMap, - int32_t(loc->columnNumber - lastLoc.columnNumber)); - lastLoc = *loc; lastOffset = offset; + if (loc) { + // There is debug information for this location, so emit the next 3 + // fields and update lastLoc. + writeBase64VLQ(*sourceMap, int32_t(loc->fileIndex - lastLoc.fileIndex)); + writeBase64VLQ(*sourceMap, int32_t(loc->lineNumber - lastLoc.lineNumber)); + writeBase64VLQ(*sourceMap, + int32_t(loc->columnNumber - lastLoc.columnNumber)); + lastLoc = *loc; + } } *sourceMap << "\"}"; } @@ -1340,7 +1344,28 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { auto& debugLocations = func->debugLocations; auto iter = debugLocations.find(curr); if (iter != debugLocations.end()) { + // There is debug information here, write it out. writeDebugLocation(iter->second); + } else { + // This expression has no debug location. We need to emit an indication + // of that (so that we do not get "smeared" with debug info from anything + // before or after us). + // + // We don't need to write repeated "no debug info" indications, as a + // single one is enough to make it clear that the debug information before + // us is valid no longer. We also don't need to write one if there is + // nothing before us. + if (!sourceMapLocations.empty() && + sourceMapLocations.back().second != nullptr) { + sourceMapLocations.emplace_back(o.size(), nullptr); + + // Initialize the state of debug info to indicate there is no current + // debug info relevant. This sets |lastDebugLocation| to a dummy value, + // so that later places with debug info can see that they differ from + // it (without this, if we had some debug info, then a nullptr for none, + // and then the same debug info, we could get confused). + initializeDebugInfo(); + } } } // If this is an instruction in a function, and if the original wasm had @@ -1614,7 +1639,8 @@ WasmBinaryReader::WasmBinaryReader(Module& wasm, FeatureSet features, const std::vector& input) : wasm(wasm), allocator(wasm.allocator), input(input), - sourceMap(nullptr), nextDebugLocation{0, 0, {0, 0, 0}}, debugLocation() { + sourceMap(nullptr), nextDebugLocation{0, 0, {0, 0, 0}}, + nextDebugLocationHasDebugInfo(false), debugLocation() { wasm.features = features; } @@ -2776,6 +2802,10 @@ void WasmBinaryReader::readSourceMapHeader() { return; } // read first debug location + // TODO: Handle the case where the very first one has only a position but not + // debug info. In practice that does not happen, which needs + // investigation (if it does, it will assert in readBase64VLQ, so it + // would not be a silent error at least). uint32_t position = readBase64VLQ(*sourceMap); uint32_t fileIndex = readBase64VLQ(*sourceMap); uint32_t lineNumber = @@ -2783,6 +2813,7 @@ void WasmBinaryReader::readSourceMapHeader() { uint32_t columnNumber = readBase64VLQ(*sourceMap); nextDebugLocation = { position, position, {fileIndex, lineNumber, columnNumber}}; + nextDebugLocationHasDebugInfo = true; } void WasmBinaryReader::readNextDebugLocation() { @@ -2803,7 +2834,11 @@ void WasmBinaryReader::readNextDebugLocation() { debugLocation.clear(); // use debugLocation only for function expressions if (currFunction) { - debugLocation.insert(nextDebugLocation.next); + if (nextDebugLocationHasDebugInfo) { + debugLocation.insert(nextDebugLocation.next); + } else { + debugLocation.clear(); + } } char ch; @@ -2818,6 +2853,17 @@ void WasmBinaryReader::readNextDebugLocation() { int32_t positionDelta = readBase64VLQ(*sourceMap); uint32_t position = nextDebugLocation.availablePos + positionDelta; + + nextDebugLocation.previousPos = nextDebugLocation.availablePos; + nextDebugLocation.availablePos = position; + + auto peek = sourceMap->peek(); + if (peek == ',' || peek == '\"') { + // This is a 1-length entry, so the next location has no debug info. + nextDebugLocationHasDebugInfo = false; + break; + } + int32_t fileIndexDelta = readBase64VLQ(*sourceMap); uint32_t fileIndex = nextDebugLocation.next.fileIndex + fileIndexDelta; int32_t lineNumberDelta = readBase64VLQ(*sourceMap); @@ -2826,9 +2872,8 @@ void WasmBinaryReader::readNextDebugLocation() { uint32_t columnNumber = nextDebugLocation.next.columnNumber + columnNumberDelta; - nextDebugLocation = {position, - nextDebugLocation.availablePos, - {fileIndex, lineNumber, columnNumber}}; + nextDebugLocation.next = {fileIndex, lineNumber, columnNumber}; + nextDebugLocationHasDebugInfo = true; } } diff --git a/test/lit/debug/source-map-stop.wast b/test/lit/debug/source-map-stop.wast new file mode 100644 index 00000000000..90bf739c17d --- /dev/null +++ b/test/lit/debug/source-map-stop.wast @@ -0,0 +1,122 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. + +;; RUN: wasm-opt %s -g -o %s.wasm -osm %s.wasm.map +;; RUN: wasm-opt %s.wasm -ism %s.wasm.map -S -o - | filecheck %s + +;; Verify that writing to a source map and reading it back does not "smear" +;; debug info across adjacent instructions. The debug info in the output should +;; be identical to the input. + +(module + ;; CHECK: (func $test (param $0 i32) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $test + ;; CHECK-NEXT: ;;@ waka:100:1 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ;;@ waka:200:2 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + (func $test (param i32) (result i32) + ;; The drop&call have no debug info, and should remain so. Specifically the + ;; instruction right before them in the binary (the const 1) should not + ;; smear its debug info on it. And the drop is between an instruction that + ;; has debug info (the const 1) and another (the i32.const 2): we should not + ;; receive the debug info of either. (This is a regression test for a bug + ;; that only happens in that state: removing the debug info either before or + ;; after would avoid that bug.) + + (drop + (call $test + ;;@ waka:100:1 + (i32.const 1) + ) + ) + ;;@ waka:200:2 + (i32.const 2) + ) + + ;; CHECK: (func $same-later (param $0 i32) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $test + ;; CHECK-NEXT: ;;@ waka:100:1 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ;;@ waka:100:1 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + (func $same-later (param i32) (result i32) + ;; As the first, but now the later debug info is also 100:1. No debug info + ;; should change here. + + (drop + (call $test + ;;@ waka:100:1 + (i32.const 1) + ) + ) + ;;@ waka:100:1 + (i32.const 2) + ) + + ;; CHECK: (func $more-before (param $0 i32) (result i32) + ;; CHECK-NEXT: ;;@ waka:50:5 + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ;;@ waka:50:5 + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $test + ;; CHECK-NEXT: ;;@ waka:100:1 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ;;@ waka:200:2 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + (func $more-before (param i32) (result i32) + ;; As the first, but now there is more debug info before the 100:1 (the + ;; very first debug info in a function has special handling, so we test it + ;; more carefully). + ;; + ;; The s-parser actually smears 50:5 on the drop and call after it, so the + ;; output here looks incorrect. This may be a bug there, TODO + + ;;@ waka:50:5 + (nop) + (drop + (call $test + ;;@ waka:100:1 + (i32.const 1) + ) + ) + ;;@ waka:200:2 + (i32.const 2) + ) + + ;; CHECK: (func $nothing-before (param $0 i32) (result i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $test + ;; CHECK-NEXT: ;;@ waka:100:1 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ;;@ waka:200:2 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + (func $nothing-before (param i32) (result i32) + ;; As before, but no debug info on the nop before us (so the first + ;; instruction in the function no longer has a debug annotation). Nothing + ;; should change in the debug info. + (nop) + (drop + (call $test + ;;@ waka:100:1 + (i32.const 1) + ) + ) + ;;@ waka:200:2 + (i32.const 2) + ) +)