From d490318d64a0de809c19333c4b1c5ddfdfa65d18 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 Feb 2024 15:50:19 -0800 Subject: [PATCH] StringLowering pass (#6271) This extends StringGathering by replacing the gathered string globals with imported globals. It adds a custom section with the strings that the imports are expected to provide. It also replaces the string type with extern. This is a complete lowering of strings, except for string operations that are a TODO. After running this, no strings remain in the wasm, and the outside JS is expected to provide the proper imports, which it can do by processing the JSON of the strings in the custom section "string.consts", which looks like ["foo", "bar", ..] That is, an array of strings, which are imported as (import "string.const" "0" (global $string.const_foo (ref extern))) ;; foo (import "string.const" "1" (global $string.const_bar (ref extern))) ;; bar --- src/passes/StringLowering.cpp | 64 +++++++++++++++++++++++++-- src/passes/pass.cpp | 3 ++ src/passes/passes.h | 1 + test/lit/help/wasm-opt.test | 3 ++ test/lit/help/wasm2js.test | 3 ++ test/lit/passes/string-gathering.wast | 46 +++++++++++++++++++ test/lit/passes/string-lowering.wast | 23 ++++++++++ 7 files changed, 139 insertions(+), 4 deletions(-) create mode 100644 test/lit/passes/string-lowering.wast diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index f880a50b250..31e41b9e8c7 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -21,17 +21,18 @@ // globals, avoiding them appearing in code that can run more than once (which // can have overhead in VMs). // -// Building on that, an extended version of StringGathering will also replace -// those new globals with imported globals of type externref, for use with the -// string imports proposal. String operations will likewise need to be lowered. 
-// TODO +// StringLowering does the same, and also replaces those new globals with +// imported globals of type externref, for use with the string imports proposal. +// String operations will likewise need to be lowered. TODO // #include #include "ir/module-utils.h" #include "ir/names.h" +#include "ir/type-updating.h" #include "pass.h" +#include "support/json.h" #include "wasm-builder.h" #include "wasm.h" @@ -175,6 +176,61 @@ struct StringGathering : public Pass { } }; +struct StringLowering : public StringGathering { + void run(Module* module) override { + if (!module->features.has(FeatureSet::Strings)) { + return; + } + + // First, run the gathering operation so all string.consts are in one place. + StringGathering::run(module); + + // Lower the string.const globals into imports. + makeImports(module); + + // Remove all HeapType::string etc. in favor of externref. + updateTypes(module); + + // Disable the feature here after we lowered everything away. + module->features.disable(FeatureSet::Strings); + } + + void makeImports(Module* module) { + Index importIndex = 0; + json::Value stringArray; + stringArray.setArray(); + std::vector importedStrings; + for (auto& global : module->globals) { + if (global->init) { + if (auto* c = global->init->dynCast()) { + global->module = "string.const"; + global->base = std::to_string(importIndex); + importIndex++; + global->init = nullptr; + + auto str = json::Value::make(std::string(c->string.str).c_str()); + stringArray.push_back(str); + } + } + } + + // Add a custom section with the JSON. 
+ std::stringstream stream; + stringArray.stringify(stream); + auto str = stream.str(); + auto vec = std::vector(str.begin(), str.end()); + module->customSections.emplace_back( + CustomSection{"string.consts", std::move(vec)}); + } + + void updateTypes(Module* module) { + TypeMapper::TypeUpdates updates; + updates[HeapType::string] = HeapType::ext; + TypeMapper(*module, updates).map(); + } +}; + Pass* createStringGatheringPass() { return new StringGathering(); } +Pass* createStringLoweringPass() { return new StringLowering(); } } // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 057642ea2b2..c00a5e70603 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -478,6 +478,9 @@ void PassRegistry::registerPasses() { registerPass("string-gathering", "gathers wasm strings to globals", createStringGatheringPass); + registerPass("string-lowering", + "lowers wasm strings and operations to imports", + createStringLoweringPass); registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", diff --git a/src/passes/passes.h b/src/passes/passes.h index 29577210957..3f8b3fe6b1c 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -154,6 +154,7 @@ Pass* createSimplifyLocalsNoStructurePass(); Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); Pass* createStringGatheringPass(); +Pass* createStringLoweringPass(); Pass* createStripDebugPass(); Pass* createStripDWARFPass(); Pass* createStripProducersPass(); diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 8732f5e27ce..5a207a1b05d 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -469,6 +469,9 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --string-gathering gathers wasm strings to globals ;; CHECK-NEXT: +;; CHECK-NEXT: --string-lowering lowers wasm strings and +;; CHECK-NEXT: operations to imports +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; 
CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 40d608857b2..43243b99ab8 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -428,6 +428,9 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --string-gathering gathers wasm strings to globals ;; CHECK-NEXT: +;; CHECK-NEXT: --string-lowering lowers wasm strings and +;; CHECK-NEXT: operations to imports +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast index 21fe358a3c6..657858fc050 100644 --- a/test/lit/passes/string-gathering.wast +++ b/test/lit/passes/string-gathering.wast @@ -1,9 +1,15 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; RUN: foreach %s %t wasm-opt --string-gathering -all -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s --check-prefix=LOWER ;; All the strings should be collected into globals and used from there. They ;; should also be sorted deterministically (alphabetically). +;; +;; LOWER also lowers away strings entirely, leaving only imports and a custom +;; section (that part is tested in string-lowering.wast). It also removes all +;; uses of the string heap type, leaving extern instead for the imported +;; strings. (module ;; Note that $global will be reused: no new global will be added for "foo". 
@@ -19,6 +25,15 @@ (global $global (ref string) (string.const "foo")) ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) + ;; LOWER: (type $0 (func)) + + ;; LOWER: (import "string.const" "0" (global $string.const_bar (ref extern))) + + ;; LOWER: (import "string.const" "1" (global $string.const_other (ref extern))) + + ;; LOWER: (import "string.const" "2" (global $global (ref extern))) + + ;; LOWER: (global $global2 externref (global.get $string.const_bar)) (global $global2 (ref null string) (string.const "bar")) ;; CHECK: (func $a (type $0) @@ -29,6 +44,14 @@ ;; CHECK-NEXT: (global.get $global) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; LOWER: (func $a (type $0) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $string.const_bar) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $global) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: ) (func $a (drop (string.const "bar") @@ -52,6 +75,20 @@ ;; CHECK-NEXT: (global.get $global2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; LOWER: (func $b (type $0) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $string.const_bar) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $string.const_other) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $global) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $global2) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: ) (func $b (drop (string.const "bar") @@ -74,23 +111,32 @@ ;; Multiple possible reusable globals. Also test ignoring of imports. 
(module ;; CHECK: (import "a" "b" (global $import (ref string))) + ;; LOWER: (import "a" "b" (global $import (ref extern))) (import "a" "b" (global $import (ref string))) ;; CHECK: (global $global1 (ref string) (string.const "foo")) (global $global1 (ref string) (string.const "foo")) ;; CHECK: (global $global2 (ref string) (global.get $global1)) + ;; LOWER: (import "string.const" "0" (global $global1 (ref extern))) + + ;; LOWER: (import "string.const" "1" (global $global4 (ref extern))) + + ;; LOWER: (global $global2 (ref extern) (global.get $global1)) (global $global2 (ref string) (string.const "foo")) ;; CHECK: (global $global3 (ref string) (global.get $global1)) + ;; LOWER: (global $global3 (ref extern) (global.get $global1)) (global $global3 (ref string) (string.const "foo")) ;; CHECK: (global $global4 (ref string) (string.const "bar")) (global $global4 (ref string) (string.const "bar")) ;; CHECK: (global $global5 (ref string) (global.get $global4)) + ;; LOWER: (global $global5 (ref extern) (global.get $global4)) (global $global5 (ref string) (string.const "bar")) ;; CHECK: (global $global6 (ref string) (global.get $global4)) + ;; LOWER: (global $global6 (ref extern) (global.get $global4)) (global $global6 (ref string) (string.const "bar")) ) diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast new file mode 100644 index 00000000000..628092d1c4e --- /dev/null +++ b/test/lit/passes/string-lowering.wast @@ -0,0 +1,23 @@ +;; This file checks the custom section that --string-lowering adds. The other +;; operations are tested in string-gathering.wast (which is auto-updated, unlike +;; this which is manual). + +;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s + +(module + (func $consts + (drop + (string.const "foo") + ) + (drop + (string.const "bar") + ) + (drop + (string.const "foo") + ) + ) +) + +;; The custom section should contain foo and bar, and foo only once. 
+;; CHECK: custom section "string.consts", size 13, contents: "[\"bar\",\"foo\"]" +