From 185019919b1fa2efe8d02056827a31f4f50dd01e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Jan 2024 16:03:33 -0800 Subject: [PATCH] Fix global effect computation with -O flags (#6211) We tested --generate-global-effects --vacuum and such, but not --generate-global-effects -O3 or the other -O flags. Unfortunately, our targeted testing missed a bug because of that. Specifically, we have special logic for -O flags to make sure the passes they expand into run with the proper opt and shrink levels, but that logic happened to also interfere with global effect computation. It would also interfere with allowing GUFA info or other things to be stored on the side, which we've proposed. This PR fixes that + future issues. The fix is to just allow a pass runner to execute more than once. We thought to avoid that and assert against it to keep the model "hermetic" (you create a pass runner, you run the passes, and you throw it out), which feels nice in a way, but it led to the bug here, and I'm not sure it would prevent any other ones really. It is also more code. It is simpler to allow a runner to execute more than once, and add a method to clear it. With that, the logic for -O3 execution is both simpler and does not interfere with anything but the opt and shrink level flags: we create a single runner, give it the proper options, and then keep using that runner + those options as we go, normally. --- src/pass.h | 11 +- src/passes/pass.cpp | 5 +- src/tools/optimization-options.h | 44 +++--- test/lit/passes/global-effects-O.wast | 198 ++++++++++++++++++++++++++ 4 files changed, 229 insertions(+), 29 deletions(-) create mode 100644 test/lit/passes/global-effects-O.wast diff --git a/src/pass.h b/src/pass.h index 83f53c23ad9..2c2fa06190c 100644 --- a/src/pass.h +++ b/src/pass.h @@ -235,8 +235,9 @@ struct PassOptions { // other passes later can benefit from it. 
It is up to the sequence of passes // to update or discard this when necessary - in particular, when new effects // are added to a function this must be changed or we may optimize - // incorrectly (however, it is extremely rare for a pass to *add* effects; - // passes normally only remove effects). + // incorrectly. However, it is extremely rare for a pass to *add* effects; + // passes normally only remove effects. Passes that do add effects must set + // addsEffects() so the pass runner is aware of them. std::shared_ptr<FuncEffectsMap> funcEffectsMap; // -Os is our default @@ -318,6 +319,9 @@ struct PassRunner { // Add a pass given an instance. void add(std::unique_ptr<Pass> pass) { doAdd(std::move(pass)); } + // Clears away all passes that have been added. + void clear(); + // Adds the pass if there are no DWARF-related issues. There is an issue if // there is DWARF and if the pass does not support DWARF (as defined by the // pass returning true from invalidatesDWARF); otherwise, if there is no @@ -387,9 +391,6 @@ struct PassRunner { // yet) have removed DWARF. bool addedPassesRemovedDWARF = false; - // Whether this pass runner has run. A pass runner should only be run once. - bool ran = false; - void runPass(Pass* pass); void runPassOnFunction(Pass* pass, Function* func); diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index d7391a390c3..62b3683fad9 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -739,9 +739,6 @@ static void dumpWasm(Name name, Module* wasm) { } void PassRunner::run() { - assert(!ran); - ran = true; - static const int passDebug = getPassDebug(); // Emit logging information when asked for. At passDebug level 1+ we log // the main passes, while in 2 we also log nested ones. Note that for @@ -885,6 +882,8 @@ void PassRunner::doAdd(std::unique_ptr<Pass> pass) { passes.emplace_back(std::move(pass)); } +void PassRunner::clear() { passes.clear(); } + // Checks that the state is valid before and after a // pass runs on a function. 
We run these extra checks when // pass-debug mode is enabled. diff --git a/src/tools/optimization-options.h b/src/tools/optimization-options.h index 045542ac25a..0a47d9f709e 100644 --- a/src/tools/optimization-options.h +++ b/src/tools/optimization-options.h @@ -328,44 +328,46 @@ struct OptimizationOptions : public ToolOptions { bool runningPasses() { return passes.size() > 0; } void runPasses(Module& wasm) { - std::unique_ptr<PassRunner> passRunner; + PassRunner passRunner(&wasm, passOptions); + if (debug) { + passRunner.setDebug(true); + } // Flush anything in the current pass runner, and then reset it to a fresh // state so it is ready for new things. - auto flushAndReset = [&]() { - if (passRunner) { - passRunner->run(); - } - passRunner = std::make_unique<PassRunner>(&wasm, passOptions); - if (debug) { - passRunner->setDebug(true); - } + auto flush = [&]() { + passRunner.run(); + passRunner.clear(); }; - flushAndReset(); - for (auto& pass : passes) { if (pass.name == DEFAULT_OPT_PASSES) { // This is something like -O3 or -Oz. We must run this now, in order to // set the proper opt and shrink levels. To do that, first reset the // runner so that anything already queued is run (since we can only run // after those things). - flushAndReset(); + flush(); // -O3/-Oz etc. always set their own optimize/shrinkLevels. assert(pass.optimizeLevel); assert(pass.shrinkLevel); - passRunner->options.optimizeLevel = *pass.optimizeLevel; - passRunner->options.shrinkLevel = *pass.shrinkLevel; - // Run our optimizations now, and reset the runner so that the default - // pass options are used later (and not the temporary optimize/ - // shrinkLevels we just set). - passRunner->addDefaultOptimizationPasses(); - flushAndReset(); + // Temporarily override the default levels. 
+ assert(passRunner.options.optimizeLevel == passOptions.optimizeLevel); + assert(passRunner.options.shrinkLevel == passOptions.shrinkLevel); + passRunner.options.optimizeLevel = *pass.optimizeLevel; + passRunner.options.shrinkLevel = *pass.shrinkLevel; + + // Run our optimizations now with the custom levels. + passRunner.addDefaultOptimizationPasses(); + flush(); + + // Restore the default optimize/shrinkLevels. + passRunner.options.optimizeLevel = passOptions.optimizeLevel; + passRunner.options.shrinkLevel = passOptions.shrinkLevel; } else { // This is a normal pass. Add it to the queue for execution. - passRunner->add(pass.name); + passRunner.add(pass.name); // Normal passes do not set their own optimize/shrinkLevels. assert(!pass.optimizeLevel); @@ -373,7 +375,7 @@ struct OptimizationOptions : public ToolOptions { } } - flushAndReset(); + flush(); } }; diff --git a/test/lit/passes/global-effects-O.wast b/test/lit/passes/global-effects-O.wast new file mode 100644 index 00000000000..c24159eace4 --- /dev/null +++ b/test/lit/passes/global-effects-O.wast @@ -0,0 +1,198 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects | filecheck %s --check-prefix CHECK_0 +;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -O1 | filecheck %s --check-prefix CHECK_1 +;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -O3 | filecheck %s --check-prefix CHECK_3 +;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -Os | filecheck %s --check-prefix CHECK_s +;; RUN: foreach %s %t wasm-opt -all -S -o - --generate-global-effects -O | filecheck %s --check-prefix CHECK_O + +;; Test that global effects benefit -O1 and related modes. 
+ +(module + ;; CHECK_0: (type $0 (func)) + + ;; CHECK_0: (type $1 (func (result i32))) + + ;; CHECK_0: (export "main" (func $main)) + ;; CHECK_1: (type $0 (func)) + + ;; CHECK_1: (type $1 (func (result i32))) + + ;; CHECK_1: (export "main" (func $main)) + ;; CHECK_3: (type $0 (func)) + + ;; CHECK_3: (type $1 (func (result i32))) + + ;; CHECK_3: (export "main" (func $main)) + ;; CHECK_s: (type $0 (func)) + + ;; CHECK_s: (type $1 (func (result i32))) + + ;; CHECK_s: (export "main" (func $main)) + ;; CHECK_O: (type $0 (func)) + + ;; CHECK_O: (type $1 (func (result i32))) + + ;; CHECK_O: (export "main" (func $main)) + (export "main" (func $main)) + + ;; CHECK_0: (export "pointless-work" (func $pointless-work)) + ;; CHECK_1: (export "pointless-work" (func $pointless-work)) + ;; CHECK_3: (export "pointless-work" (func $pointless-work)) + ;; CHECK_s: (export "pointless-work" (func $pointless-work)) + ;; CHECK_O: (export "pointless-work" (func $pointless-work)) + (export "pointless-work" (func $pointless-work)) + + ;; CHECK_0: (func $main (type $0) + ;; CHECK_0-NEXT: (if + ;; CHECK_0-NEXT: (call $pointless-work) + ;; CHECK_0-NEXT: (then + ;; CHECK_0-NEXT: (drop + ;; CHECK_0-NEXT: (call $pointless-work) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: ) + ;; CHECK_1: (func $main (type $0) + ;; CHECK_1-NEXT: (nop) + ;; CHECK_1-NEXT: ) + ;; CHECK_3: (func $main (type $0) (; has Stack IR ;) + ;; CHECK_3-NEXT: (nop) + ;; CHECK_3-NEXT: ) + ;; CHECK_s: (func $main (type $0) (; has Stack IR ;) + ;; CHECK_s-NEXT: (nop) + ;; CHECK_s-NEXT: ) + ;; CHECK_O: (func $main (type $0) (; has Stack IR ;) + ;; CHECK_O-NEXT: (nop) + ;; CHECK_O-NEXT: ) + (func $main + ;; This calls a function that does pointless work. After generating global + ;; effects we can see that it is pointless and remove this entire if (except + ;; for -O0). 
+ (if + (call $pointless-work) + (then + (drop + (call $pointless-work) + ) + ) + ) + ) + + ;; CHECK_0: (func $pointless-work (type $1) (result i32) + ;; CHECK_0-NEXT: (local $x i32) + ;; CHECK_0-NEXT: (loop $loop + ;; CHECK_0-NEXT: (local.set $x + ;; CHECK_0-NEXT: (i32.add + ;; CHECK_0-NEXT: (local.get $x) + ;; CHECK_0-NEXT: (i32.const 1) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: (if + ;; CHECK_0-NEXT: (i32.ge_u + ;; CHECK_0-NEXT: (local.get $x) + ;; CHECK_0-NEXT: (i32.const 12345678) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: (then + ;; CHECK_0-NEXT: (return + ;; CHECK_0-NEXT: (local.get $x) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: (br $loop) + ;; CHECK_0-NEXT: ) + ;; CHECK_0-NEXT: ) + ;; CHECK_1: (func $pointless-work (type $1) (result i32) + ;; CHECK_1-NEXT: (local $0 i32) + ;; CHECK_1-NEXT: (loop $loop (result i32) + ;; CHECK_1-NEXT: (br_if $loop + ;; CHECK_1-NEXT: (i32.lt_u + ;; CHECK_1-NEXT: (local.tee $0 + ;; CHECK_1-NEXT: (i32.add + ;; CHECK_1-NEXT: (local.get $0) + ;; CHECK_1-NEXT: (i32.const 1) + ;; CHECK_1-NEXT: ) + ;; CHECK_1-NEXT: ) + ;; CHECK_1-NEXT: (i32.const 12345678) + ;; CHECK_1-NEXT: ) + ;; CHECK_1-NEXT: ) + ;; CHECK_1-NEXT: (local.get $0) + ;; CHECK_1-NEXT: ) + ;; CHECK_1-NEXT: ) + ;; CHECK_3: (func $pointless-work (type $1) (; has Stack IR ;) (result i32) + ;; CHECK_3-NEXT: (local $0 i32) + ;; CHECK_3-NEXT: (loop $loop (result i32) + ;; CHECK_3-NEXT: (br_if $loop + ;; CHECK_3-NEXT: (i32.lt_u + ;; CHECK_3-NEXT: (local.tee $0 + ;; CHECK_3-NEXT: (i32.add + ;; CHECK_3-NEXT: (local.get $0) + ;; CHECK_3-NEXT: (i32.const 1) + ;; CHECK_3-NEXT: ) + ;; CHECK_3-NEXT: ) + ;; CHECK_3-NEXT: (i32.const 12345678) + ;; CHECK_3-NEXT: ) + ;; CHECK_3-NEXT: ) + ;; CHECK_3-NEXT: (local.get $0) + ;; CHECK_3-NEXT: ) + ;; CHECK_3-NEXT: ) + ;; CHECK_s: (func $pointless-work (type $1) (; has Stack IR ;) (result i32) + ;; CHECK_s-NEXT: (local $0 i32) + ;; CHECK_s-NEXT: (loop $loop (result i32) + ;; CHECK_s-NEXT: 
(br_if $loop + ;; CHECK_s-NEXT: (i32.lt_u + ;; CHECK_s-NEXT: (local.tee $0 + ;; CHECK_s-NEXT: (i32.add + ;; CHECK_s-NEXT: (local.get $0) + ;; CHECK_s-NEXT: (i32.const 1) + ;; CHECK_s-NEXT: ) + ;; CHECK_s-NEXT: ) + ;; CHECK_s-NEXT: (i32.const 12345678) + ;; CHECK_s-NEXT: ) + ;; CHECK_s-NEXT: ) + ;; CHECK_s-NEXT: (local.get $0) + ;; CHECK_s-NEXT: ) + ;; CHECK_s-NEXT: ) + ;; CHECK_O: (func $pointless-work (type $1) (; has Stack IR ;) (result i32) + ;; CHECK_O-NEXT: (local $0 i32) + ;; CHECK_O-NEXT: (loop $loop (result i32) + ;; CHECK_O-NEXT: (br_if $loop + ;; CHECK_O-NEXT: (i32.lt_u + ;; CHECK_O-NEXT: (local.tee $0 + ;; CHECK_O-NEXT: (i32.add + ;; CHECK_O-NEXT: (local.get $0) + ;; CHECK_O-NEXT: (i32.const 1) + ;; CHECK_O-NEXT: ) + ;; CHECK_O-NEXT: ) + ;; CHECK_O-NEXT: (i32.const 12345678) + ;; CHECK_O-NEXT: ) + ;; CHECK_O-NEXT: ) + ;; CHECK_O-NEXT: (local.get $0) + ;; CHECK_O-NEXT: ) + ;; CHECK_O-NEXT: ) + (func $pointless-work (result i32) + (local $x i32) + ;; Some pointless work, with no side effects, that cannot be inlined. (The + ;; changes here are not important for this test.) + (loop $loop + (local.set $x + (i32.add + (local.get $x) + (i32.const 1) + ) + ) + (if + (i32.ge_u + (local.get $x) + (i32.const 12345678) + ) + (then + (return + (local.get $x) + ) + ) + ) + (br $loop) + ) + ) +)