From bcbb9f6c04a1210e4eeb290efedf94c432968c2d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Nov 2023 15:48:15 -0800 Subject: [PATCH 1/3] work --- src/pass.h | 10 ++-- src/passes/Inlining.cpp | 52 +++++++++++++++---- .../inlining-optimizing_optimize-level=3.wast | 19 +++---- 3 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/pass.h b/src/pass.h index 74b501eab45..83f53c23ad9 100644 --- a/src/pass.h +++ b/src/pass.h @@ -67,13 +67,15 @@ struct InliningOptions { // Typically a size so small that after optimizations, the inlined code will // be smaller than the call instruction itself. 2 is a safe number because // there is no risk of things like + // // (func $reverse (param $x i32) (param $y i32) // (call $something (local.get $y) (local.get $x)) // ) - // in which case the reversing of the params means we'll possibly need - // a block and a temp local. But that takes at least 3 nodes, and 2 < 3. - // More generally, with 2 items we may have a local.get, but no way to - // require it to be saved instead of directly consumed. + // + // in which case the reversing of the params means we'll possibly need a temp + // local. But that takes at least 3 nodes, and 2 < 3, while with 2 items we + // may have a local.get, but no way to require it to be saved instead of + // directly consumed. Index alwaysInlineMaxSize = 2; // Function size which we inline when there is only one caller. By default we // inline all such functions (as after inlining we can remove the original diff --git a/src/passes/Inlining.cpp b/src/passes/Inlining.cpp index 26ebcf56649..20172d88d60 100644 --- a/src/passes/Inlining.cpp +++ b/src/passes/Inlining.cpp @@ -73,7 +73,19 @@ struct FunctionInfo { bool hasCalls; bool hasLoops; bool hasTryDelegate; - bool usedGlobally; // in a table or export + // Something is used globally if there is a reference to it in a table or + // export etc. + bool usedGlobally; + // We consider a function to be a trivial call if the body is just a call with + // trivial arguments, like this: + // + // (func $forward (param $x) (param $y) + // (call $target (local.get $x) (local.get $y)) + // ) + // + // Specifically the body must be a call, and the operands to the call must be + // of size 1 (generally, LocalGet or Const). + bool isTrivialCall; InliningMode inliningMode; FunctionInfo() { clear(); } @@ -85,6 +97,7 @@ struct FunctionInfo { hasLoops = false; hasTryDelegate = false; usedGlobally = false; + isTrivialCall = false; inliningMode = InliningMode::Unknown; } @@ -96,6 +109,7 @@ struct FunctionInfo { hasLoops = other.hasLoops; hasTryDelegate = other.hasTryDelegate; usedGlobally = other.usedGlobally; + isTrivialCall = other.isTrivialCall; inliningMode = other.inliningMode; return *this; } @@ -122,16 +136,28 @@ struct FunctionInfo { if (size > options.inlining.flexibleInlineMaxSize) { return false; } - // More than one use, so we can't eliminate it after inlining, - // so only worth it if we really care about speed and don't care - // about size. First, check if it has calls. In that case it is not - // likely to speed us up, and also if we want to inline such - // functions we would need to be careful to avoid infinite recursion. - if (hasCalls) { + // More than one use, so we can't eliminate it after inlining, and inlining + // it will hurt code size. Stop if we are focused on size or not heavily + // focused on speed. + if (options.shrinkLevel > 0 || options.optimizeLevel < 3) { return false; } - return options.optimizeLevel >= 3 && options.shrinkLevel == 0 && - (!hasLoops || options.inlining.allowFunctionsWithLoops); + if (hasCalls) { + // This has calls. If it is just a trivial call itself then inline, as we + // will save a call that way - basically we skip a trampoline in the + // middle - but if it is something more complex, leave it alone, as we may + // not help much (and with recursion we may end up with a wasteful + // increase in code size). + // + // Note that inlining trivial calls may increase code size, e.g. if they + // use a parameter more than once (forcing us after inlining to save that + // value to a local, etc.), but here we are optimizing for speed and not + // size, so we risk it. + return isTrivialCall; + } + // This doesn't have calls. Inline if loops do not prevent us (normally, a + // loop suggests a lot of work and so inlining is less useful). + return !hasLoops || options.inlining.allowFunctionsWithLoops; } }; @@ -198,6 +224,14 @@ struct FunctionInfoScanner } info.size = Measurer::measure(curr->body); + + if (auto* call = curr->body->dynCast()) { + if (info.size == call->operands.size() + 1) { + // This function body is a call with some trivial (size 1) operands like + // LocalGet or Const, so it is a trivial call. + info.isTrivialCall = true; + } + } } private: diff --git a/test/lit/passes/inlining-optimizing_optimize-level=3.wast b/test/lit/passes/inlining-optimizing_optimize-level=3.wast index fd646d5b484..a44429688b6 100644 --- a/test/lit/passes/inlining-optimizing_optimize-level=3.wast +++ b/test/lit/passes/inlining-optimizing_optimize-level=3.wast @@ -16,10 +16,10 @@ ;; CHECK: (type $FUNCSIG$i (func (result i32))) - ;; CHECK: (type $5 (func (param i32 i32 i32 i32) (result i32))) - ;; CHECK: (type $FUNCSIG$vii (func (param i32 i32))) + ;; CHECK: (type $6 (func (param i32 i32 i32 i32) (result i32))) + ;; CHECK: (type $FUNCSIG$v (func)) (type $FUNCSIG$v (func)) (type $FUNCSIG$i (func (result i32))) @@ -6487,11 +6487,12 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.set $12 - ;; CHECK-NEXT: (call $___udivdi3 + ;; CHECK-NEXT: (call $___udivmoddi4 ;; CHECK-NEXT: (local.get $12) ;; CHECK-NEXT: (local.get $20) ;; CHECK-NEXT: (i32.const 1000000000) ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (br_if $while-in66 @@ -14813,11 +14814,12 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.set $0 - ;; CHECK-NEXT: (call $___udivdi3 + ;; CHECK-NEXT: (call $___udivmoddi4 ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (if @@ -30696,15 +30698,6 @@ ) (local.get $3) ) - ;; CHECK: (func $___udivdi3 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) - ;; CHECK-NEXT: (call $___udivmoddi4 - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: (local.get $2) - ;; CHECK-NEXT: (local.get $3) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) (func $___udivdi3 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) (call $___udivmoddi4 (local.get $0) From 8ae9ad41f0999227147ca4e3c0b7c13f83dc4094 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Nov 2023 15:55:39 -0800 Subject: [PATCH 2/3] test --- .../lit/passes/inlining_optimize-level=3.wast | 160 ++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/test/lit/passes/inlining_optimize-level=3.wast b/test/lit/passes/inlining_optimize-level=3.wast index b993137c13d..1307fccd829 100644 --- a/test/lit/passes/inlining_optimize-level=3.wast +++ b/test/lit/passes/inlining_optimize-level=3.wast @@ -563,3 +563,163 @@ (unreachable) ) ) + +;; Inlining of trivial calls in the middle. +(module + (table 10 funcref) + + ;; Refer to the middle functions so that we do not inline them as single-use + ;; functions (which would be a trivial case, not related to trivial calls). + (elem (i32.const 0) $middle1 $middle2 $middle3) + + ;; CHECK: (type $0 (func (param i32 i32 i32))) + + ;; CHECK: (type $1 (func)) + + ;; CHECK: (table $0 10 funcref) + + ;; CHECK: (elem $0 (i32.const 0) $middle1 $middle2 $middle3) + + ;; CHECK: (func $top (param $x i32) (param $y i32) (param $z i32) + ;; CHECK-NEXT: (loop $loop + ;; CHECK-NEXT: (call $top + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (br $loop) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $top (param $x i32) (param $y i32) (param $z i32) + ;; This top function will not be inlined. + (loop $loop + (call $top + (i32.const 0) + (i32.const 0) + (i32.const 0) + ) + (br $loop) + ) + ) + + ;; CHECK: (func $middle1 (param $x i32) (param $y i32) (param $z i32) + ;; CHECK-NEXT: (call $top + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (local.get $z) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $middle1 (param $x i32) (param $y i32) (param $z i32) + ;; This function is a trivial call, which we can inline to the bottom. + (call $top + (local.get $x) + (local.get $y) + (local.get $z) + ) + ) + + ;; CHECK: (func $middle2 (param $x i32) (param $y i32) (param $z i32) + ;; CHECK-NEXT: (call $top + ;; CHECK-NEXT: (local.get $z) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $middle2 (param $x i32) (param $y i32) (param $z i32) + ;; Also trivial, even though the order of params is different and we have a + ;; const. + (call $top + (local.get $z) + (i32.const 42) + (local.get $x) + ) + ) + + ;; CHECK: (func $middle3 (param $x i32) (param $y i32) (param $z i32) + ;; CHECK-NEXT: (call $top + ;; CHECK-NEXT: (local.get $z) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $middle3 (param $x i32) (param $y i32) (param $z i32) + ;; Not trivial, becaues of the eqz. + (call $top + (local.get $z) + (i32.eqz + (i32.const 42) + ) + (local.get $x) + ) + ) + + ;; CHECK: (func $bottom + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (block $__inlined_func$middle1 + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $top + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (block $__inlined_func$middle2$1 + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $top + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $middle3 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $bottom + ;; The first two will be inlined. + (call $middle1 + (i32.const 1) + (i32.const 2) + (i32.const 3) + ) + (call $middle2 + (i32.const 1) + (i32.const 2) + (i32.const 3) + ) + (call $middle3 + (i32.const 1) + (i32.const 2) + (i32.const 3) + ) + ) +) From bc9e1ea8e244fb09c6f5b7ec342ecc73d7db38f8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Nov 2023 15:56:18 -0800 Subject: [PATCH 3/3] fix --- test/lit/passes/inlining_optimize-level=3.wast | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/test/lit/passes/inlining_optimize-level=3.wast b/test/lit/passes/inlining_optimize-level=3.wast index 1307fccd829..c03f7f11707 100644 --- a/test/lit/passes/inlining_optimize-level=3.wast +++ b/test/lit/passes/inlining_optimize-level=3.wast @@ -582,24 +582,19 @@ ;; CHECK: (func $top (param $x i32) (param $y i32) (param $z i32) ;; CHECK-NEXT: (loop $loop - ;; CHECK-NEXT: (call $top - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (br $loop) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $top (param $x i32) (param $y i32) (param $z i32) ;; This top function will not be inlined. (loop $loop - (call $top - (i32.const 0) - (i32.const 0) - (i32.const 0) - ) (br $loop) ) + ;; Add to the size so it isn't inlined as a tiny function. + (nop) + (nop) ) ;; CHECK: (func $middle1 (param $x i32) (param $y i32) (param $z i32)