Skip to content

Commit

Permalink
Inlining: Inline trivial calls (WebAssembly#6143)
Browse files Browse the repository at this point in the history
A trivial call is a function whose body does nothing but immediately call another function, for example:

function foo(x, y) {
  return bar(y, 15);
}

We can inline those and expect to benefit in most cases, though we might
increase code size slightly. Hence it makes sense to inline such cases, even
though in general we are careful and do not inline functions with calls in
them; a "trampoline" like that likely has most of the work in the call itself,
which we can avoid by inlining.

Suggested based on findings in Java.
  • Loading branch information
kripken authored and radekdoulik committed Jul 12, 2024
1 parent 0e418fc commit 94b9ec2
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 26 deletions.
10 changes: 6 additions & 4 deletions src/pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,15 @@ struct InliningOptions {
// Typically a size so small that after optimizations, the inlined code will
// be smaller than the call instruction itself. 2 is a safe number because
// there is no risk of things like
//
// (func $reverse (param $x i32) (param $y i32)
// (call $something (local.get $y) (local.get $x))
// )
// in which case the reversing of the params means we'll possibly need
// a block and a temp local. But that takes at least 3 nodes, and 2 < 3.
// More generally, with 2 items we may have a local.get, but no way to
// require it to be saved instead of directly consumed.
//
// in which case the reversing of the params means we'll possibly need a temp
// local. But that takes at least 3 nodes, and 2 < 3, while with 2 items we
// may have a local.get, but no way to require it to be saved instead of
// directly consumed.
Index alwaysInlineMaxSize = 2;
// Function size which we inline when there is only one caller. By default we
// inline all such functions (as after inlining we can remove the original
Expand Down
52 changes: 43 additions & 9 deletions src/passes/Inlining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,19 @@ struct FunctionInfo {
bool hasCalls;
bool hasLoops;
bool hasTryDelegate;
bool usedGlobally; // in a table or export
// Something is used globally if there is a reference to it in a table or
// export etc.
bool usedGlobally;
// We consider a function to be a trivial call if the body is just a call with
// trivial arguments, like this:
//
// (func $forward (param $x) (param $y)
// (call $target (local.get $x) (local.get $y))
// )
//
// Specifically the body must be a call, and the operands to the call must be
// of size 1 (generally, LocalGet or Const).
bool isTrivialCall;
InliningMode inliningMode;

FunctionInfo() { clear(); }
Expand All @@ -85,6 +97,7 @@ struct FunctionInfo {
hasLoops = false;
hasTryDelegate = false;
usedGlobally = false;
isTrivialCall = false;
inliningMode = InliningMode::Unknown;
}

Expand All @@ -96,6 +109,7 @@ struct FunctionInfo {
hasLoops = other.hasLoops;
hasTryDelegate = other.hasTryDelegate;
usedGlobally = other.usedGlobally;
isTrivialCall = other.isTrivialCall;
inliningMode = other.inliningMode;
return *this;
}
Expand All @@ -122,16 +136,28 @@ struct FunctionInfo {
if (size > options.inlining.flexibleInlineMaxSize) {
return false;
}
// More than one use, so we can't eliminate it after inlining,
// so only worth it if we really care about speed and don't care
// about size. First, check if it has calls. In that case it is not
// likely to speed us up, and also if we want to inline such
// functions we would need to be careful to avoid infinite recursion.
if (hasCalls) {
// More than one use, so we can't eliminate it after inlining, and inlining
// it will hurt code size. Stop if we are focused on size or not heavily
// focused on speed.
if (options.shrinkLevel > 0 || options.optimizeLevel < 3) {
return false;
}
return options.optimizeLevel >= 3 && options.shrinkLevel == 0 &&
(!hasLoops || options.inlining.allowFunctionsWithLoops);
if (hasCalls) {
// This has calls. If it is just a trivial call itself then inline, as we
// will save a call that way - basically we skip a trampoline in the
// middle - but if it is something more complex, leave it alone, as we may
// not help much (and with recursion we may end up with a wasteful
// increase in code size).
//
// Note that inlining trivial calls may increase code size, e.g. if they
// use a parameter more than once (forcing us after inlining to save that
// value to a local, etc.), but here we are optimizing for speed and not
// size, so we risk it.
return isTrivialCall;
}
// This doesn't have calls. Inline if loops do not prevent us (normally, a
// loop suggests a lot of work and so inlining is less useful).
return !hasLoops || options.inlining.allowFunctionsWithLoops;
}
};

Expand Down Expand Up @@ -198,6 +224,14 @@ struct FunctionInfoScanner
}

info.size = Measurer::measure(curr->body);

if (auto* call = curr->body->dynCast<Call>()) {
if (info.size == call->operands.size() + 1) {
// This function body is a call with some trivial (size 1) operands like
// LocalGet or Const, so it is a trivial call.
info.isTrivialCall = true;
}
}
}

private:
Expand Down
19 changes: 6 additions & 13 deletions test/lit/passes/inlining-optimizing_optimize-level=3.wast
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

;; CHECK: (type $FUNCSIG$i (func (result i32)))

;; CHECK: (type $5 (func (param i32 i32 i32 i32) (result i32)))

;; CHECK: (type $FUNCSIG$vii (func (param i32 i32)))

;; CHECK: (type $6 (func (param i32 i32 i32 i32) (result i32)))

;; CHECK: (type $FUNCSIG$v (func))
(type $FUNCSIG$v (func))
(type $FUNCSIG$i (func (result i32)))
Expand Down Expand Up @@ -6487,11 +6487,12 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $12
;; CHECK-NEXT: (call $___udivdi3
;; CHECK-NEXT: (call $___udivmoddi4
;; CHECK-NEXT: (local.get $12)
;; CHECK-NEXT: (local.get $20)
;; CHECK-NEXT: (i32.const 1000000000)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (br_if $while-in66
Expand Down Expand Up @@ -14813,11 +14814,12 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $0
;; CHECK-NEXT: (call $___udivdi3
;; CHECK-NEXT: (call $___udivmoddi4
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (i32.const 10)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (if
Expand Down Expand Up @@ -30696,15 +30698,6 @@
)
(local.get $3)
)
;; CHECK: (func $___udivdi3 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32)
;; CHECK-NEXT: (call $___udivmoddi4
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (local.get $3)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $___udivdi3 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32)
(call $___udivmoddi4
(local.get $0)
Expand Down
155 changes: 155 additions & 0 deletions test/lit/passes/inlining_optimize-level=3.wast
Original file line number Diff line number Diff line change
Expand Up @@ -563,3 +563,158 @@
(unreachable)
)
)

;; Inlining of trivial calls in the middle.
(module
(table 10 funcref)

;; Refer to the middle functions so that we do not inline them as single-use
;; functions (which would be a trivial case, not related to trivial calls).
(elem (i32.const 0) $middle1 $middle2 $middle3)

;; CHECK: (type $0 (func (param i32 i32 i32)))

;; CHECK: (type $1 (func))

;; CHECK: (table $0 10 funcref)

;; CHECK: (elem $0 (i32.const 0) $middle1 $middle2 $middle3)

;; CHECK: (func $top (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (loop $loop
;; CHECK-NEXT: (br $loop)
;; CHECK-NEXT: )
;; CHECK-NEXT: (nop)
;; CHECK-NEXT: (nop)
;; CHECK-NEXT: )
(func $top (param $x i32) (param $y i32) (param $z i32)
;; This top function will not be inlined. It is the final call target that
;; each of the $middle* functions below forwards to.
(loop $loop
(br $loop)
)
;; Add to the size so it isn't inlined as a tiny function.
(nop)
(nop)
)

;; CHECK: (func $middle1 (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: (local.get $y)
;; CHECK-NEXT: (local.get $z)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $middle1 (param $x i32) (param $y i32) (param $z i32)
;; This function is a trivial call: its whole body is a single call whose
;; operands are plain local.gets. We can inline it into $bottom.
(call $top
(local.get $x)
(local.get $y)
(local.get $z)
)
)

;; CHECK: (func $middle2 (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $z)
;; CHECK-NEXT: (i32.const 42)
;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $middle2 (param $x i32) (param $y i32) (param $z i32)
;; Also trivial, even though the order of params is different, $y is unused,
;; and one operand is a const: every operand is still a single node.
(call $top
(local.get $z)
(i32.const 42)
(local.get $x)
)
)

;; CHECK: (func $middle3 (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $z)
;; CHECK-NEXT: (i32.eqz
;; CHECK-NEXT: (i32.const 42)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $middle3 (param $x i32) (param $y i32) (param $z i32)
;; Not trivial, because of the eqz: that operand is more than a single node,
;; so this function is left as a normal call in $bottom.
(call $top
(local.get $z)
(i32.eqz
(i32.const 42)
)
(local.get $x)
)
)

;; CHECK: (func $bottom
;; CHECK-NEXT: (local $0 i32)
;; CHECK-NEXT: (local $1 i32)
;; CHECK-NEXT: (local $2 i32)
;; CHECK-NEXT: (local $3 i32)
;; CHECK-NEXT: (local $4 i32)
;; CHECK-NEXT: (local $5 i32)
;; CHECK-NEXT: (block
;; CHECK-NEXT: (block $__inlined_func$middle1
;; CHECK-NEXT: (local.set $0
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $1
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $2
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (block
;; CHECK-NEXT: (block $__inlined_func$middle2$1
;; CHECK-NEXT: (local.set $3
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $4
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $5
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: (i32.const 42)
;; CHECK-NEXT: (local.get $3)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (call $middle3
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $bottom
;; The first two calls ($middle1 and $middle2) will be inlined, as they are
;; trivial calls. The third ($middle3) is not trivial and remains a call.
(call $middle1
(i32.const 1)
(i32.const 2)
(i32.const 3)
)
(call $middle2
(i32.const 1)
(i32.const 2)
(i32.const 3)
)
(call $middle3
(i32.const 1)
(i32.const 2)
(i32.const 3)
)
)
)

0 comments on commit 94b9ec2

Please sign in to comment.