Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inlining: Inline trivial calls #6143

Merged
merged 3 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,15 @@ struct InliningOptions {
// Typically a size so small that after optimizations, the inlined code will
// be smaller than the call instruction itself. 2 is a safe number because
// there is no risk of things like
//
// (func $reverse (param $x i32) (param $y i32)
// (call $something (local.get $y) (local.get $x))
// )
// in which case the reversing of the params means we'll possibly need
// a block and a temp local. But that takes at least 3 nodes, and 2 < 3.
// More generally, with 2 items we may have a local.get, but no way to
// require it to be saved instead of directly consumed.
//
// in which case the reversing of the params means we'll possibly need a temp
// local. But that takes at least 3 nodes, and 2 < 3, while with 2 items we
// may have a local.get, but no way to require it to be saved instead of
// directly consumed.
Index alwaysInlineMaxSize = 2;
// Function size which we inline when there is only one caller. By default we
// inline all such functions (as after inlining we can remove the original
Expand Down
52 changes: 43 additions & 9 deletions src/passes/Inlining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,19 @@ struct FunctionInfo {
bool hasCalls;
bool hasLoops;
bool hasTryDelegate;
bool usedGlobally; // in a table or export
// A function is used globally if there is a reference to it in a table, an
// export, etc.
bool usedGlobally;
// We consider a function to be a trivial call if the body is just a call with
// trivial arguments, like this:
//
// (func $forward (param $x) (param $y)
// (call $target (local.get $x) (local.get $y))
// )
//
// Specifically the body must be a call, and the operands to the call must be
// of size 1 (generally, LocalGet or Const).
bool isTrivialCall;
InliningMode inliningMode;

FunctionInfo() { clear(); }
Expand All @@ -85,6 +97,7 @@ struct FunctionInfo {
hasLoops = false;
hasTryDelegate = false;
usedGlobally = false;
isTrivialCall = false;
inliningMode = InliningMode::Unknown;
}

Expand All @@ -96,6 +109,7 @@ struct FunctionInfo {
hasLoops = other.hasLoops;
hasTryDelegate = other.hasTryDelegate;
usedGlobally = other.usedGlobally;
isTrivialCall = other.isTrivialCall;
inliningMode = other.inliningMode;
return *this;
}
Expand All @@ -122,16 +136,28 @@ struct FunctionInfo {
if (size > options.inlining.flexibleInlineMaxSize) {
return false;
}
// More than one use, so we can't eliminate it after inlining,
// so only worth it if we really care about speed and don't care
// about size. First, check if it has calls. In that case it is not
// likely to speed us up, and also if we want to inline such
// functions we would need to be careful to avoid infinite recursion.
if (hasCalls) {
// More than one use, so we can't eliminate it after inlining, and inlining
// it will hurt code size. Stop if we are focused on size or not heavily
// focused on speed.
if (options.shrinkLevel > 0 || options.optimizeLevel < 3) {
return false;
}
return options.optimizeLevel >= 3 && options.shrinkLevel == 0 &&
(!hasLoops || options.inlining.allowFunctionsWithLoops);
if (hasCalls) {
// This has calls. If it is just a trivial call itself then inline, as we
// will save a call that way - basically we skip a trampoline in the
// middle - but if it is something more complex, leave it alone, as we may
// not help much (and with recursion we may end up with a wasteful
// increase in code size).
//
// Note that inlining trivial calls may increase code size, e.g. if they
// use a parameter more than once (forcing us after inlining to save that
// value to a local, etc.), but here we are optimizing for speed and not
// size, so we risk it.
return isTrivialCall;
}
// This doesn't have calls. Inline if loops do not prevent us (normally, a
// loop suggests a lot of work and so inlining is less useful).
return !hasLoops || options.inlining.allowFunctionsWithLoops;
}
};

Expand Down Expand Up @@ -198,6 +224,14 @@ struct FunctionInfoScanner
}

info.size = Measurer::measure(curr->body);

if (auto* call = curr->body->dynCast<Call>()) {
if (info.size == call->operands.size() + 1) {
// This function body is a call with some trivial (size 1) operands like
// LocalGet or Const, so it is a trivial call.
Comment on lines +230 to +231
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Counting by number of Expressions rather than by number of bytes, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, Measurer just counts Expressions. That happens to be useful here, but in general we might want to add something more accurate some day for bytes.

info.isTrivialCall = true;
}
}
}

private:
Expand Down
19 changes: 6 additions & 13 deletions test/lit/passes/inlining-optimizing_optimize-level=3.wast
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

;; CHECK: (type $FUNCSIG$i (func (result i32)))

;; CHECK: (type $5 (func (param i32 i32 i32 i32) (result i32)))

;; CHECK: (type $FUNCSIG$vii (func (param i32 i32)))

;; CHECK: (type $6 (func (param i32 i32 i32 i32) (result i32)))

;; CHECK: (type $FUNCSIG$v (func))
(type $FUNCSIG$v (func))
(type $FUNCSIG$i (func (result i32)))
Expand Down Expand Up @@ -6487,11 +6487,12 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $12
;; CHECK-NEXT: (call $___udivdi3
;; CHECK-NEXT: (call $___udivmoddi4
;; CHECK-NEXT: (local.get $12)
;; CHECK-NEXT: (local.get $20)
;; CHECK-NEXT: (i32.const 1000000000)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (br_if $while-in66
Expand Down Expand Up @@ -14813,11 +14814,12 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $0
;; CHECK-NEXT: (call $___udivdi3
;; CHECK-NEXT: (call $___udivmoddi4
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (i32.const 10)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (if
Expand Down Expand Up @@ -30696,15 +30698,6 @@
)
(local.get $3)
)
;; CHECK: (func $___udivdi3 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32)
;; CHECK-NEXT: (call $___udivmoddi4
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (local.get $3)
;; CHECK-NEXT: (i32.const 0)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $___udivdi3 (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32)
(call $___udivmoddi4
(local.get $0)
Expand Down
155 changes: 155 additions & 0 deletions test/lit/passes/inlining_optimize-level=3.wast
Original file line number Diff line number Diff line change
Expand Up @@ -563,3 +563,158 @@
(unreachable)
)
)

;; Inlining of trivial calls in the middle.
(module
(table 10 funcref)

;; Refer to the middle functions so that we do not inline them as single-use
;; functions (which would be a trivial case, not related to trivial calls).
(elem (i32.const 0) $middle1 $middle2 $middle3)

;; CHECK: (type $0 (func (param i32 i32 i32)))

;; CHECK: (type $1 (func))

;; CHECK: (table $0 10 funcref)

;; CHECK: (elem $0 (i32.const 0) $middle1 $middle2 $middle3)

;; CHECK: (func $top (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (loop $loop
;; CHECK-NEXT: (br $loop)
;; CHECK-NEXT: )
;; CHECK-NEXT: (nop)
;; CHECK-NEXT: (nop)
;; CHECK-NEXT: )
(func $top (param $x i32) (param $y i32) (param $z i32)
;; This is the target that the $middle* functions below call. This top
;; function will not be inlined: the expected output keeps it as a separate
;; function and keeps the calls to it.
(loop $loop
(br $loop)
)
;; Add to the size so it isn't inlined as a tiny function.
(nop)
(nop)
)

;; CHECK: (func $middle1 (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: (local.get $y)
;; CHECK-NEXT: (local.get $z)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $middle1 (param $x i32) (param $y i32) (param $z i32)
;; This function is a trivial call: its body is a single call whose operands
;; are all plain local.gets. We can inline it into $bottom, which then calls
;; $top directly.
(call $top
(local.get $x)
(local.get $y)
(local.get $z)
)
)

;; CHECK: (func $middle2 (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $z)
;; CHECK-NEXT: (i32.const 42)
;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $middle2 (param $x i32) (param $y i32) (param $z i32)
;; Also trivial, even though the order of params is different and we have a
;; const: every operand of the call is still a single node. Note that $y is
;; not used at all here.
(call $top
(local.get $z)
(i32.const 42)
(local.get $x)
)
)

;; CHECK: (func $middle3 (param $x i32) (param $y i32) (param $z i32)
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $z)
;; CHECK-NEXT: (i32.eqz
;; CHECK-NEXT: (i32.const 42)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.get $x)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $middle3 (param $x i32) (param $y i32) (param $z i32)
;; Not trivial, because of the eqz: that operand has a child of its own, so
;; it is more than a single node. The call to this function in $bottom is
;; expected to remain a call.
(call $top
(local.get $z)
(i32.eqz
(i32.const 42)
)
(local.get $x)
)
)

;; CHECK: (func $bottom
;; CHECK-NEXT: (local $0 i32)
;; CHECK-NEXT: (local $1 i32)
;; CHECK-NEXT: (local $2 i32)
;; CHECK-NEXT: (local $3 i32)
;; CHECK-NEXT: (local $4 i32)
;; CHECK-NEXT: (local $5 i32)
;; CHECK-NEXT: (block
;; CHECK-NEXT: (block $__inlined_func$middle1
;; CHECK-NEXT: (local.set $0
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $1
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $2
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $0)
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (block
;; CHECK-NEXT: (block $__inlined_func$middle2$1
;; CHECK-NEXT: (local.set $3
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $4
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $5
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: (call $top
;; CHECK-NEXT: (local.get $5)
;; CHECK-NEXT: (i32.const 42)
;; CHECK-NEXT: (local.get $3)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (call $middle3
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $bottom
;; The first two calls will be inlined, as $middle1 and $middle2 are trivial
;; calls; after inlining, $bottom calls $top directly for them. The third
;; call, to $middle3, is not a trivial call and is left as it is.
(call $middle1
(i32.const 1)
(i32.const 2)
(i32.const 3)
)
(call $middle2
(i32.const 1)
(i32.const 2)
(i32.const 3)
)
(call $middle3
(i32.const 1)
(i32.const 2)
(i32.const 3)
)
)
)
Loading