-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Runtime lookup clean up, enable for helper-based tail calls #83430
Conversation
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch, @kunalspathak Issue DetailsClean up some dead code and enable runtime lookup expansion for helper-based tail-calls where we previously only used slow fallbacks.
|
src/coreclr/jit/importer.cpp
Outdated
impAppendTree(qmark, CHECK_SPILL_NONE, impCurStmtDI); | ||
|
||
return gtNewLclvNode(slotLclNum, TYP_I_IMPL); | ||
assert(!pRuntimeLookup->testForFixup); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be deleted from JIT/EE interface.
A couple of diffs in coreclr_tests where we have ; Program:GenInterfaceForwardG
@@ -39,11 +42,26 @@ G_M7779_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov rsi, r9
; gcrRegs +[rsi]
;; size=44 bbWeight=1 PerfScore 10.58
-G_M7779_IG02: ; bbWeight=1, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref
+G_M7779_IG02: ; bbWeight=1, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref, isz
+ mov rdx, qword ptr [rcx+38H]
+ cmp qword ptr [rdx+10H], 32
+ jle SHORT G_M7779_IG05
+ ;; size=11 bbWeight=1 PerfScore 6.00
+G_M7779_IG03: ; bbWeight=0.80, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref, isz
+ mov rdx, qword ptr [rdx+20H]
+ test rdx, rdx
+ je SHORT G_M7779_IG05
+ ;; size=9 bbWeight=0.80 PerfScore 2.60
+G_M7779_IG04: ; bbWeight=0.64, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref, isz
+ jmp SHORT G_M7779_IG06
+ ;; size=2 bbWeight=0.64 PerfScore 1.28
+G_M7779_IG05: ; bbWeight=0.36, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref
mov rdx, 0xD1FFAB1E ; global ptr
call CORINFO_HELP_RUNTIMEHANDLE_METHOD
; gcr arg pop 0
mov rdx, rax
+ ;; size=18 bbWeight=0.36 PerfScore 0.54
+G_M7779_IG06: ; bbWeight=1, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref
mov rcx, rsi
; gcrRegs +[rcx]
mov r8, 0xD1FFAB1E ; token handle
@@ -67,8 +85,8 @@ G_M7779_IG02: ; bbWeight=1, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {},
; gcr arg pop 0
mov rax, gword ptr [rsp+30H]
; gcrRegs +[rax]
- ;; size=91 bbWeight=1 PerfScore 11.50
-G_M7779_IG03: ; bbWeight=1, epilog, nogc, extend
+ ;; size=73 bbWeight=1 PerfScore 10.00
+G_M7779_IG07: ; bbWeight=1, epilog, nogc, extend
add rsp, 96
pop rbx
pop rsi
@@ -76,7 +94,7 @@ G_M7779_IG03: ; bbWeight=1, epilog, nogc, extend
ret
;; size=8 bbWeight=1 PerfScore 2.75
-; Total bytes of code 143 |
Nice! Any benchmarks? BTW, doesn't the block layout look kind of odd here: +G_M7779_IG03: ; bbWeight=0.80, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref, isz
+ mov rdx, qword ptr [rdx+20H]
+ test rdx, rdx
+ je SHORT G_M7779_IG05
+ ;; size=9 bbWeight=0.80 PerfScore 2.60
+G_M7779_IG04: ; bbWeight=0.64, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref, isz
+ jmp SHORT G_M7779_IG06
+ ;; size=2 bbWeight=0.64 PerfScore 1.28
+G_M7779_IG05: ; bbWeight=0.36, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref
mov rdx, 0xD1FFAB1E ; global ptr
call CORINFO_HELP_RUNTIMEHANDLE_METHOD
; gcr arg pop 0
mov rdx, rax
+ ;; size=18 bbWeight=0.36 PerfScore 0.54
+G_M7779_IG06: ; bbWeight=1, gcrefRegs=00C0 {rsi rdi}, byrefRegs=0000 {}, byref Couldn't this be |
Quick benchmark: public struct ForceHelper
{
public int A, B, C, D, E, F, G;
}
public interface IFoo<T>
{
long Count(int count, long sum, ForceHelper fh);
}
public unsafe class C<T> : IFoo<T>
{
public long Count(int iterations, long sum, ForceHelper fh)
{
if (iterations == 0)
return sum;
ForceHelper local = fh;
local.A = 30;
IL.Push(this);
IL.Push(iterations - 1);
IL.Push(sum + iterations);
IL.Push(local);
IL.Emit.Tail();
IL.Emit.Callvirt(new MethodRef(typeof(IFoo<T>), "Count"));
return IL.Return<long>();
}
}
🚀 |
Thanks for checking - I was puzzled on how to properly benchmark it 🙂.
We emit block layout like this: // prevBb(BBJ_NONE): [weight: 1.0]
// ...
//
// nullcheckBb(BBJ_COND): [weight: 1.0]
// if (*fastPathValue == null)
// goto fallbackBb;
//
// fastPathBb(BBJ_ALWAYS): [weight: 0.8]
// rtLookupLcl = *fastPathValue;
// goto block;
//
// fallbackBb(BBJ_NONE): [weight: 0.2]
// rtLookupLcl = HelperCall();
//
// block(...): [weight: 1.0]
// use(rtLookupLcl); We probably need to re-order fastPathBb and fallbackBb |
Follow up to #81635
Clean up some dead code and enable runtime lookup expansion for helper-based tail-calls where we previously only used slow fallbacks.