-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: Add support for induced field accesses in physical promotion #87410
JIT: Add support for induced field accesses in physical promotion #87410
Conversation
Add support for promoting fields based on the fact that assignment decomposition induces new field accesses. To do so we store all struct assignments that involve candidates for physical promotion as part of the initial accounting pass. Then, when picking the replacements, we do it as a fixpoint computation, iteratively expanding the set of replacements based on the field accesses induced by assignment decomposition with the existing set of replacements. Fix dotnet#87371
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch Issue Details: Add support for promoting fields based on the fact that assignment decomposition induces new field accesses. Fix #87371
|
61548b4
to
7af29f9
Compare
/azp run runtime-coreclr jitstress, runtime-coreclr libraries-jitstress |
Azure Pipelines successfully started running 2 pipeline(s). |
/azp run runtime-jit-experimental |
Azure Pipelines successfully started running 1 pipeline(s). |
Example from the [MethodImpl(MethodImplOptions.NoInlining)]
private static int SumSpanWrapper(SpanWrapper<int> spanWrapper)
{
int sum = 0;
foreach (int val in spanWrapper)
sum += val;
return sum;
}
public ref struct SpanWrapper<T>
{
public ReadOnlySpan<T> Span;
public ReadOnlySpan<T>.Enumerator GetEnumerator() => Span.GetEnumerator();
}
@@ -7,69 +7,60 @@
; 0 inlinees with PGO data; 4 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T04] ( 3, 6 ) byref -> rcx ld-addr-op single-def
-; V01 loc0 [V01,T03] ( 4, 9 ) int -> rax
+; V00 arg0 [V00,T03] ( 4, 8 ) byref -> rcx ld-addr-op single-def
+; V01 loc0 [V01,T04] ( 4, 9 ) int -> rax
;* V02 loc1 [V02 ] ( 0, 0 ) struct (24) zero-ref do-not-enreg[SF] ld-addr-op
;* V03 loc2 [V03 ] ( 0, 0 ) int -> zero-ref
; V04 OutArgs [V04 ] ( 1, 1 ) struct (32) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V05 tmp1 [V05,T06] ( 3, 6 ) struct (24) [rsp+20H] do-not-enreg[SF] must-init ld-addr-op "NewObj constructor temp"
+;* V05 tmp1 [V05 ] ( 0, 0 ) struct (24) zero-ref do-not-enreg[SF] ld-addr-op "NewObj constructor temp"
;* V06 tmp2 [V06 ] ( 0, 0 ) struct (16) zero-ref "Inlining Arg"
;* V07 tmp3 [V07 ] ( 0, 0 ) int -> zero-ref "Span.get_Item index"
;* V08 tmp4 [V08,T05] ( 0, 0 ) bool -> zero-ref "Inline return value spill temp"
; V09 tmp5 [V09,T00] ( 3, 20 ) int -> r8 "Inline stloc first use temp"
;* V10 tmp6 [V10 ] ( 0, 0 ) byref -> zero-ref V06._reference(offs=0x00) P-INDEP "field V06._reference (fldOffset=0x0)"
;* V11 tmp7 [V11 ] ( 0, 0 ) int -> zero-ref V06._length(offs=0x08) P-INDEP "field V06._length (fldOffset=0x8)"
-; V12 tmp8 [V12,T07] ( 2, 3 ) byref -> rdx single-def "V02.[000..008)"
+; V12 tmp8 [V12,T06] ( 2, 3 ) byref -> rdx single-def "V02.[000..008)"
; V13 tmp9 [V13,T02] ( 3, 11 ) int -> rcx single-def "V02.[008..012)"
; V14 tmp10 [V14,T01] ( 5, 17 ) int -> r8 "V02.[016..020)"
-;* V15 tmp11 [V15 ] ( 0, 0 ) int -> zero-ref "V05.[016..020)"
+; V15 tmp11 [V15,T07] ( 2, 2 ) byref -> rdx single-def "V05.[000..008)"
+; V16 tmp12 [V16,T08] ( 2, 2 ) int -> rcx single-def "V05.[008..012)"
+;* V17 tmp13 [V17 ] ( 0, 0 ) int -> zero-ref "V05.[016..020)"
;
-; Lcl frame size = 56
+; Lcl frame size = 40
G_M29036_IG01: ;; offset=0000H
- sub rsp, 56
- vzeroupper
- vxorps xmm4, xmm4, xmm4
- vmovdqa xmmword ptr [rsp+20H], xmm4
+ sub rsp, 40
+ ;; size=4 bbWeight=1 PerfScore 0.25
+G_M29036_IG02: ;; offset=0004H
xor eax, eax
- mov qword ptr [rsp+30H], rax
- ;; size=24 bbWeight=1 PerfScore 4.83
-G_M29036_IG02: ;; offset=0018H
- xor eax, eax
- ;; size=2 bbWeight=1 PerfScore 0.25
-G_M29036_IG03: ;; offset=001AH
- vmovdqu xmm0, xmmword ptr [rcx]
- vmovdqu xmmword ptr [rsp+20H], xmm0
- ;; size=10 bbWeight=1 PerfScore 5.00
-G_M29036_IG04: ;; offset=0024H
- mov rdx, bword ptr [rsp+20H]
- mov ecx, dword ptr [rsp+28H]
+ mov rdx, bword ptr [rcx]
+ mov ecx, dword ptr [rcx+08H]
mov r8d, -1
- jmp SHORT G_M29036_IG06
- ;; size=17 bbWeight=1 PerfScore 4.25
-G_M29036_IG05: ;; offset=0035H
+ jmp SHORT G_M29036_IG04
+ ;; size=16 bbWeight=1 PerfScore 6.50
+G_M29036_IG03: ;; offset=0014H
cmp r8d, ecx
- jae SHORT G_M29036_IG09
+ jae SHORT G_M29036_IG07
mov r9d, r8d
add eax, dword ptr [rdx+4*r9]
;; size=12 bbWeight=2 PerfScore 9.00
-G_M29036_IG06: ;; offset=0041H
+G_M29036_IG04: ;; offset=0020H
inc r8d
cmp r8d, ecx
- jge SHORT G_M29036_IG08
+ jge SHORT G_M29036_IG06
;; size=8 bbWeight=8 PerfScore 12.00
-G_M29036_IG07: ;; offset=0049H
- jmp SHORT G_M29036_IG05
+G_M29036_IG05: ;; offset=0028H
+ jmp SHORT G_M29036_IG03
;; size=2 bbWeight=4 PerfScore 8.00
-G_M29036_IG08: ;; offset=004BH
- add rsp, 56
+G_M29036_IG06: ;; offset=002AH
+ add rsp, 40
ret
;; size=5 bbWeight=4 PerfScore 5.00
-G_M29036_IG09: ;; offset=0050H
+G_M29036_IG07: ;; offset=002FH
call CORINFO_HELP_RNGCHKFAIL
int3
;; size=6 bbWeight=0 PerfScore 0.00
-; Total bytes of code 86, prolog size 24, PerfScore 56.93, instruction count 25, allocated bytes for code 86 (MethodHash=bccc8e93) for method Program:SumSpanWrapper(Program+SpanWrapper`1[int]):int
+; Total bytes of code 53, prolog size 4, PerfScore 46.05, instruction count 18, allocated bytes for code 53 (MethodHash=bccc8e93) for method Program:SumSpanWrapper(Program+SpanWrapper`1[int]):int
*************** Starting PHASE Physical promotion
Picking promotions
Accesses for V00
[000..016) as Program+SpanWrapper`1[int]
#: (1, 100)
# assigned from: (1, 100)
# assigned to: (0, 0)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
Picking promotions for V00
Accesses for V02
[000..024) as System.ReadOnlySpan`1+Enumerator[int]
#: (1, 100)
# assigned from: (0, 0)
# assigned to: (1, 100)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
byref @ 000
#: (1, 100)
# assigned from: (0, 0)
# assigned to: (0, 0)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
int @ 008
#: (2, 200)
# assigned from: (0, 0)
# assigned to: (0, 0)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
int @ 016
#: (3, 300)
# assigned from: (1, 100)
# assigned to: (1, 100)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
Picking promotions for V02
Evaluating access byref @ 000
Single write-back cost: 3
Write backs: 0
Read backs: 0
Cost with: 50
Cost without: 300
Promoting replacement
Evaluating access int @ 008
Single write-back cost: 3
Write backs: 0
Read backs: 0
Cost with: 100
Cost without: 600
Promoting replacement
Evaluating access int @ 016
Single write-back cost: 3
Write backs: 0
Read backs: 0
Cost with: 150
Cost without: 900
Promoting replacement
Accesses for V05
[000..016) as System.ReadOnlySpan`1[int]
#: (1, 100)
# assigned from: (0, 0)
# assigned to: (1, 100)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
[000..024) as System.ReadOnlySpan`1+Enumerator[int]
#: (1, 100)
# assigned from: (1, 100)
# assigned to: (0, 0)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
int @ 016
#: (1, 100)
# assigned from: (0, 0)
# assigned to: (1, 100)
# as call arg: (0, 0)
# as retbuf: (0, 0)
# as returned value: (0, 0)
Picking promotions for V05
Evaluating access int @ 016
Single write-back cost: 3
Write backs: 0
Read backs: 0
Cost with: 50
Cost without: 300
Promoting replacement
Looking for induced accesses with 2 stores between candidates
Induced accesses for V05
byref @ 000
#: (1, 100)
int @ 008
#: (1, 100)
Picking induced promotions for V05
Evaluating access byref @ 000
Single write-back cost: 3
Write backs: 0
Read backs: 0
Cost with: 50
Cost without: 300
Promoting replacement // new promotion
Evaluating access int @ 008
Single write-back cost: 3
Write backs: 0
Read backs: 0
Cost with: 50
Cost without: 300
Promoting replacement // new promotion
Induced accesses for V00
byref @ 000
#: (1, 100)
int @ 008
#: (1, 100)
Picking induced promotions for V00
Evaluating access byref @ 000
Single write-back cost: 3
Write backs: 0
Read backs: 100
Cost with: 350
Cost without: 300
Disqualifying replacement
Evaluating access int @ 008
Single write-back cost: 3
Write backs: 0
Read backs: 100
Cost with: 350
Cost without: 300
Disqualifying replacement
lvaGrabTemp returning 12 (V12 tmp8) (a long lifetime temp) called for V02.[000..008).
lvaGrabTemp returning 13 (V13 tmp9) (a long lifetime temp) called for V02.[008..012).
lvaGrabTemp returning 14 (V14 tmp10) (a long lifetime temp) called for V02.[016..020).
V02 promoted with 3 replacements
[000..008) promoted as byref V12
[008..012) promoted as int V13
[016..020) promoted as int V14
Computing unpromoted remainder for V02
Remainder: [012..016)
lvaGrabTemp returning 15 (V15 tmp11) (a long lifetime temp) called for V05.[000..008).
lvaGrabTemp returning 16 (V16 tmp12) (a long lifetime temp) called for V05.[008..012).
lvaGrabTemp returning 17 (V17 tmp13) (a long lifetime temp) called for V05.[016..020).
V05 promoted with 3 replacements
[000..008) promoted as byref V15
[008..012) promoted as int V16
[016..020) promoted as int V17
Computing unpromoted remainder for V05
Remainder: [012..016) |
It looks like there is definitely a noticeable throughput (TP) impact, significantly higher in realworld.run and libraries.pmi than in other collections. This is odd, as I didn't see it locally, but it is likely just user error. I think there are plenty of things to investigate to improve TP for this, but I will probably leave that for a later task if there is time for it. Also, for some reason this is a large improvement on all platforms except linux-arm, where it is a large regression. I need to investigate that. |
The problem is that I forgot to initialize |
One source of regressions is that physical promotion can cause us to no longer be able to optimize boxes away, e.g. in the diff below:
@@ -10,11 +10,14 @@
;* V00 loc0 [V00 ] ( 0, 0 ) struct (32) zero-ref do-not-enreg[SF] ld-addr-op
; V01 OutArgs [V01 ] ( 1, 1 ) struct (32) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V02 tmp1 [V02 ] ( 0, 0 ) struct (32) zero-ref do-not-enreg[S] ld-addr-op "Inlining Arg"
-;* V03 tmp2 [V03 ] ( 0, 0 ) ref -> zero-ref class-hnd exact "Single-def Box Helper"
+; V03 tmp2 [V03,T00] ( 5, 10 ) ref -> rax class-hnd exact single-def "Single-def Box Helper"
;* V04 tmp3 [V04 ] ( 0, 0 ) ref -> zero-ref class-hnd exact "Inlining Arg"
;* V05 tmp4 [V05 ] ( 0, 0 ) int -> zero-ref "V00.[008..012)"
-;* V06 tmp5 [V06,T00] ( 0, 0 ) int -> zero-ref single-def "V00.[012..016)"
-;* V07 tmp6 [V07,T01] ( 0, 0 ) int -> zero-ref single-def "V00.[016..020)"
+;* V06 tmp5 [V06,T01] ( 0, 0 ) int -> zero-ref single-def "V00.[012..016)"
+;* V07 tmp6 [V07,T02] ( 0, 0 ) int -> zero-ref single-def "V00.[016..020)"
+;* V08 tmp7 [V08 ] ( 0, 0 ) int -> zero-ref single-def "V02.[008..012)"
+;* V09 tmp8 [V09 ] ( 0, 0 ) int -> zero-ref single-def "V02.[012..016)"
+;* V10 tmp9 [V10 ] ( 0, 0 ) int -> zero-ref single-def "V02.[016..020)"
;
; Lcl frame size = 40
@@ -22,20 +25,38 @@ G_M38373_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M38373_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
+ mov rcx, 0xD1FFAB1E ; Microsoft.Diagnostics.Tracing.Extensions.ETWKernelControl+RTL_OSVERSIONINFOEX
+ call CORINFO_HELP_NEWSFAST
+ ; gcrRegs +[rax]
+ ; gcr arg pop 0
+ xor ecx, ecx
+ mov qword ptr [rax+08H], rcx
+ mov qword ptr [rax+10H], rcx
+ mov qword ptr [rax+18H], rcx
+ mov qword ptr [rax+20H], rcx
+ ;; size=33 bbWeight=1 PerfScore 5.50
+G_M38373_IG03: ; bbWeight=1, gcrefRegs=0001 {rax}, byrefRegs=0000 {}, byref
+ mov dword ptr [rax+10H], ecx
+ ;; size=3 bbWeight=1 PerfScore 1.00
+G_M38373_IG04: ; bbWeight=1, gcrefRegs=0001 {rax}, byrefRegs=0000 {}, byref
+ mov dword ptr [rax+14H], ecx
+ ;; size=3 bbWeight=1 PerfScore 1.00
+G_M38373_IG05: ; bbWeight=1, gcrefRegs=0001 {rax}, byrefRegs=0000 {}, byref
+ mov dword ptr [rax+18H], ecx
mov rcx, 0xD1FFAB1E
; gcrRegs +[rcx]
mov edx, 1
call <unknown method>
- ; gcrRegs -[rcx]
+ ; gcrRegs -[rax rcx]
; gcr arg pop 0
xor eax, eax
- ;; size=22 bbWeight=1 PerfScore 1.75
-G_M38373_IG03: ; bbWeight=1, epilog, nogc, extend
+ ;; size=25 bbWeight=1 PerfScore 2.75
+G_M38373_IG06: ; bbWeight=1, epilog, nogc, extend
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
-; Total bytes of code 31, prolog size 4, PerfScore 6.35, instruction count 7, allocated bytes for code 31 (MethodHash=d6476a1a) for method Microsoft.Diagnostics.Tracing.Extensions.ETWKernelControl:IsWin8orNewer():bool
+; Total bytes of code 73, prolog size 4, PerfScore 19.05, instruction count 17, allocated bytes for code 73 (MethodHash=d6476a1a) for method Microsoft.Diagnostics.Tracing.Extensions.ETWKernelControl:IsWin8orNewer():bool
That's caused by:
fgMorphTree BB01, STMT00008 (before)
[000047] ----------- ▌ JTRUE void
[000046] ----------- └──▌ NE int
[000034] ----------- ├──▌ BOX ref
[000033] ----------- │ └──▌ LCL_VAR ref V03 tmp2 (last use)
[000045] ----------- └──▌ CNS_INT ref null
Attempting to optimize BOX(valueType) NE null [000046]
gtTryRemoveBoxUpstreamEffects: attempting to remove side effects of BOX (valuetype) [000034] (assign/newobj STMT00004 copy STMT00005
-Bashing NEWOBJ [000028] to NOP
-
-Bashing COPY [000032] to NOP; no source side effects.
-
-Success: replacing BOX(valueType) NE null with 1
-
-fgMorphTree BB01, STMT00008 (after)
- [000047] -----+----- ▌ JTRUE void
- [000074] -----+----- └──▌ CNS_INT int 1
-removing useless STMT00008 ( INL02 @ 0x000[E-] ... ??? ) <- INL01 @ ??? <- INLRT @ 0x008[E-]
- [000047] -----+----- ▌ JTRUE void
- [000074] -----+----- └──▌ CNS_INT int 1
- from BB01
-
-Conditional folded at BB01
-BB01 becomes a BBJ_ALWAYS to BB04
+ bailing; unexpected copy op COMMA
I don't plan to address this; hopefully it is something that the work on multi-use boxes can help with. |
cc @dotnet/jit-contrib PTAL @AndyAyersMS Pretty good diffs for this change: Diffs with physical promotion. Diffs without old promotion. As mentioned above there are things that could be done to mitigate some of the TP hit, for example when we create a new replacement we ideally would only go look at the stores that actually involve the new replacements created. However, I think I will look at TP improvements separately if there is more time. This change passed jitstress, libraries-jitstress and runtime-jit-experimental above. There are some linux CI timeouts but it passed those jobs on previous runs so I don't think we need to rerun. |
// Expand the set of fields iteratively based on the current picked | ||
// set. We put a limit on this fixpoint computation to avoid | ||
// pathological cases. From measurements no methods in our own | ||
// collections need more than 10 iterations and 99.5% of methods | ||
// need fewer than 5 iterations. | ||
for (int iters = 0; iters < 10; iters++) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some histograms over our collections of how many iterations are required (without the limit):
JitEnablePhysicalPromotion=1
realworld:
<= 0 ===> 510 count ( 33% of total)
1 .. 1 ===> 425 count ( 60% of total)
2 .. 2 ===> 363 count ( 84% of total)
3 .. 3 ===> 203 count ( 97% of total)
4 .. 4 ===> 29 count ( 99% of total)
5 .. 5 ===> 5 count ( 99% of total)
6 .. 6 ===> 2 count (100% of total)
7 .. 7 ===> 0 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
benchmarks.run_pgo:
<= 0 ===> 383 count ( 48% of total)
1 .. 1 ===> 176 count ( 71% of total)
2 .. 2 ===> 192 count ( 95% of total)
3 .. 3 ===> 35 count ( 99% of total)
4 .. 4 ===> 0 count ( 99% of total)
5 .. 5 ===> 1 count (100% of total)
6 .. 6 ===> 0 count (100% of total)
7 .. 7 ===> 0 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
aspnet:
<= 0 ===> 1051 count ( 44% of total)
1 .. 1 ===> 593 count ( 69% of total)
2 .. 2 ===> 349 count ( 83% of total)
3 .. 3 ===> 305 count ( 96% of total)
4 .. 4 ===> 66 count ( 99% of total)
5 .. 5 ===> 14 count (100% of total)
6 .. 6 ===> 0 count (100% of total)
7 .. 7 ===> 0 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
libraries.pmi:
<= 0 ===> 2265 count ( 36% of total)
1 .. 1 ===> 1911 count ( 67% of total)
2 .. 2 ===> 1137 count ( 85% of total)
3 .. 3 ===> 683 count ( 96% of total)
4 .. 4 ===> 161 count ( 99% of total)
5 .. 5 ===> 14 count ( 99% of total)
6 .. 6 ===> 13 count ( 99% of total)
7 .. 7 ===> 1 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
coreclr_tests:
<= 0 ===> 676 count ( 24% of total)
1 .. 1 ===> 1563 count ( 81% of total)
2 .. 2 ===> 357 count ( 94% of total)
3 .. 3 ===> 123 count ( 98% of total)
4 .. 4 ===> 23 count ( 99% of total)
5 .. 5 ===> 13 count (100% of total)
6 .. 6 ===> 0 count (100% of total)
7 .. 7 ===> 0 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
libraries_tests:
<= 0 ===> 13044 count ( 21% of total)
1 .. 1 ===> 28551 count ( 69% of total)
2 .. 2 ===> 11932 count ( 89% of total)
3 .. 3 ===> 5347 count ( 98% of total)
4 .. 4 ===> 638 count ( 99% of total)
5 .. 5 ===> 108 count ( 99% of total)
6 .. 6 ===> 68 count ( 99% of total)
7 .. 7 ===> 8 count ( 99% of total)
8 .. 8 ===> 9 count ( 99% of total)
9 .. 9 ===> 3 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
JitEnablePhysicalPromotion=1;JitStressModeNames=STRESS_NO_OLD_PROMOTION
realworld:
<= 0 ===> 2582 count ( 32% of total)
1 .. 1 ===> 2521 count ( 64% of total)
2 .. 2 ===> 1692 count ( 85% of total)
3 .. 3 ===> 929 count ( 97% of total)
4 .. 4 ===> 146 count ( 99% of total)
5 .. 5 ===> 19 count ( 99% of total)
6 .. 6 ===> 11 count ( 99% of total)
7 .. 7 ===> 2 count ( 99% of total)
8 .. 8 ===> 1 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
benchmarks.run_pgo:
<= 0 ===> 1484 count ( 15% of total)
1 .. 1 ===> 3914 count ( 56% of total)
2 .. 2 ===> 3349 count ( 91% of total)
3 .. 3 ===> 653 count ( 98% of total)
4 .. 4 ===> 101 count ( 99% of total)
5 .. 5 ===> 11 count (100% of total)
6 .. 6 ===> 0 count (100% of total)
7 .. 7 ===> 0 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
aspnet:
<= 0 ===> 5836 count ( 31% of total)
1 .. 1 ===> 5383 count ( 61% of total)
2 .. 2 ===> 3652 count ( 81% of total)
3 .. 3 ===> 2850 count ( 96% of total)
4 .. 4 ===> 269 count ( 98% of total)
5 .. 5 ===> 195 count ( 99% of total)
6 .. 6 ===> 85 count ( 99% of total)
7 .. 7 ===> 8 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
> 10 ===> 2 count (100% of total)
libraries.pmi:
<= 0 ===> 13895 count ( 32% of total)
1 .. 1 ===> 16030 count ( 69% of total)
2 .. 2 ===> 8120 count ( 88% of total)
3 .. 3 ===> 4008 count ( 97% of total)
4 .. 4 ===> 769 count ( 99% of total)
5 .. 5 ===> 95 count ( 99% of total)
6 .. 6 ===> 53 count ( 99% of total)
7 .. 7 ===> 13 count ( 99% of total)
8 .. 8 ===> 5 count ( 99% of total)
9 .. 9 ===> 3 count ( 99% of total)
10 .. 10 ===> 1 count (100% of total)
coreclr_tests:
<= 0 ===> 8293 count ( 17% of total)
1 .. 1 ===> 32333 count ( 85% of total)
2 .. 2 ===> 4535 count ( 94% of total)
3 .. 3 ===> 2136 count ( 99% of total)
4 .. 4 ===> 291 count ( 99% of total)
5 .. 5 ===> 79 count ( 99% of total)
6 .. 6 ===> 42 count ( 99% of total)
7 .. 7 ===> 6 count ( 99% of total)
8 .. 8 ===> 2 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
libraries_tests:
<= 0 ===> 1941 count ( 32% of total)
1 .. 1 ===> 1898 count ( 64% of total)
2 .. 2 ===> 1253 count ( 85% of total)
3 .. 3 ===> 745 count ( 97% of total)
4 .. 4 ===> 113 count ( 99% of total)
5 .. 5 ===> 27 count ( 99% of total)
6 .. 6 ===> 3 count ( 99% of total)
7 .. 7 ===> 4 count (100% of total)
8 .. 8 ===> 0 count (100% of total)
9 .. 9 ===> 0 count (100% of total)
10 .. 10 ===> 0 count (100% of total)
Ping @AndyAyersMS -- if possible would like to get this in today so that I have some perf lab data to look at with the new expanded set of promotions on Monday. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Add support for promoting fields based on the fact that store
decomposition induces new field accesses. To do so we store all struct
stores that involve candidates for physical promotion as part of
the initial accounting pass. Then when picking the replacements we do it
as a fixpoint computation, iteratively expanding the set of replacements
based on field accesses induced by store decomposition with the
existing set of replacements.
Fix #87371