-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: Skip old promotion for retbuf defined locals #104439
JIT: Skip old promotion for retbuf defined locals #104439
Conversation
These locals end up being dependently promoted. Skip them and allow physical promotion to handle them instead.
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch |
Looking at some regressions...
benchmarks.run_pgo.windows.x64.checked.mch
@@ -11,30 +11,27 @@
; Final local variable assignments
;
; V00 this [V00,T01] ( 4, 4 ) byref -> rcx this single-def
-; V01 RetBuf [V01,T00] ( 7, 7 ) byref -> rbx single-def
+; V01 RetBuf [V01,T00] ( 5, 5 ) byref -> rbx single-def
; V02 OutArgs [V02 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V03 tmp1 [V03 ] ( 0, 0 ) struct (32) zero-ref do-not-enreg[SF] ld-addr-op "NewObj constructor temp" <Microsoft.CodeAnalysis.CSharp.DefiniteAssignmentPass+LocalState>
-; V04 tmp2 [V04 ] ( 4, 8 ) struct (24) [rsp+0x28] do-not-enreg[HS] must-init hidden-struct-arg "spilled call-like call argument" <Microsoft.CodeAnalysis.BitVector>
-; V05 tmp3 [V05,T02] ( 2, 3 ) ref -> [rsp+0x28] do-not-enreg[H] hidden-struct-arg "field V04._bits (fldOffset=0x0)" P-DEP
-; V06 tmp4 [V06,T03] ( 2, 3 ) long -> [rsp+0x30] do-not-enreg[H] hidden-struct-arg "field V04._bits0 (fldOffset=0x8)" P-DEP
-; V07 tmp5 [V07,T04] ( 2, 3 ) int -> [rsp+0x38] do-not-enreg[H] hidden-struct-arg "field V04._capacity (fldOffset=0x10)" P-DEP
-;* V08 tmp6 [V08 ] ( 0, 0 ) ubyte -> zero-ref "V03.[000..001)"
-;* V09 tmp7 [V09 ] ( 0, 0 ) ref -> zero-ref single-def "V03.[008..016)"
-;* V10 tmp8 [V10 ] ( 0, 0 ) long -> zero-ref "V03.[016..024)"
-;* V11 tmp9 [V11 ] ( 0, 0 ) int -> zero-ref "V03.[024..028)"
+; V03 tmp1 [V03,T02] ( 2, 4 ) struct (32) [rsp+0x40] do-not-enreg[SF] must-init ld-addr-op "NewObj constructor temp" <Microsoft.CodeAnalysis.CSharp.DefiniteAssignmentPass+LocalState>
+; V04 tmp2 [V04,T03] ( 2, 4 ) struct (24) [rsp+0x28] do-not-enreg[HS] must-init hidden-struct-arg "spilled call-like call argument" <Microsoft.CodeAnalysis.BitVector>
+;* V05 tmp3 [V05 ] ( 0, 0 ) ubyte -> zero-ref "V03.[000..001)"
;
-; Lcl frame size = 64
+; Lcl frame size = 96
G_M17388_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
+ push rdi
+ push rsi
push rbx
- sub rsp, 64
- vxorps xmm4, xmm4, xmm4
- vmovdqu xmmword ptr [rsp+0x28], xmm4
+ sub rsp, 96
xor eax, eax
- mov qword ptr [rsp+0x38], rax
+ mov qword ptr [rsp+0x28], rax
+ vxorps xmm4, xmm4, xmm4
+ vmovdqu ymmword ptr [rsp+0x30], ymm4
+ vmovdqa xmmword ptr [rsp+0x50], xmm4
mov rbx, rdx
; byrRegs +[rbx]
- ;; size=25 bbWeight=1 PerfScore 5.08
+ ;; size=33 bbWeight=1 PerfScore 9.08
G_M17388_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=000A {rcx rbx}, byref
; byrRegs +[rcx]
cmp byte ptr [rcx], cl
@@ -43,28 +40,35 @@ G_M17388_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=000A {rcx rbx},
call [Microsoft.CodeAnalysis.BitVector:Clone():Microsoft.CodeAnalysis.BitVector:this]
; byrRegs -[rcx]
; gcr arg pop 0
+ ;; size=17 bbWeight=1 PerfScore 6.75
+G_M17388_IG03: ; bbWeight=1, nogc, extend
+ vmovdqu xmm0, xmmword ptr [rsp+0x28]
+ vmovdqu xmmword ptr [rsp+0x48], xmm0
+ mov rax, qword ptr [rsp+0x38]
+ mov qword ptr [rsp+0x58], rax
+ ;; size=22 bbWeight=1 PerfScore 6.00
+G_M17388_IG04: ; bbWeight=1, extend
+ mov rdi, rbx
+ ; byrRegs +[rdi]
+ lea rsi, bword ptr [rsp+0x40]
+ ; byrRegs +[rsi]
+ movsq
+ call CORINFO_HELP_ASSIGN_BYREF
+ movsq
+ movsq
mov byte ptr [rbx], 0
- lea rcx, bword ptr [rbx+0x08]
- ; byrRegs +[rcx]
- mov rdx, gword ptr [rsp+0x28]
- ; gcrRegs +[rdx]
- call CORINFO_HELP_CHECKED_ASSIGN_REF
- ; gcrRegs -[rdx]
- ; byrRegs -[rcx]
- mov rax, qword ptr [rsp+0x30]
- mov qword ptr [rbx+0x10], rax
- mov eax, dword ptr [rsp+0x38]
- mov dword ptr [rbx+0x18], eax
mov rax, rbx
; byrRegs +[rax]
- ;; size=53 bbWeight=1 PerfScore 14.50
-G_M17388_IG03: ; bbWeight=1, epilog, nogc, extend
- add rsp, 64
+ ;; size=25 bbWeight=1 PerfScore 6.00
+G_M17388_IG05: ; bbWeight=1, epilog, nogc, extend
+ add rsp, 96
pop rbx
+ pop rsi
+ pop rdi
ret
- ;; size=6 bbWeight=1 PerfScore 1.75
+ ;; size=8 bbWeight=1 PerfScore 2.75
-; Total bytes of code 84, prolog size 22, PerfScore 21.33, instruction count 23, allocated bytes for code 84 (MethodHash=4ae6bc13) for method Microsoft.CodeAnalysis.CSharp.DefiniteAssignmentPass+LocalState:Clone():Microsoft.CodeAnalysis.CSharp.DefiniteAssignmentPass+LocalState:this (Tier1)
+; Total bytes of code 105, prolog size 30, PerfScore 30.58, instruction count 31, allocated bytes for code 105 (MethodHash=4ae6bc13) for method Microsoft.CodeAnalysis.CSharp.DefiniteAssignmentPass+LocalState:Clone():Microsoft.CodeAnalysis.CSharp.DefiniteAssignmentPass+LocalState:this (Tier1)
We end up with fewer physical promotions for V03 than before. That's because we previously regularly promoted the retbuf, which has a block copy with V03; the regular promotion of the retbuf caused us to induce a number of accesses in V04. However, now that regular promotion is skipped for the retbuf, we leave it up to physical promotion, and physical promotion decides not to promote: it sees only block copies between V03 and the retbuf, and does not see it as valuable to decompose this block copy. The real problem here seems to be a lack of copy propagation between the "NewObj constructor temp" and the retbuf, and the fact that the block copy handling in the backend produces less efficient sequences than similar decomposed block copies by the front end.
Regression
@@ -11,8 +11,8 @@
; Final local variable assignments
;
; V00 this [V00,T02] ( 3, 3 ) ref -> rcx this class-hnd single-def <Microsoft.CodeAnalysis.CSharp.CSharpSyntaxNode>
-; V01 RetBuf [V01,T00] ( 10, 10 ) byref -> rbx single-def
-; V02 loc0 [V02 ] ( 8, 8 ) struct (24) [rsp+0x30] do-not-enreg[XS] must-init addr-exposed ld-addr-op <Microsoft.CodeAnalysis.SyntaxToken>
+; V01 RetBuf [V01,T00] ( 6, 6 ) byref -> rbx single-def
+; V02 loc0 [V02 ] ( 4, 4 ) struct (24) [rsp+0x80] do-not-enreg[XS] must-init addr-exposed ld-addr-op <Microsoft.CodeAnalysis.SyntaxToken>
; V03 OutArgs [V03 ] ( 1, 1 ) struct (40) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V04 tmp1 [V04 ] ( 0, 0 ) ref -> zero-ref ld-addr-op class-hnd single-def "Inlining Arg" <Microsoft.CodeAnalysis.SyntaxNode>
; V05 tmp2 [V05,T04] ( 2, 4 ) ref -> rax class-hnd exact single-def "Inlining Arg" <Microsoft.CodeAnalysis.SyntaxNavigator>
@@ -26,59 +26,44 @@
;* V13 tmp10 [V13 ] ( 0, 0 ) int -> zero-ref
;* V14 tmp11 [V14 ] ( 0, 0 ) int -> zero-ref
;* V15 tmp12 [V15 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
-;* V16 tmp13 [V16 ] ( 0, 0 ) struct (40) zero-ref do-not-enreg[S] "Inline return value spill temp" <Microsoft.CodeAnalysis.SyntaxTriviaList>
+; V16 tmp13 [V16,T10] ( 4, 3 ) struct (40) [rsp+0x58] do-not-enreg[S] must-init "Inline return value spill temp" <Microsoft.CodeAnalysis.SyntaxTriviaList>
; V17 tmp14 [V17,T01] ( 5, 6 ) ref -> rcx class-hnd single-def "spilling ret_expr" <Microsoft.CodeAnalysis.GreenNode>
-; V18 tmp15 [V18,T03] ( 3, 4 ) ref -> rdx class-hnd "impAppendStmt" <Microsoft.CodeAnalysis.GreenNode>
-;* V19 tmp16 [V19 ] ( 0, 0 ) struct (40) zero-ref do-not-enreg[SF] ld-addr-op "NewObj constructor temp" <Microsoft.CodeAnalysis.SyntaxTriviaList>
+; V18 tmp15 [V18,T03] ( 3, 4 ) ref -> rbp class-hnd "impAppendStmt" <Microsoft.CodeAnalysis.GreenNode>
+; V19 tmp16 [V19,T08] ( 2, 4 ) struct (40) [rsp+0x30] do-not-enreg[SF] must-init ld-addr-op "NewObj constructor temp" <Microsoft.CodeAnalysis.SyntaxTriviaList>
;* V20 tmp17 [V20 ] ( 0, 0 ) struct (40) zero-ref do-not-enreg[S] ld-addr-op "Inline ldloca(s) first use temp" <Microsoft.CodeAnalysis.SyntaxTriviaList>
;* V21 tmp18 [V21 ] ( 0, 0 ) ref -> zero-ref class-hnd exact "guarded devirt this exact temp" <Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax.SyntaxToken+SyntaxTokenWithTrivia>
-; V22 tmp19 [V22,T08] ( 2, 4 ) int -> rsi "Inlining Arg"
-; V23 tmp20 [V23 ] ( 2, 2 ) ref -> [rsp+0x30] do-not-enreg[X] addr-exposed "field V02.<Parent>k__BackingField (fldOffset=0x0)" P-DEP
-; V24 tmp21 [V24 ] ( 4, 4 ) ref -> [rsp+0x38] do-not-enreg[X] addr-exposed "field V02.<Node>k__BackingField (fldOffset=0x8)" P-DEP
-; V25 tmp22 [V25 ] ( 2, 2 ) int -> [rsp+0x40] do-not-enreg[X] addr-exposed "field V02.<Index>k__BackingField (fldOffset=0x10)" P-DEP
-; V26 tmp23 [V26 ] ( 3, 3 ) int -> [rsp+0x44] do-not-enreg[X] addr-exposed "field V02.<Position>k__BackingField (fldOffset=0x14)" P-DEP
-;* V27 tmp24 [V27 ] ( 0, 0 ) ref -> zero-ref "V19.[000..008)"
-;* V28 tmp25 [V28 ] ( 0, 0 ) int -> zero-ref "V19.[008..012)"
-;* V29 tmp26 [V29 ] ( 0, 0 ) int -> zero-ref "V19.[012..016)"
-; V30 tmp27 [V30,T15] ( 2, 2 ) ref -> rdi single-def "V19.[016..024)"
-; V31 tmp28 [V31,T16] ( 2, 2 ) ref -> rbp single-def "V19.[024..032)"
-; V32 tmp29 [V32,T17] ( 2, 2 ) int -> r14 single-def "V19.[032..036)"
-; V33 tmp30 [V33,T18] ( 2, 2 ) int -> r15 single-def "V19.[036..040)"
-; V34 tmp31 [V34,T09] ( 3, 2 ) ref -> rdx "V16.[000..008)"
-; V35 tmp32 [V35,T12] ( 3, 2 ) int -> rsi "V16.[008..012)"
-;* V36 tmp33 [V36 ] ( 0, 0 ) int -> zero-ref "V16.[012..016)"
-; V37 tmp34 [V37,T10] ( 3, 2 ) ref -> rdi "V16.[016..024)"
-; V38 tmp35 [V38,T11] ( 3, 2 ) ref -> rbp "V16.[024..032)"
-; V39 tmp36 [V39,T13] ( 3, 2 ) int -> r14 "V16.[032..036)"
-; V40 tmp37 [V40,T14] ( 3, 2 ) int -> r15 "V16.[036..040)"
-;* V41 tmp38 [V41 ] ( 0, 0 ) ref -> zero-ref single-def "V20.[000..008)"
-;* V42 tmp39 [V42 ] ( 0, 0 ) int -> zero-ref single-def "V20.[008..012)"
-;* V43 tmp40 [V43 ] ( 0, 0 ) int -> zero-ref single-def "V20.[012..016)"
-;* V44 tmp41 [V44 ] ( 0, 0 ) ref -> zero-ref single-def "V20.[016..024)"
-;* V45 tmp42 [V45 ] ( 0, 0 ) ref -> zero-ref single-def "V20.[024..032)"
-;* V46 tmp43 [V46 ] ( 0, 0 ) int -> zero-ref single-def "V20.[032..036)"
-;* V47 tmp44 [V47 ] ( 0, 0 ) int -> zero-ref single-def "V20.[036..040)"
-; V48 tmp45 [V48,T06] ( 2, 4 ) ref -> rdx single-def "arr expr"
-;* V49 tmp46 [V49 ] ( 0, 0 ) int -> zero-ref "index expr"
-; V50 tmp47 [V50,T07] ( 2, 4 ) ref -> rdx single-def "argument with side effect"
+; V22 tmp19 [V22,T09] ( 2, 4 ) int -> rdx "Inlining Arg"
+;* V23 tmp20 [V23 ] ( 0, 0 ) ref -> zero-ref "V19.[000..008)"
+;* V24 tmp21 [V24 ] ( 0, 0 ) int -> zero-ref "V19.[008..012)"
+;* V25 tmp22 [V25 ] ( 0, 0 ) int -> zero-ref "V19.[012..016)"
+; V26 tmp23 [V26,T12] ( 3, 2 ) ref -> rbp "V16.[000..008)"
+; V27 tmp24 [V27,T13] ( 3, 2 ) int -> rdx "V16.[008..012)"
+;* V28 tmp25 [V28 ] ( 0, 0 ) int -> zero-ref "V16.[012..016)"
+;* V29 tmp26 [V29 ] ( 0, 0 ) ref -> zero-ref single-def "V20.[000..008)"
+;* V30 tmp27 [V30 ] ( 0, 0 ) int -> zero-ref single-def "V20.[008..012)"
+;* V31 tmp28 [V31 ] ( 0, 0 ) int -> zero-ref single-def "V20.[012..016)"
+; V32 tmp29 [V32,T06] ( 2, 4 ) ref -> rdx single-def "arr expr"
+;* V33 tmp30 [V33 ] ( 0, 0 ) int -> zero-ref "index expr"
+; V34 tmp31 [V34,T07] ( 2, 4 ) ref -> rdx single-def "argument with side effect"
+; V35 cse0 [V35,T11] ( 3, 3 ) ref -> rcx "CSE #01: aggressive"
;
-; Lcl frame size = 72
+; Lcl frame size = 152
G_M39825_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- push r15
- push r14
push rdi
push rsi
push rbp
push rbx
- sub rsp, 72
+ sub rsp, 152
vxorps xmm4, xmm4, xmm4
- vmovdqa xmmword ptr [rsp+0x30], xmm4
+ vmovdqu ymmword ptr [rsp+0x30], ymm4
+ vmovdqu ymmword ptr [rsp+0x50], ymm4
+ vmovdqu ymmword ptr [rsp+0x70], ymm4
xor eax, eax
- mov qword ptr [rsp+0x40], rax
+ mov qword ptr [rsp+0x90], rax
mov rbx, rdx
; byrRegs +[rbx]
- ;; size=32 bbWeight=1 PerfScore 10.08
+ ;; size=46 bbWeight=1 PerfScore 12.08
G_M39825_IG02: ; bbWeight=1, gcrefRegs=0002 {rcx}, byrefRegs=0008 {rbx}, byref, isz
; gcrRegs +[rcx]
mov rdx, 0xD1FFAB1E ; const ptr
@@ -93,7 +78,7 @@ G_M39825_IG02: ; bbWeight=1, gcrefRegs=0002 {rcx}, byrefRegs=0008 {rbx},
mov rdx, gword ptr [rdx+0x10]
mov gword ptr [rsp+0x20], rdx
; gcr arg write
- lea rdx, [rsp+0x30]
+ lea rdx, [rsp+0x80]
; gcrRegs -[rdx]
mov r8, rcx
; gcrRegs +[r8]
@@ -101,91 +86,83 @@ G_M39825_IG02: ; bbWeight=1, gcrefRegs=0002 {rcx}, byrefRegs=0008 {rbx},
call [<unknown method>]
; gcrRegs -[rax rcx r8-r9]
; gcr arg pop 0
- cmp gword ptr [rsp+0x38], 0
- je G_M39825_IG07
- mov rcx, gword ptr [rsp+0x38]
+ mov rcx, gword ptr [rsp+0x88]
; gcrRegs +[rcx]
- mov rdx, 0xD1FFAB1E ; Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax.SyntaxToken+SyntaxTokenWithTrivia
- cmp qword ptr [rcx], rdx
- jne SHORT G_M39825_IG06
- mov rdx, gword ptr [rcx+0x10]
- ; gcrRegs +[rdx]
- ;; size=101 bbWeight=1 PerfScore 24.00
-G_M39825_IG03: ; bbWeight=1, gcrefRegs=0004 {rdx}, byrefRegs=0008 {rbx}, byref
- ; gcrRegs -[rcx]
- mov esi, dword ptr [rsp+0x44]
- mov rdi, gword ptr [rsp+0x30]
- ; gcrRegs +[rdi]
- mov rbp, gword ptr [rsp+0x38]
+ test rcx, rcx
+ je G_M39825_IG09
+ mov rax, 0xD1FFAB1E ; Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax.SyntaxToken+SyntaxTokenWithTrivia
+ cmp qword ptr [rcx], rax
+ jne SHORT G_M39825_IG08
+ mov rbp, gword ptr [rcx+0x10]
; gcrRegs +[rbp]
- mov r14d, dword ptr [rsp+0x40]
- mov r15d, dword ptr [rsp+0x44]
- ;; size=24 bbWeight=1 PerfScore 5.00
-G_M39825_IG04: ; bbWeight=1, gcrefRegs=00A4 {rdx rbp rdi}, byrefRegs=0008 {rbx}, byref
- mov rcx, rbx
- ; byrRegs +[rcx]
- call CORINFO_HELP_CHECKED_ASSIGN_REF
- ; gcrRegs -[rdx]
- ; byrRegs -[rcx]
- mov dword ptr [rbx+0x08], esi
- xor ecx, ecx
- mov dword ptr [rbx+0x0C], ecx
- lea rcx, bword ptr [rbx+0x10]
- ; byrRegs +[rcx]
- mov rdx, rdi
- ; gcrRegs +[rdx]
- call CORINFO_HELP_CHECKED_ASSIGN_REF
- ; gcrRegs -[rdx rdi]
- ; byrRegs -[rcx]
- lea rcx, bword ptr [rbx+0x18]
- ; byrRegs +[rcx]
- mov rdx, rbp
- ; gcrRegs +[rdx]
- call CORINFO_HELP_CHECKED_ASSIGN_REF
- ; gcrRegs -[rdx rbp]
- ; byrRegs -[rcx]
- mov dword ptr [rbx+0x20], r14d
- mov dword ptr [rbx+0x24], r15d
+ ;; size=104 bbWeight=1 PerfScore 22.25
+G_M39825_IG03: ; bbWeight=1, gcrefRegs=0020 {rbp}, byrefRegs=0008 {rbx}, byref
+ ; gcrRegs -[rcx]
+ mov edx, dword ptr [rsp+0x94]
+ ;; size=7 bbWeight=1 PerfScore 1.00
+G_M39825_IG04: ; bbWeight=1, nogc, extend
+ vmovdqu xmm0, xmmword ptr [rsp+0x80]
+ vmovdqu xmmword ptr [rsp+0x40], xmm0
+ mov rax, qword ptr [rsp+0x90]
+ mov qword ptr [rsp+0x50], rax
+ ;; size=28 bbWeight=1 PerfScore 6.00
+G_M39825_IG05: ; bbWeight=1, nogc, extend
+ vmovdqu ymm0, ymmword ptr [rsp+0x30]
+ vmovdqu ymmword ptr [rsp+0x58], ymm0
+ mov rax, qword ptr [rsp+0x50]
+ mov qword ptr [rsp+0x78], rax
+ ;; size=22 bbWeight=1 PerfScore 7.00
+G_M39825_IG06: ; bbWeight=1, gcrefRegs=0020 {rbp}, byrefRegs=0008 {rbx}, byref
+ mov gword ptr [rsp+0x58], rbp
+ mov rdi, rbx
+ ; byrRegs +[rdi]
+ lea rsi, bword ptr [rsp+0x58]
+ ; byrRegs +[rsi]
+ call CORINFO_HELP_ASSIGN_BYREF
+ ; gcrRegs -[rbp]
+ movsq
+ call CORINFO_HELP_ASSIGN_BYREF
+ call CORINFO_HELP_ASSIGN_BYREF
+ movsq
+ mov dword ptr [rbx+0x08], edx
+ xor eax, eax
+ mov dword ptr [rbx+0x0C], eax
mov rax, rbx
; byrRegs +[rax]
- ;; size=51 bbWeight=1 PerfScore 9.25
-G_M39825_IG05: ; bbWeight=1, epilog, nogc, extend
- add rsp, 72
+ ;; size=43 bbWeight=1 PerfScore 9.25
+G_M39825_IG07: ; bbWeight=1, epilog, nogc, extend
+ vzeroupper
+ add rsp, 152
pop rbx
pop rbp
pop rsi
pop rdi
- pop r14
- pop r15
ret
- ;; size=13 bbWeight=1 PerfScore 4.25
-G_M39825_IG06: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0002 {rcx}, byrefRegs=0008 {rbx}, gcvars, byref, isz
+ ;; size=15 bbWeight=1 PerfScore 4.25
+G_M39825_IG08: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0002 {rcx}, byrefRegs=0008 {rbx}, gcvars, byref
; gcrRegs +[rcx]
- ; byrRegs -[rax]
+ ; byrRegs -[rax rsi rdi]
mov rax, qword ptr [rcx]
mov rax, qword ptr [rax+0x60]
call [rax+0x38]<unknown method>
; gcrRegs -[rcx] +[rax]
; gcr arg pop 0
- mov rdx, rax
- ; gcrRegs +[rdx]
- jmp SHORT G_M39825_IG03
- ;; size=15 bbWeight=0 PerfScore 0.00
-G_M39825_IG07: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=0008 {rbx}, byref, isz
- ; gcrRegs -[rax rdx]
- xor rdx, rdx
- ; gcrRegs +[rdx]
- xor esi, esi
- xor rdi, rdi
- ; gcrRegs +[rdi]
+ mov rbp, rax
+ ; gcrRegs +[rbp]
+ jmp G_M39825_IG03
+ ;; size=18 bbWeight=0 PerfScore 0.00
+G_M39825_IG09: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=0008 {rbx}, byref, isz
+ ; gcrRegs -[rax rbp]
+ vxorps ymm0, ymm0, ymm0
+ vmovdqu ymmword ptr [rsp+0x58], ymm0
+ vmovdqu xmmword ptr [rsp+0x70], xmm0
xor rbp, rbp
; gcrRegs +[rbp]
- xor r14d, r14d
- xor r15d, r15d
- jmp SHORT G_M39825_IG04
- ;; size=16 bbWeight=0 PerfScore 0.00
+ xor edx, edx
+ jmp SHORT G_M39825_IG06
+ ;; size=22 bbWeight=0 PerfScore 0.00
-; Total bytes of code 252, prolog size 29, PerfScore 52.58, instruction count 70, allocated bytes for code 252 (MethodHash=6d18646e) for method Microsoft.CodeAnalysis.CSharp.CSharpSyntaxNode:GetLeadingTrivia():Microsoft.CodeAnalysis.SyntaxTriviaList:this (Tier1)
+; Total bytes of code 305, prolog size 43, PerfScore 61.83, instruction count 70, allocated bytes for code 305 (MethodHash=6d18646e) for method Microsoft.CodeAnalysis.CSharp.CSharpSyntaxNode:GetLeadingTrivia():Microsoft.CodeAnalysis.SyntaxTriviaList:this (Tier1)
This case is similar to the previous one. We previously regularly promoted … [comment truncated in this capture]
Regression
@@ -35,14 +35,13 @@
;* V25 tmp17 [V25 ] ( 0, 0 ) simd12 -> zero-ref "field V04._simdVector (fldOffset=0x0)" P-INDEP
;* V26 tmp18 [V26 ] ( 0, 0 ) simd12 -> zero-ref "field V05._simdVector (fldOffset=0x0)" P-INDEP
; V27 tmp19 [V27,T11] ( 2, 2 ) simd12 -> mm0 "field V11._simdVector (fldOffset=0x0)" P-INDEP
-; V28 tmp20 [V28 ] ( 3, 1.50) simd12 -> [rsp+0x38] do-not-enreg[XS] addr-exposed "field V14._simdVector (fldOffset=0x0)" P-DEP
-;* V29 tmp21 [V29,T13] ( 0, 0 ) simd12 -> zero-ref "field V18._simdVector (fldOffset=0x0)" P-INDEP
-; V30 tmp22 [V30,T12] ( 2, 2 ) simd12 -> mm0 "field V20._simdVector (fldOffset=0x0)" P-INDEP
+;* V28 tmp20 [V28,T13] ( 0, 0 ) simd12 -> zero-ref "field V18._simdVector (fldOffset=0x0)" P-INDEP
+; V29 tmp21 [V29,T12] ( 2, 2 ) simd12 -> mm0 "field V20._simdVector (fldOffset=0x0)" P-INDEP
+;* V30 tmp22 [V30 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref" <Benchmarks.SIMD.RayTracer.Vector>
;* V31 tmp23 [V31 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref" <Benchmarks.SIMD.RayTracer.Vector>
;* V32 tmp24 [V32 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref" <Benchmarks.SIMD.RayTracer.Vector>
-;* V33 tmp25 [V33 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref" <Benchmarks.SIMD.RayTracer.Vector>
-; V34 tmp26 [V34 ] ( 4, 2 ) struct (16) [rsp+0x28] do-not-enreg[XS] addr-exposed "by-value struct argument" <Benchmarks.SIMD.RayTracer.Vector>
-; V35 tmp27 [V35,T07] ( 2, 1.56) ref -> rcx single-def "argument with side effect"
+; V33 tmp25 [V33 ] ( 4, 2 ) struct (16) [rsp+0x28] do-not-enreg[XS] addr-exposed "by-value struct argument" <Benchmarks.SIMD.RayTracer.Vector>
+; V34 tmp26 [V34,T07] ( 2, 1.56) ref -> rcx single-def "argument with side effect"
;
; Lcl frame size = 104
@@ -90,7 +89,7 @@ G_M26236_IG03: ; bbWeight=0.39, gcrefRegs=C080 {rdi r14 r15}, byrefRegs=0
; gcr arg pop 0
vmovsd qword ptr [rsp+0x48], xmm0
;; size=36 bbWeight=0.39 PerfScore 4.88
-G_M26236_IG04: ; bbWeight=1, gcrefRegs=4080 {rdi r14}, byrefRegs=0068 {rbx rbp rsi}, byref, isz
+G_M26236_IG04: ; bbWeight=1, gcrefRegs=4080 {rdi r14}, byrefRegs=0068 {rbx rbp rsi}, byref
vxorps xmm1, xmm1, xmm1
vmovdqu xmmword ptr [rsp+0x50], xmm1
vmovdqu xmmword ptr [rsp+0x58], xmm1
@@ -112,8 +111,8 @@ G_M26236_IG04: ; bbWeight=1, gcrefRegs=4080 {rdi r14}, byrefRegs=0068 {rb
; byrRegs -[rbx rbp]
; gcr arg pop 0
test rax, rax
- je SHORT G_M26236_IG09
- ;; size=69 bbWeight=1 PerfScore 19.58
+ je G_M26236_IG09
+ ;; size=73 bbWeight=1 PerfScore 19.58
G_M26236_IG05: ; bbWeight=0.34, gcrefRegs=4081 {rax rdi r14}, byrefRegs=0040 {rsi}, byref
mov ebx, dword ptr [rsp+0xD8]
inc ebx
@@ -130,7 +129,8 @@ G_M26236_IG05: ; bbWeight=0.34, gcrefRegs=4081 {rax rdi r14}, byrefRegs=0
; gcr arg pop 0
;; size=33 bbWeight=0.34 PerfScore 2.19
G_M26236_IG06: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref
- vmovups xmm0, xmmword ptr [rsp+0x38]
+ vmovsd xmm0, qword ptr [rsp+0x38]
+ vinsertps xmm0, xmm0, dword ptr [rsp+0x40], 40
vcvtsd2ss xmm1, xmm1, qword ptr [rsp+0x48]
vbroadcastss xmm1, xmm1
vmulps xmm0, xmm1, xmm0
@@ -138,7 +138,7 @@ G_M26236_IG06: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byr
vextractps dword ptr [rsi+0x08], xmm0, 2
mov rax, rsi
; byrRegs +[rax]
- ;; size=35 bbWeight=1 PerfScore 19.25
+ ;; size=43 bbWeight=1 PerfScore 21.25
G_M26236_IG07: ; bbWeight=1, epilog, nogc, extend
add rsp, 104
pop rbx
@@ -169,11 +169,12 @@ G_M26236_IG09: ; bbWeight=0.16, gcrefRegs=0000 {}, byrefRegs=0040 {rsi},
; gcrRegs -[rdi r14]
; byrRegs -[rbx rbp]
vxorps xmm0, xmm0, xmm0
- vmovups xmmword ptr [rsp+0x38], xmm0
+ vmovsd qword ptr [rsp+0x38], xmm0
+ vmovss dword ptr [rsp+0x40], xmm0
jmp SHORT G_M26236_IG06
- ;; size=12 bbWeight=0.16 PerfScore 0.54
+ ;; size=18 bbWeight=0.16 PerfScore 0.71
-; Total bytes of code 302, prolog size 12, PerfScore 69.54, instruction count 77, allocated bytes for code 302 (MethodHash=03159983) for method Benchmarks.SIMD.RayTracer.RayTracer:GetReflectionColor(Benchmarks.SIMD.RayTracer.SceneObject,Benchmarks.SIMD.RayTracer.Vector,Benchmarks.SIMD.RayTracer.Vector,Benchmarks.SIMD.RayTracer.Vector,Benchmarks.SIMD.RayTracer.Scene,int):Benchmarks.SIMD.RayTracer.Color:this (Tier1)
+; Total bytes of code 320, prolog size 12, PerfScore 71.70, instruction count 79, allocated bytes for code 320 (MethodHash=03159983) for method Benchmarks.SIMD.RayTracer.RayTracer:GetReflectionColor(Benchmarks.SIMD.RayTracer.SceneObject,Benchmarks.SIMD.RayTracer.Vector,Benchmarks.SIMD.RayTracer.Vector,Benchmarks.SIMD.RayTracer.Vector,Benchmarks.SIMD.RayTracer.Scene,int):Benchmarks.SIMD.RayTracer.Color:this (Tier1)
Looks like we promote a … [comment truncated in this capture]
libraries_tests.run.windows.x64.Release.mch System.Number:Int128DivMod1E19
@@ -10,18 +10,18 @@
; 0 inlinees with PGO data; 8 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T00] ( 6, 6 ) byref -> rbx single-def
+; V00 arg0 [V00,T00] ( 5, 5 ) byref -> rbx single-def
;* V01 loc0 [V01 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op <System.UInt128>
;* V02 loc1 [V02 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op <System.UInt128>
-;* V03 loc2 [V03 ] ( 0, 0 ) struct (32) zero-ref do-not-enreg[S] <System.ValueTuple`2[System.UInt128,System.UInt128]>
+; V03 loc2 [V03,T03] ( 2, 2 ) struct (32) [rsp+0x78] do-not-enreg[S] <System.ValueTuple`2[System.UInt128,System.UInt128]>
; V04 OutArgs [V04 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V05 tmp1 [V05 ] ( 0, 0 ) struct (16) zero-ref "Inlining Arg" <System.UInt128>
;* V06 tmp2 [V06 ] ( 0, 0 ) struct (16) zero-ref "Inlining Arg" <System.UInt128>
-; V07 tmp3 [V07 ] ( 4, 4 ) struct (16) [rsp+0x48] do-not-enreg[XS] addr-exposed "Inline stloc first use temp" <System.UInt128>
+; V07 tmp3 [V07 ] ( 3, 3 ) struct (16) [rsp+0x68] do-not-enreg[XS] addr-exposed "Inline stloc first use temp" <System.UInt128>
;* V08 tmp4 [V08 ] ( 0, 0 ) struct (16) zero-ref "spilled call-like call argument" <System.UInt128>
-;* V09 tmp5 [V09 ] ( 0, 0 ) struct (32) zero-ref do-not-enreg[S] ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.UInt128,System.UInt128]>
+; V09 tmp5 [V09,T01] ( 2, 4 ) struct (32) [rsp+0x48] do-not-enreg[S] ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.UInt128,System.UInt128]>
;* V10 tmp6 [V10 ] ( 0, 0 ) struct (16) zero-ref "spilled call-like call argument" <System.UInt128>
-; V11 tmp7 [V11,T02] ( 2, 2 ) long -> rax ld-addr-op "Inline ldloca(s) first use temp"
+; V11 tmp7 [V11,T04] ( 2, 2 ) long -> rax ld-addr-op "Inline ldloca(s) first use temp"
;* V12 tmp8 [V12 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "NewObj constructor temp" <System.UInt128>
;* V13 tmp9 [V13 ] ( 0, 0 ) long -> zero-ref "Inline return value spill temp"
;* V14 tmp10 [V14 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
@@ -30,7 +30,7 @@
;* V17 tmp13 [V17 ] ( 0, 0 ) long -> zero-ref "impAppendStmt"
;* V18 tmp14 [V18 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V19 tmp15 [V19 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
-; V20 tmp16 [V20,T03] ( 2, 2 ) long -> rax "Inline stloc first use temp"
+; V20 tmp16 [V20,T05] ( 2, 2 ) long -> rax "Inline stloc first use temp"
;* V21 tmp17 [V21 ] ( 0, 0 ) long -> zero-ref "Inline stloc first use temp"
;* V22 tmp18 [V22 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "NewObj constructor temp" <System.UInt128>
;* V23 tmp19 [V23 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
@@ -38,40 +38,34 @@
;* V25 tmp21 [V25,T08] ( 0, 0 ) long -> zero-ref "field V01._upper (fldOffset=0x8)" P-INDEP
;* V26 tmp22 [V26 ] ( 0, 0 ) long -> zero-ref "field V02._lower (fldOffset=0x0)" P-INDEP
;* V27 tmp23 [V27 ] ( 0, 0 ) long -> zero-ref "field V02._upper (fldOffset=0x8)" P-INDEP
-; V28 tmp24 [V28,T01] ( 3, 3 ) long -> rsi "field V05._lower (fldOffset=0x0)" P-INDEP
-; V29 tmp25 [V29,T04] ( 2, 2 ) long -> rdx "field V05._upper (fldOffset=0x8)" P-INDEP
+; V28 tmp24 [V28,T02] ( 3, 3 ) long -> rsi "field V05._lower (fldOffset=0x0)" P-INDEP
+; V29 tmp25 [V29,T06] ( 2, 2 ) long -> rdx "field V05._upper (fldOffset=0x8)" P-INDEP
;* V30 tmp26 [V30 ] ( 0, 0 ) long -> zero-ref "field V06._lower (fldOffset=0x0)" P-INDEP
;* V31 tmp27 [V31 ] ( 0, 0 ) long -> zero-ref "field V06._upper (fldOffset=0x8)" P-INDEP
-; V32 tmp28 [V32 ] ( 3, 3 ) long -> [rsp+0x48] do-not-enreg[X] addr-exposed "field V07._lower (fldOffset=0x0)" P-DEP
-; V33 tmp29 [V33 ] ( 2, 2 ) long -> [rsp+0x50] do-not-enreg[X] addr-exposed "field V07._upper (fldOffset=0x8)" P-DEP
-;* V34 tmp30 [V34 ] ( 0, 0 ) long -> zero-ref "field V08._lower (fldOffset=0x0)" P-INDEP
-;* V35 tmp31 [V35 ] ( 0, 0 ) long -> zero-ref "field V08._upper (fldOffset=0x8)" P-INDEP
-;* V36 tmp32 [V36 ] ( 0, 0 ) long -> zero-ref "field V10._lower (fldOffset=0x0)" P-INDEP
-;* V37 tmp33 [V37 ] ( 0, 0 ) long -> zero-ref "field V10._upper (fldOffset=0x8)" P-INDEP
-;* V38 tmp34 [V38 ] ( 0, 0 ) long -> zero-ref "field V12._lower (fldOffset=0x0)" P-INDEP
-;* V39 tmp35 [V39 ] ( 0, 0 ) long -> zero-ref "field V12._upper (fldOffset=0x8)" P-INDEP
-;* V40 tmp36 [V40 ] ( 0, 0 ) long -> zero-ref "field V22._lower (fldOffset=0x0)" P-INDEP
-;* V41 tmp37 [V41 ] ( 0, 0 ) long -> zero-ref "field V22._upper (fldOffset=0x8)" P-INDEP
-;* V42 tmp38 [V42 ] ( 0, 0 ) long -> zero-ref "V03.[000..008)"
-;* V43 tmp39 [V43 ] ( 0, 0 ) long -> zero-ref "V03.[008..016)"
-;* V44 tmp40 [V44 ] ( 0, 0 ) long -> zero-ref "V03.[016..024)"
-;* V45 tmp41 [V45 ] ( 0, 0 ) long -> zero-ref "V03.[024..032)"
-; V46 tmp42 [V46,T05] ( 2, 2 ) long -> rcx "V09.[000..008)"
-; V47 tmp43 [V47,T06] ( 2, 2 ) long -> rdx "V09.[008..016)"
-;* V48 tmp44 [V48 ] ( 0, 0 ) long -> zero-ref "V09.[016..024)"
-;* V49 tmp45 [V49 ] ( 0, 0 ) long -> zero-ref "V09.[024..032)"
-; V50 tmp46 [V50 ] ( 3, 6 ) struct (16) [rsp+0x30] do-not-enreg[XSF] addr-exposed "by-value struct argument" <System.UInt128>
-; V51 tmp47 [V51 ] ( 3, 6 ) struct (16) [rsp+0x20] do-not-enreg[XSF] addr-exposed "by-value struct argument" <System.UInt128>
+;* V32 tmp28 [V32 ] ( 0, 0 ) long -> zero-ref "field V08._lower (fldOffset=0x0)" P-INDEP
+;* V33 tmp29 [V33 ] ( 0, 0 ) long -> zero-ref "field V08._upper (fldOffset=0x8)" P-INDEP
+;* V34 tmp30 [V34 ] ( 0, 0 ) long -> zero-ref "field V10._lower (fldOffset=0x0)" P-INDEP
+;* V35 tmp31 [V35 ] ( 0, 0 ) long -> zero-ref "field V10._upper (fldOffset=0x8)" P-INDEP
+;* V36 tmp32 [V36 ] ( 0, 0 ) long -> zero-ref "field V12._lower (fldOffset=0x0)" P-INDEP
+;* V37 tmp33 [V37 ] ( 0, 0 ) long -> zero-ref "field V12._upper (fldOffset=0x8)" P-INDEP
+;* V38 tmp34 [V38 ] ( 0, 0 ) long -> zero-ref "field V22._lower (fldOffset=0x0)" P-INDEP
+;* V39 tmp35 [V39 ] ( 0, 0 ) long -> zero-ref "field V22._upper (fldOffset=0x8)" P-INDEP
+;* V40 tmp36 [V40 ] ( 0, 0 ) long -> zero-ref "V03.[016..024)"
+;* V41 tmp37 [V41 ] ( 0, 0 ) long -> zero-ref "V03.[024..032)"
+;* V42 tmp38 [V42 ] ( 0, 0 ) long -> zero-ref "V09.[016..024)"
+;* V43 tmp39 [V43 ] ( 0, 0 ) long -> zero-ref "V09.[024..032)"
+; V44 tmp40 [V44 ] ( 3, 6 ) struct (16) [rsp+0x30] do-not-enreg[XSF] addr-exposed "by-value struct argument" <System.UInt128>
+; V45 tmp41 [V45 ] ( 3, 6 ) struct (16) [rsp+0x20] do-not-enreg[XSF] addr-exposed "by-value struct argument" <System.UInt128>
;
-; Lcl frame size = 88
+; Lcl frame size = 152
G_M60303_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
push rsi
push rbx
- sub rsp, 88
+ sub rsp, 152
mov rbx, rcx
; byrRegs +[rbx]
- ;; size=9 bbWeight=1 PerfScore 2.50
+ ;; size=12 bbWeight=1 PerfScore 2.50
G_M60303_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0008 {rbx}, byref
mov rsi, qword ptr [rbx]
mov rdx, qword ptr [rbx+0x08]
@@ -83,10 +77,10 @@ G_M60303_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0008 {rbx}, byr
mov qword ptr [rsp+0x28], rdx
lea rdx, [rsp+0x30]
lea r8, [rsp+0x20]
- lea rcx, [rsp+0x48]
+ lea rcx, [rsp+0x68]
call [System.UInt128:op_Division(System.UInt128,System.UInt128):System.UInt128]
; gcr arg pop 0
- mov rdx, qword ptr [rsp+0x48]
+ mov rdx, qword ptr [rsp+0x68]
mov rax, 0xD1FFAB1E
lea rcx, [rsp+0x40]
mulx rax, r8, rax
@@ -94,19 +88,21 @@ G_M60303_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0008 {rbx}, byr
mov rax, qword ptr [rsp+0x40]
sub rsi, rax
mov rax, rsi
- mov rcx, qword ptr [rsp+0x48]
- mov rdx, qword ptr [rsp+0x50]
- mov qword ptr [rbx], rcx
- mov qword ptr [rbx+0x08], rdx
- ;; size=116 bbWeight=1 PerfScore 24.25
+ vmovups xmm0, xmmword ptr [rsp+0x68]
+ vmovups xmmword ptr [rsp+0x48], xmm0
+ vmovups xmm0, xmmword ptr [rsp+0x48]
+ vmovups xmmword ptr [rsp+0x78], xmm0
+ vmovups xmm0, xmmword ptr [rsp+0x78]
+ vmovups xmmword ptr [rbx], xmm0
+ ;; size=133 bbWeight=1 PerfScore 33.25
G_M60303_IG03: ; bbWeight=1, epilog, nogc, extend
- add rsp, 88
+ add rsp, 152
pop rbx
pop rsi
ret
- ;; size=7 bbWeight=1 PerfScore 2.25
+ ;; size=10 bbWeight=1 PerfScore 2.25
-; Total bytes of code 132, prolog size 6, PerfScore 29.00, instruction count 32, allocated bytes for code 132 (MethodHash=49051470) for method System.Number:Int128DivMod1E19(byref):ulong (Tier1)
+; Total bytes of code 155, prolog size 9, PerfScore 38.00, instruction count 34, allocated bytes for code 155 (MethodHash=49051470) for method System.Number:Int128DivMod1E19(byref):ulong (Tier1)
Same underlying issue as #104439 (comment), and likely to be improved when we enable retbuf definitions more widely than for compiler-created temps.
System.Formats.Asn1.AsnWriter:EncodedValueEquals(System.ReadOnlySpan`1[ubyte]):ubyte
@@ -10,11 +10,11 @@
; 1 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
-; V00 this [V00,T01] ( 3, 3 ) ref -> rcx this class-hnd single-def <System.Formats.Asn1.AsnWriter>
+; V00 this [V00,T02] ( 3, 3 ) ref -> rcx this class-hnd single-def <System.Formats.Asn1.AsnWriter>
; V01 arg1 [V01,T00] ( 4, 8 ) byref -> rbx single-def
; V02 OutArgs [V02 ] ( 1, 1 ) struct (32) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V03 tmp1 [V03 ] ( 3, 6 ) struct (16) [rsp+0x20] do-not-enreg[HS] must-init hidden-struct-arg "spilled call-like call argument" <System.ReadOnlySpan`1[ubyte]>
-; V04 tmp2 [V04,T05] ( 3, 2 ) ubyte -> rax "Inline return value spill temp"
+; V03 tmp1 [V03,T01] ( 3, 6 ) struct (16) [rsp+0x20] do-not-enreg[HS] must-init hidden-struct-arg "spilled call-like call argument" <System.ReadOnlySpan`1[ubyte]>
+; V04 tmp2 [V04,T04] ( 3, 2 ) ubyte -> rax "Inline return value spill temp"
;* V05 tmp3 [V05 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
;* V06 tmp4 [V06 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V07 tmp5 [V07 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
@@ -27,21 +27,19 @@
;* V14 tmp12 [V14 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
;* V15 tmp13 [V15 ] ( 0, 0 ) byref -> zero-ref "field V01._reference (fldOffset=0x0)" P-INDEP
;* V16 tmp14 [V16 ] ( 0, 0 ) int -> zero-ref "field V01._length (fldOffset=0x8)" P-INDEP
-; V17 tmp15 [V17,T03] ( 2, 3 ) byref -> [rsp+0x20] do-not-enreg[H] hidden-struct-arg "field V03._reference (fldOffset=0x0)" P-DEP
-; V18 tmp16 [V18,T04] ( 2, 3 ) int -> [rsp+0x28] do-not-enreg[H] hidden-struct-arg "field V03._length (fldOffset=0x8)" P-DEP
-;* V19 tmp17 [V19 ] ( 0, 0 ) byref -> zero-ref single-def "field V05._reference (fldOffset=0x0)" P-INDEP
-;* V20 tmp18 [V20 ] ( 0, 0 ) int -> zero-ref "field V05._length (fldOffset=0x8)" P-INDEP
-; V21 tmp19 [V21,T06] ( 2, 2 ) byref -> rdx single-def "field V07._reference (fldOffset=0x0)" P-INDEP
-; V22 tmp20 [V22,T02] ( 3, 3 ) int -> r8 "field V07._length (fldOffset=0x8)" P-INDEP
-;* V23 tmp21 [V23 ] ( 0, 0 ) byref -> zero-ref single-def "field V11._reference (fldOffset=0x0)" P-INDEP
-;* V24 tmp22 [V24 ] ( 0, 0 ) int -> zero-ref "field V11._length (fldOffset=0x8)" P-INDEP
-;* V25 tmp23 [V25 ] ( 0, 0 ) byref -> zero-ref single-def "field V12._reference (fldOffset=0x0)" P-INDEP
-;* V26 tmp24 [V26 ] ( 0, 0 ) int -> zero-ref "field V12._length (fldOffset=0x8)" P-INDEP
-;* V27 tmp25 [V27 ] ( 0, 0 ) byref -> zero-ref "field V13._reference (fldOffset=0x0)" P-INDEP
-;* V28 tmp26 [V28 ] ( 0, 0 ) int -> zero-ref "field V13._length (fldOffset=0x8)" P-INDEP
-;* V29 tmp27 [V29 ] ( 0, 0 ) byref -> zero-ref "field V14._reference (fldOffset=0x0)" P-INDEP
-;* V30 tmp28 [V30 ] ( 0, 0 ) int -> zero-ref "field V14._length (fldOffset=0x8)" P-INDEP
-;* V31 tmp29 [V31 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref" <System.ReadOnlySpan`1[ubyte]>
+;* V17 tmp15 [V17 ] ( 0, 0 ) byref -> zero-ref single-def "field V05._reference (fldOffset=0x0)" P-INDEP
+; V18 tmp16 [V18,T07] ( 2, 2 ) int -> r8 "field V05._length (fldOffset=0x8)" P-INDEP
+; V19 tmp17 [V19,T05] ( 2, 2 ) byref -> rdx single-def "field V07._reference (fldOffset=0x0)" P-INDEP
+; V20 tmp18 [V20,T03] ( 3, 3 ) int -> rcx "field V07._length (fldOffset=0x8)" P-INDEP
+; V21 tmp19 [V21,T06] ( 2, 2 ) byref -> rax single-def "field V11._reference (fldOffset=0x0)" P-INDEP
+;* V22 tmp20 [V22 ] ( 0, 0 ) int -> zero-ref "field V11._length (fldOffset=0x8)" P-INDEP
+;* V23 tmp21 [V23 ] ( 0, 0 ) byref -> zero-ref single-def "field V12._reference (fldOffset=0x0)" P-INDEP
+;* V24 tmp22 [V24 ] ( 0, 0 ) int -> zero-ref "field V12._length (fldOffset=0x8)" P-INDEP
+;* V25 tmp23 [V25 ] ( 0, 0 ) byref -> zero-ref "field V13._reference (fldOffset=0x0)" P-INDEP
+;* V26 tmp24 [V26 ] ( 0, 0 ) int -> zero-ref "field V13._length (fldOffset=0x8)" P-INDEP
+;* V27 tmp25 [V27 ] ( 0, 0 ) byref -> zero-ref "field V14._reference (fldOffset=0x0)" P-INDEP
+;* V28 tmp26 [V28 ] ( 0, 0 ) int -> zero-ref "field V14._length (fldOffset=0x8)" P-INDEP
+;* V29 tmp27 [V29 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref" <System.ReadOnlySpan`1[ubyte]>
;
; Lcl frame size = 48
@@ -59,17 +57,21 @@ G_M13570_IG02: ; bbWeight=1, gcrefRegs=0002 {rcx}, byrefRegs=0008 {rbx},
call [System.Formats.Asn1.AsnWriter:EncodeAsSpan():System.ReadOnlySpan`1[ubyte]:this]
; gcrRegs -[rcx]
; gcr arg pop 0
+ mov r8d, dword ptr [rsp+0x28]
mov rdx, bword ptr [rbx]
; byrRegs +[rdx]
- mov r8d, dword ptr [rbx+0x08]
- cmp dword ptr [rsp+0x28], r8d
+ mov ecx, dword ptr [rbx+0x08]
+ cmp r8d, ecx
jne SHORT G_M13570_IG05
- mov rcx, bword ptr [rsp+0x20]
+ mov rax, bword ptr [rsp+0x20]
+ ; byrRegs +[rax]
+ mov r8d, ecx
+ mov rcx, rax
; byrRegs +[rcx]
call [<unknown method>]
- ; byrRegs -[rcx rdx rbx]
+ ; byrRegs -[rax rcx rdx rbx]
; gcr arg pop 0
- ;; size=36 bbWeight=1 PerfScore 14.50
+ ;; size=44 bbWeight=1 PerfScore 14.25
G_M13570_IG03: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
nop
;; size=1 bbWeight=1 PerfScore 0.25
@@ -83,7 +85,7 @@ G_M13570_IG05: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {
jmp SHORT G_M13570_IG03
;; size=4 bbWeight=0 PerfScore 0.00
-; Total bytes of code 62, prolog size 12, PerfScore 19.25, instruction count 19, allocated bytes for code 62 (MethodHash=2ff0cafd) for method System.Formats.Asn1.AsnWriter:EncodedValueEquals(System.ReadOnlySpan`1[ubyte]):ubyte:this (Tier1)
+; Total bytes of code 70, prolog size 12, PerfScore 19.00, instruction count 22, allocated bytes for code 70 (MethodHash=2ff0cafd) for method System.Formats.Asn1.AsnWriter:EncodedValueEquals(System.ReadOnlySpan`1[ubyte]):ubyte:this (Tier1)
Physical promotion decides not to promote the retbuf fields:
Looking for induced accesses with 1 stores between candidates
Induced accesses for V03
byref @ 000
#: (1, 10000)
int @ 008
#: (1, 10000)
Picking induced promotions for V03
Evaluating access byref @ 000
Single write-back cost: 3
Write backs: 0
Read backs: 10000
Estimated cycle improvement: -0.5 cycles per invocation
Estimated size improvement: -2 bytes
Disqualifying replacement
Evaluating access int @ 008
Single write-back cost: 3
Write backs: 0
Read backs: 10000
Estimated cycle improvement: -0.5 cycles per invocation
Estimated size improvement: -2 bytes
Disqualifying replacement
Looks like with regular promotion we do some copy prop with a promoted retbuf field and then manage to do containment with it (due to its DNER status). Without promotion we have
and we do not manage to contain this. Some kind of forward sub (like I was doing for the multi-use LIR temp experiment) would resolve this. |
Related Regressions: |
These locals end up being dependently promoted. Skip them and allow physical promotion to handle them instead.