Skip to content

Commit

Permalink
Unify JIT interface getTypeLayout between VM and crossgen2; refine wh…
Browse files Browse the repository at this point in the history
…at is considered "significant padding" (#88238)

* Stop considering auto layout types to have significant padding in the VM. This was already the behavior in crossgen2. This fixes one of the points of #71711.
* Remove special case where we never considered structs with GC pointers to have significant padding. After the above change this has no additional diffs.
* Fix the inline array layout expansion; the numFields of the inline array node was computed incorrectly, and the parent indices of descendants were not updated correctly

Example C#:
```csharp
private (long, int) _tuple;

[MethodImpl(MethodImplOptions.NoInlining)]
private void Foo()
{
    _tuple = (5, 10);
}
```

Before:
```asm
;  V00 this         [V00,T00] (  3,  3   )     ref  ->  rcx         this class-hnd single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+00H]   do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02    ] (  4,  8   )  struct (16) [rsp+08H]   do-not-enreg[SB] ld-addr-op "NewObj constructor temp"
;  V03 tmp2         [V03,T01] (  3,  5   )    long  ->  [rsp+08H]   do-not-enreg[] "field V02.Item1 (fldOffset=0x0)" P-DEP
;  V04 tmp3         [V04,T02] (  3,  5   )     int  ->  [rsp+10H]   do-not-enreg[] "field V02.Item2 (fldOffset=0x8)" P-DEP
;
; Lcl frame size = 24

G_M52879_IG01:  ;; offset=0000H
       sub      rsp, 24
       vzeroupper
						;; size=7 bbWeight=1 PerfScore 1.25
G_M52879_IG02:  ;; offset=0007H
       vxorps   xmm0, xmm0, xmm0
       vmovups  xmmword ptr [rsp+08H], xmm0
       mov      qword ptr [rsp+08H], 5
       mov      dword ptr [rsp+10H], 10
       vmovups  xmm0, xmmword ptr [rsp+08H]
       vmovups  xmmword ptr [rcx+08H], xmm0
						;; size=38 bbWeight=1 PerfScore 8.33
G_M52879_IG03:  ;; offset=002DH
       add      rsp, 24
       ret
						;; size=5 bbWeight=1 PerfScore 1.25

; Total bytes of code 50, prolog size 7, PerfScore 15.83, instruction count 10, allocated bytes for code 50 (MethodHash=1da13170) for method Program:Foo():this (FullOpts)
```

After:
```asm
;  V00 this         [V00,T00] (  4,  4   )     ref  ->  rcx         this class-hnd single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [rsp+00H]   do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V02 tmp1         [V02    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "NewObj constructor temp"
;* V03 tmp2         [V03,T01] (  0,  0   )    long  ->  zero-ref    "field V02.Item1 (fldOffset=0x0)" P-INDEP
;* V04 tmp3         [V04,T02] (  0,  0   )     int  ->  zero-ref    "field V02.Item2 (fldOffset=0x8)" P-INDEP
;
; Lcl frame size = 0

G_M52879_IG01:  ;; offset=0000H
						;; size=0 bbWeight=1 PerfScore 0.00
G_M52879_IG02:  ;; offset=0000H
       mov      qword ptr [rcx+08H], 5
       mov      dword ptr [rcx+10H], 10
						;; size=15 bbWeight=1 PerfScore 2.00
G_M52879_IG03:  ;; offset=000FH
       ret
						;; size=1 bbWeight=1 PerfScore 1.00

; Total bytes of code 16, prolog size 0, PerfScore 4.60, instruction count 3, allocated bytes for code 16 (MethodHash=1da13170) for method Program:Foo():this (FullOpts)
```
  • Loading branch information
jakobbotsch authored Jul 1, 2023
1 parent abedbc9 commit 876ab61
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 32 deletions.
46 changes: 31 additions & 15 deletions src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2368,7 +2368,7 @@ private GetTypeLayoutResult GetTypeLayoutHelper(MetadataType type, uint parentIn
parNode->size = (uint)type.GetElementSize().AsInt;
parNode->numFields = 0;
parNode->type = CorInfoType.CORINFO_TYPE_VALUECLASS;
parNode->hasSignificantPadding = false;
parNode->hasSignificantPadding = type.IsExplicitLayout || (type.IsSequentialLayout && type.GetClassLayout().Size != 0);

#if READYTORUN
// The contract of getTypeLayout is carefully crafted to still
Expand All @@ -2381,7 +2381,7 @@ private GetTypeLayoutResult GetTypeLayoutHelper(MetadataType type, uint parentIn
// amenable to the optimizations that this unlocks if they already
// went through EncodeFieldBaseOffset.
//
if (!_compilation.IsLayoutFixedInCurrentVersionBubble(type))
if (!parNode->hasSignificantPadding && !_compilation.IsLayoutFixedInCurrentVersionBubble(type))
{
// For types without fixed layout the JIT is not allowed to
// rely on padding bits being insignificant, since fields could
Expand All @@ -2391,14 +2391,6 @@ private GetTypeLayoutResult GetTypeLayoutHelper(MetadataType type, uint parentIn
}
#endif

if (type.IsExplicitLayout || (type.IsSequentialLayout && type.GetClassLayout().Size != 0) || type.IsInlineArray)
{
if (!type.ContainsGCPointers && !type.IsByRefLike)
{
parNode->hasSignificantPadding = true;
}
}

// The intrinsic SIMD/HW SIMD types have a lot of fields that the JIT does
// not care about since they are considered primitives by the JIT.
if (type.IsIntrinsic)
Expand Down Expand Up @@ -2472,19 +2464,43 @@ private GetTypeLayoutResult GetTypeLayoutHelper(MetadataType type, uint parentIn
int elemSize = fieldType.GetElementSize().AsInt;
int arrSize = type.GetElementSize().AsInt;

// Number of fields added for each element, including all
// subfields. For example, for ValueTuple<int, int>[4]:
// [ 0]: InlineArray parent = -1
// [ 1]: ValueTuple<int, int> parent = 0 -
// [ 2]: int parent = 1 |
// [ 3]: int parent = 1 |
// [ 4]: ValueTuple<int, int> parent = 0 - stride = 3
// [ 5]: int parent = 4
// [ 6]: int parent = 4
// [ 7]: ValueTuple<int, int> parent = 0
// [ 8]: int parent = 7
// [ 9]: int parent = 7
// [10]: ValueTuple<int, int> parent = 0
// [11]: int parent = 10
// [12]: int parent = 10
uint elemFieldsStride = (uint)*numTreeNodes - (structNodeIndex + 1);

// Now duplicate the fields of the previous entry for each
// additional element. For each entry we have to update the
// offset and the parent index.
for (int elemOffset = elemSize; elemOffset < arrSize; elemOffset += elemSize)
{
for (nuint templateTreeNodeIndex = structNodeIndex + 1; templateTreeNodeIndex < treeNodeEnd; templateTreeNodeIndex++)
nuint prevElemStart = *numTreeNodes - elemFieldsStride;
for (nuint i = 0; i < elemFieldsStride; i++)
{
if (*numTreeNodes >= maxTreeNodes)
return GetTypeLayoutResult.Partial;

CORINFO_TYPE_LAYOUT_NODE* treeNode = &treeNodes[(*numTreeNodes)++];
*treeNode = treeNodes[templateTreeNodeIndex];
treeNode->offset += (uint)elemOffset;

parNode->numFields++;
*treeNode = treeNodes[prevElemStart + i];
treeNode->offset += (uint)elemSize;
// The first field points back to the inline array
// and has no bias; the rest of them do.
treeNode->parent += (i == 0) ? 0 : elemFieldsStride;
}

parNode->numFields++;
}
}
}
Expand Down
48 changes: 31 additions & 17 deletions src/coreclr/vm/jitinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2106,20 +2106,9 @@ static GetTypeLayoutResult GetTypeLayoutHelper(
parNode.size = pMT->GetNumInstanceFieldBytes();
parNode.numFields = 0;
parNode.type = CorInfoType::CORINFO_TYPE_VALUECLASS;
parNode.hasSignificantPadding = false;

EEClass* pClass = pMT->GetClass();
if (pClass->IsNotTightlyPacked() && (!pClass->IsManagedSequential() || pClass->HasExplicitSize() || pClass->IsInlineArray()))
{
// Historically on the JIT side we did not consider types with GC
// pointers to have significant padding, even when they have explicit
// layout attributes. This retains the more liberal treatment and
// lets the JIT still optimize these cases.
if (!pMT->ContainsPointers() && pMT != g_TypedReferenceMT)
{
parNode.hasSignificantPadding = true;
}
}
parNode.hasSignificantPadding = pClass->HasExplicitFieldOffsetLayout() || pClass->HasExplicitSize();

// The intrinsic SIMD/HW SIMD types have a lot of fields that the JIT does
// not care about since they are considered primitives by the JIT.
Expand Down Expand Up @@ -2181,21 +2170,46 @@ static GetTypeLayoutResult GetTypeLayoutHelper(
uint32_t elemSize = pFD->GetSize();
uint32_t arrSize = pMT->GetNumInstanceFieldBytes();

// Number of fields added for each element, including all
// subfields. For example, for ValueTuple<int, int>[4]:
// [ 0]: InlineArray
// [ 1]: ValueTuple<int, int> parent = 0 -
// [ 2]: int parent = 1 |
// [ 3]: int parent = 1 |
// [ 4]: ValueTuple<int, int> parent = 0 - stride = 3
// [ 5]: int parent = 4
// [ 6]: int parent = 4
// [ 7]: ValueTuple<int, int> parent = 0
// [ 8]: int parent = 7
// [ 9]: int parent = 7
// [10]: ValueTuple<int, int> parent = 0
// [11]: int parent = 10
// [12]: int parent = 10

unsigned elemFieldsStride = (unsigned)*numTreeNodes - (structNodeIndex + 1);

// Now duplicate the fields of the previous entry for each
// additional element. For each entry we have to update the offset
// and the parent index.
for (uint32_t elemOffset = elemSize; elemOffset < arrSize; elemOffset += elemSize)
{
for (size_t templateTreeNodeIndex = structNodeIndex + 1; templateTreeNodeIndex < treeNodeEnd; templateTreeNodeIndex++)
size_t prevElemStart = *numTreeNodes - elemFieldsStride;
for (size_t i = 0; i < elemFieldsStride; i++)
{
if (*numTreeNodes >= maxTreeNodes)
{
return GetTypeLayoutResult::Partial;
}

CORINFO_TYPE_LAYOUT_NODE& treeNode = treeNodes[(*numTreeNodes)++];
treeNode = treeNodes[templateTreeNodeIndex];
treeNode.offset += elemOffset;

parNode.numFields++;
treeNode = treeNodes[prevElemStart + i];
treeNode.offset += elemSize;
// The first field points back to the inline array and has
// no bias; the rest of them do.
treeNode.parent += (i == 0) ? 0 : elemFieldsStride;
}

parNode.numFields++;
}
}
}
Expand Down

0 comments on commit 876ab61

Please sign in to comment.