-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Mono]: Reduce Mono AOT cross compiler x64 memory footprint. #97096
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<linker> | ||
<assembly fullname="System.Private.CoreLib"> | ||
<type fullname="System.Runtime.Intrinsics.Vector256"> | ||
<method signature="System.Boolean get_IsHardwareAccelerated()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.Vector512"> | ||
<method signature="System.Boolean get_IsHardwareAccelerated()" body="stub" value="false" /> | ||
</type> | ||
</assembly> | ||
</linker> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
<linker> | ||
<assembly fullname="System.Private.CoreLib"> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx2"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx2/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512BW"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512BW/VL"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512BW/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512CD"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512CD/VL"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512CD/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512DQ"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512DQ/VL"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512DQ/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512F"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512F/VL"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512F/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512Vbmi"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512Vbmi/VL"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Avx512Vbmi/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.AvxVnni"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.AvxVnni/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Fma"> | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.Fma/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.X86Serialize"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
<type fullname="System.Runtime.Intrinsics.X86.X86Serialize/X64"> | ||
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" /> | ||
</type> | ||
</assembly> | ||
</linker> |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1178,6 +1178,20 @@ create_class_instance (const char* name_space, const char *name, MonoType *param | |
return ivector_inst; | ||
} | ||
|
||
static gboolean | ||
is_supported_vector_primitive_type (MonoType *type) | ||
{ | ||
gboolean constrained_generic_param = (type->type == MONO_TYPE_VAR || type->type == MONO_TYPE_MVAR); | ||
|
||
if (constrained_generic_param && type->data.generic_param->gshared_constraint && MONO_TYPE_IS_VECTOR_PRIMITIVE (type->data.generic_param->gshared_constraint)) | ||
return TRUE; | ||
|
||
if (MONO_TYPE_IS_VECTOR_PRIMITIVE (type)) | ||
return TRUE; | ||
|
||
return FALSE; | ||
} | ||
|
||
static guint16 sri_vector_methods [] = { | ||
SN_Abs, | ||
SN_Add, | ||
|
@@ -1423,8 +1437,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi | |
return NULL; | ||
|
||
if (vector_size == 256 || vector_size == 512) | ||
return NULL; | ||
return NULL; | ||
|
||
// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 | ||
#ifdef TARGET_ARM64 | ||
if (!COMPILE_LLVM (cfg)) { | ||
|
@@ -2477,6 +2491,12 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f | |
g_free (name); | ||
} | ||
|
||
if (id == SN_get_IsSupported) { | ||
MonoInst *ins; | ||
EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 1 : 0); | ||
return ins; | ||
} | ||
|
||
// Apart from filtering out non-primitive types this also filters out shared generic instance types like: T_BYTE which cannot be intrinsified | ||
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) { | ||
// Happens often in gshared code | ||
|
@@ -3199,6 +3219,11 @@ emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSig | |
type = m_class_get_byval_arg (klass); | ||
etype = mono_class_get_context (klass)->class_inst->type_argv [0]; | ||
|
||
if (id == SN_get_IsSupported) { | ||
EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 1 : 0); | ||
return ins; | ||
} | ||
|
||
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) | ||
return NULL; | ||
|
||
|
@@ -6118,11 +6143,37 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i | |
decompose_vtype_opt_store_arg (cfg, bb, ins, &(ins->dreg)); | ||
} | ||
} | ||
|
||
gboolean | ||
mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod) | ||
{ | ||
/* | ||
* If a method has been marked with aggressive inlining, check if we support | ||
* aggressive inlining of the intrinsics type, if not, ignore aggressive inlining | ||
* since it could end up inlining a large amount of code that most likely will end | ||
* up as dead code. | ||
*/ | ||
Comment on lines
+6150
to
+6155
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you help me understand why this is needed on Mono a bit? The software fallback for V128/V256/V512 is currently implemented as doing 2x operations on the lower/upper halves (except for a couple of methods like So I would imagine that on any platform where So I'd only expect that this is needed for V64 on x86/x64 where there is no acceleration and it has to hit the scalar fallback with the loop over the elements. For RyuJIT this loop isn't an issue due to the generic specialization we do, allowing us to get it down to only the code path that is actually used. I believe this isn't possible for Mono today and is non-trivial to add, so the skipping of inlining does make sense in that regard. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So without disabling aggressive inlining I ended up with methods that where huge, and not doing aggressive inline on these types (but still honor inline size limits) made them sane again. I will need to re-iterate around that change in order to tell exactly what happened with our inliner in the aggressive case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just validated this, with disabling the aggressive inlining as above a .net9 full AOT of S.P.C takes ~1.7 GB of memory, and not doing this will consume an additional 600 MB of memory, so I believe its worth preventing aggressive inlining for these types that are not hardware accelerated on any of the Mono supported platforms. I didn't add V64 since it seems to be handled a little differently on at least ARM case. I won't have bandwidth at the moment to do more deep analysis around why aggressive inlining of these template types cause that large increase in memory so I think doing this change, at least short term is worth it, we could probably file an issue around the bloat of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For interpreter I'm handling this in a general fashion by detecting early dead pieces of code and not inlining any of the calls there. #97514. It is possible that a similar approach for jit can produce further improvements without having to special case classes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did try to experiment with some dead code elimination, but that cause issues and doesn't work with our llvm codegen, so we explicitly turn that pass off for code that will be passed over to llvm. Instead I made sure we could do more in linker, but still these methods still explode and survives first simple llvm optimizations pass we now do in function manager pass, so feels like something in the inlined code prevents elimination until very late in the opt chain. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trying to reenable branch optimizations when using llvm in this pr: |
||
if (!strcmp (m_class_get_name_space (cmethod->klass), "System.Runtime.Intrinsics")) { | ||
if (!strncmp(m_class_get_name (cmethod->klass), "Vector", 6)) { | ||
const char *vector_type = m_class_get_name (cmethod->klass) + 6; | ||
if (!strcmp(vector_type, "256`1") || !strcmp(vector_type, "512`1")) | ||
return TRUE; | ||
} | ||
} | ||
return FALSE; | ||
} | ||
#else | ||
void | ||
mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins) | ||
{ | ||
} | ||
|
||
gboolean | ||
mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod) | ||
{ | ||
return FALSE; | ||
} | ||
|
||
#endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/ | ||
|
||
#endif /* DISABLE_JIT */ | ||
|
@@ -6157,6 +6208,12 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i | |
{ | ||
} | ||
|
||
gboolean | ||
mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod) | ||
{ | ||
return FALSE; | ||
} | ||
|
||
#endif /* MONO_ARCH_SIMD_INTRINSICS */ | ||
|
||
#if defined(TARGET_AMD64) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this true for all codegens (e.g. interpreter)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like interpreter mark all vectors as not being hardware accelerated:
so for that case it should be ok.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc @kg
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We mark v128 as hardware accelerated in interp in some cases, I believe.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, so then this should be OK since it only affects v256/v512.