From a79c62ddc8089cf2879ed36eac9aa333b32bde5f Mon Sep 17 00:00:00 2001 From: Johan Lorensson Date: Fri, 9 Feb 2024 16:51:09 +0100 Subject: [PATCH] [Mono]: Reduce Mono AOT cross compiler x64 memory footprint. (#97096) Building .net8 S.P.C using Mono AOT cross compiler in full AOT consumes a large amount of memory (up to 6 GB). This is mainly due to the generated LLVM module not being optimized at all while kept in memory during full module generation. Mono x64 also lacks support for several intrinsics as well as Vector 256/512 that in turn leads to massive inlining of intrinsics functions generating a very large LLVM module, where the majority of this code ends up as dead code due to IsSupported/IsHardwareAccelerated returning false. This commit adjusts several things that will bring down the memory usage, compiling .net8/.net9 Mono S.P.C on x64 Windows from 6 GB down to ~750 MB. * Use PNSE (PlatformNotSupportedException) implementations on intrinsics not supported on Mono. * Add ILLinker substitutions for intrinsics not supported on Mono. Enables ILLinker to do dead code elimination, reducing the amount of code to AOT compile. * Prevent aggressive inlining for a couple of unsupported intrinsics types making sure we don't end up with excessive inlining, exploding code size. * Run a couple of LLVM optimization passes on each generated method doing early code simplification and dead code elimination during LLVM module generation. * Explicit SN_get_IsHardwareAccelerated/SN_get_IsSupported intrinsics implementation for all unsupported Mono x64 SIMD intrinsics. * Fixed numerous memory leaks in Mono AOT cross compiler code. * Fix a couple of sequence points use-after-free errors. * Fix an anonymous struct build warning triggering build error for LLVM enabled cross compiler on Windows. 
--------- Co-authored-by: Zoltan Varga --- .../System.Private.CoreLib.Shared.projitems | 30 ++++--- .../System.Private.CoreLib.csproj | 2 + ...LLink.Substitutions.Intrinsics.Vectors.xml | 10 +++ .../ILLink.Substitutions.Intrinsics.x86.xml | 79 +++++++++++++++++++ src/mono/mono/mini/aot-compiler.c | 2 +- src/mono/mono/mini/method-to-ir.c | 11 ++- src/mono/mono/mini/mini.h | 1 + src/mono/mono/mini/simd-intrinsics.c | 61 +++++++++++++- 8 files changed, 182 insertions(+), 14 deletions(-) create mode 100644 src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml create mode 100644 src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 261968ae7f8f3d..94089d1018d8d1 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2576,17 +2576,26 @@ - - - - - - - - + + + + + + + + + + + + + + + + - + + @@ -2597,7 +2606,8 @@ - + + diff --git a/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj index 49c5d602711719..1c085ed36e446e 100644 --- a/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/mono/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -151,6 +151,8 @@ + + diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml new file mode 100644 index 00000000000000..c50829b7843929 --- /dev/null +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.Vectors.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git 
a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml new file mode 100644 index 00000000000000..bd008db96ba1d0 --- /dev/null +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mono/mono/mini/aot-compiler.c b/src/mono/mono/mini/aot-compiler.c index f8275230e210be..d8f80b0bc6a932 100644 --- a/src/mono/mono/mini/aot-compiler.c +++ b/src/mono/mono/mini/aot-compiler.c @@ -14229,7 +14229,7 @@ static void acfg_free (MonoAotCompile *acfg) { #ifdef ENABLE_LLVM - if (acfg->aot_opts.llvm) + if (mono_use_llvm || acfg->aot_opts.llvm) mono_llvm_free_aot_module (); #endif diff --git a/src/mono/mono/mini/method-to-ir.c b/src/mono/mono/mini/method-to-ir.c index 87b9498074ce02..86c4eb29158746 100644 --- a/src/mono/mono/mini/method-to-ir.c +++ b/src/mono/mono/mini/method-to-ir.c @@ -4746,6 +4746,15 @@ mini_inline_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature * return inline_method (cfg, cmethod, fsig, sp, ip, real_offset, inline_always, NULL); } +static gboolean +aggressive_inline_method (MonoMethod *cmethod) +{ + gboolean aggressive_inline = m_method_is_aggressive_inlining (cmethod); + if (aggressive_inline) + aggressive_inline = !mono_simd_unsupported_aggressive_inline_intrinsic_type (cmethod); + return aggressive_inline; +} + /* * inline_method: * @@ -4871,7 +4880,7 @@ inline_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, cfg->disable_inline = prev_disable_inline; cfg->inline_depth --; - if ((costs >= 0 && costs < 60) || inline_always || (costs >= 0 && (cmethod->iflags & METHOD_IMPL_ATTRIBUTE_AGGRESSIVE_INLINING))) { + if ((costs >= 0 && costs < 60) || inline_always || (costs >= 0 && 
aggressive_inline_method (cmethod))) { if (cfg->verbose_level > 2) printf ("INLINE END %s -> %s\n", mono_method_full_name (cfg->method, TRUE), mono_method_full_name (cmethod, TRUE)); diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index 7a9ca5644678a4..9982afc22e3f2f 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -2962,6 +2962,7 @@ MonoInst* mono_emit_common_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoInst* mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args); MonoInst* mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr); void mono_simd_intrinsics_init (void); +gboolean mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod); MonoMethod* mini_method_to_shared (MonoMethod *method); // null if not shared diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 13d341f9e8bf01..73e5d88f3b504a 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1178,6 +1178,20 @@ create_class_instance (const char* name_space, const char *name, MonoType *param return ivector_inst; } +static gboolean +is_supported_vector_primitive_type (MonoType *type) +{ + gboolean constrained_generic_param = (type->type == MONO_TYPE_VAR || type->type == MONO_TYPE_MVAR); + + if (constrained_generic_param && type->data.generic_param->gshared_constraint && MONO_TYPE_IS_VECTOR_PRIMITIVE (type->data.generic_param->gshared_constraint)) + return TRUE; + + if (MONO_TYPE_IS_VECTOR_PRIMITIVE (type)) + return TRUE; + + return FALSE; +} + static guint16 sri_vector_methods [] = { SN_Abs, SN_Add, @@ -1423,8 +1437,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; if (vector_size == 256 || vector_size == 512) - return NULL; - + return NULL; + // FIXME: This limitation could be removed once everything here are supported by 
mini JIT on arm64 #ifdef TARGET_ARM64 if (!COMPILE_LLVM (cfg)) { @@ -2477,6 +2491,12 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f g_free (name); } + if (id == SN_get_IsSupported) { + MonoInst *ins; + EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 1 : 0); + return ins; + } + // Apart from filtering out non-primitive types this also filters out shared generic instance types like: T_BYTE which cannot be intrinsified if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) { // Happens often in gshared code @@ -3199,6 +3219,11 @@ emit_sys_numerics_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSig type = m_class_get_byval_arg (klass); etype = mono_class_get_context (klass)->class_inst->type_argv [0]; + if (id == SN_get_IsSupported) { + EMIT_NEW_ICONST (cfg, ins, is_supported_vector_primitive_type (etype) ? 1 : 0); + return ins; + } + if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) return NULL; @@ -6118,11 +6143,37 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i decompose_vtype_opt_store_arg (cfg, bb, ins, &(ins->dreg)); } } + +gboolean +mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod *cmethod) +{ + /* + * If a method has been marked with aggressive inlining, check if we support + * aggressive inlining of the intrinsics type, if not, ignore aggressive inlining + * since it could end up inlining a large amount of code that most likely will end + * up as dead code. 
+ */ + if (!strcmp (m_class_get_name_space (cmethod->klass), "System.Runtime.Intrinsics")) { + if (!strncmp(m_class_get_name (cmethod->klass), "Vector", 6)) { + const char *vector_type = m_class_get_name (cmethod->klass) + 6; + if (!strcmp(vector_type, "256`1") || !strcmp(vector_type, "512`1")) + return TRUE; + } + } + return FALSE; +} #else void mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *ins) { } + +gboolean +mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod) +{ + return FALSE; +} + #endif /*defined(TARGET_WIN32) && defined(TARGET_AMD64)*/ #endif /* DISABLE_JIT */ @@ -6157,6 +6208,12 @@ mono_simd_decompose_intrinsic (MonoCompile *cfg, MonoBasicBlock *bb, MonoInst *i { } +gboolean +mono_simd_unsupported_aggressive_inline_intrinsic_type (MonoMethod* cmethod) +{ + return FALSE; +} + #endif /* MONO_ARCH_SIMD_INTRINSICS */ #if defined(TARGET_AMD64)