From 699d97b22a72ea325a0551a5bf8cedb1f814a757 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Wed, 16 Aug 2023 18:50:19 -0700 Subject: [PATCH 1/3] Fix win-arm64 native varargs ABI SIMD vector types should be passed in integer registers. This might require splitting a Vector128 between x7 and stack. --- src/coreclr/jit/codegencommon.cpp | 17 ++++++++--------- src/coreclr/jit/compiler.hpp | 6 ++++++ src/coreclr/jit/lclvars.cpp | 17 +++++++++-------- src/coreclr/jit/morph.cpp | 8 +++----- src/coreclr/jit/vartype.h | 2 ++ .../JitBlue/Runtime_71375/Runtime_71375.cs | 6 ++---- 6 files changed, 30 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 07e03d3e0c233..76c0212ceaba5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2979,12 +2979,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // Change regType to the HFA type when we have a HFA argument if (varDsc->lvIsHfaRegArg()) { -#if defined(TARGET_ARM64) - if (TargetOS::IsWindows && compiler->info.compIsVarArgs) + if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && compiler->info.compIsVarArgs) { assert(!"Illegal incoming HFA arg encountered in Vararg method."); } -#endif // defined(TARGET_ARM64) regType = varDsc->GetHfaType(); } @@ -3046,7 +3044,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++) { regNumber regNum = varDsc->lvRegNumForSlot(slotCounter); - var_types regType; + var_types slotRegType; #ifdef FEATURE_SIMD // Assumption 1: @@ -3075,15 +3073,15 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere if (varDsc->lvType == TYP_SIMD12) { - regType = TYP_DOUBLE; + slotRegType = TYP_DOUBLE; } else #endif { - regType = compiler->GetEightByteType(structDesc, slotCounter); + slotRegType = compiler->GetEightByteType(structDesc, slotCounter); } - regArgNum = genMapRegNumToRegArgNum(regNum, regType); + regArgNum = genMapRegNumToRegArgNum(regNum, slotRegType); if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) || (doingFloat && (structDesc.IsSseSlot(slotCounter)))) @@ -3101,7 +3099,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // register) regArgTab[regArgNum].varNum = varNum; regArgTab[regArgNum].slot = (char)(slotCounter + 1); - regArgTab[regArgNum].type = regType; + regArgTab[regArgNum].type = slotRegType; slots++; } } @@ -3120,7 +3118,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), regType); slots = 1; - if (TargetArchitecture::IsArm32) + if (TargetArchitecture::IsArm32 || + (TargetOS::IsWindows && TargetArchitecture::IsArm64 && compiler->info.compIsVarArgs)) { int lclSize = compiler->lvaLclSize(varNum); if (lclSize > REGSIZE_BYTES) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index ae9e180dbe83d..f5ca64f181d5f 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -2688,6 +2688,12 @@ inline var_types Compiler::mangleVarArgsType(var_types type) default: break; } + + if (varTypeIsSIMD(type)) + { + // Vectors also get passed in int registers. Use TYP_INT. + return TYP_INT; + } } #endif // defined(TARGET_ARMARCH) return type; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 3e922ee5ebcc6..e7c9a49ea4825 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -671,6 +671,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->SetHfaType(hfaType); cSlots = varDsc->lvHfaSlots(); } + // The number of slots that must be enregistered if we are to consider this argument enregistered. // This is normally the same as cSlots, since we normally either enregister the entire object, // or none of it. For structs on ARM, however, we only need to enregister a single slot to consider @@ -682,11 +683,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (compFeatureArgSplit()) { // On arm64 Windows we will need to properly handle the case where a >8byte <=16byte - // struct is split between register r7 and virtual stack slot s[0] - // We will only do this for calls to vararg methods on Windows Arm64 + // struct (or vector) is split between register r7 and virtual stack slot s[0]. + // We will only do this for calls to vararg methods on Windows Arm64. + // SIMD types (for which `varTypeIsStruct()` returns `true`) are also passed in general-purpose + // registers and can be split between registers and stack with Windows arm64 native varargs. // // !!This does not affect the normal arm64 calling convention or Unix Arm64!! - if (this->info.compIsVarArgs && argType == TYP_STRUCT) + if (info.compIsVarArgs && (cSlots > 1)) { if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register @@ -775,7 +778,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un codeGen->regSet.rsMaskPreSpillRegArg |= regMask; } } -#else // !TARGET_ARM +#endif // TARGET_ARM #if defined(UNIX_AMD64_ABI) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; @@ -817,7 +820,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } #endif // UNIX_AMD64_ABI -#endif // !TARGET_ARM // The final home for this incoming register might be our local stack frame. // For System V platforms the final home will always be on the local stack frame. @@ -921,8 +923,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // On LoongArch64 and RISCV64, if there aren't any remaining floating-point registers to pass the - // argument, - // integer registers (if any) are used instead. + // argument, integer registers (if any) are used instead. if (!canPassArgInRegisters && varTypeIsFloating(argType)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); @@ -979,7 +980,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (isHfaArg) { - // We need to save the fact that this HFA is enregistered + // We need to save the fact that this HFA is enregistered. // Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics. // In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector // register, we need to set the type now. Otherwise, later we'll assume this is passed by reference. diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f29d6a5714813..20733c64cfee0 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2269,13 +2269,11 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call hfaType = comp->GetHfaType(argSigClass); isHfaArg = varTypeIsValidHfaType(hfaType); -#if defined(TARGET_ARM64) - if (TargetOS::IsWindows) + if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && callIsVararg) { // Make sure for vararg methods isHfaArg is not true. - isHfaArg = callIsVararg ? false : isHfaArg; + isHfaArg = false; } -#endif // defined(TARGET_ARM64) if (isHfaArg) { @@ -2614,7 +2612,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // if (!isRegArg && (size > 1)) { - // Arm64 windows native varargs allows splitting a 16 byte struct between stack + // Arm64 windows native varargs allows splitting a 16 byte struct (or SIMD type) between stack // and the last general purpose register. if (TargetOS::IsWindows && callIsVararg) { diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 116d5ce2c0519..3c38a3427ebd2 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -348,6 +348,8 @@ inline bool varTypeUsesFloatArgReg(T vt) { #ifdef TARGET_ARM64 // Arm64 passes SIMD types in floating point registers. + // Exception: Windows arm64 native varargs passes them using general-purpose (integer) registers or + // by value on the stack, or split between registers and stack. return varTypeUsesFloatReg(vt); #else // Other targets pass them as regular structs - by reference or by value. diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs b/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs index 32010de2f339a..985092640f8f7 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs @@ -10,10 +10,8 @@ public class Runtime_71375 [Fact] public static int TestEntryPoint() { - // At the time of writing this test, the calling convention for incoming vector parameters on - // Windows ARM64 was broken, so only the fact that "Problem" compiled without asserts was - // checked. If/once the above is fixed, this test should be changed to actually call "Problem". - RuntimeHelpers.PrepareMethod(typeof(Runtime_71375).GetMethod("Problem").MethodHandle); + if (Problem()) + return 101; return 100; } From 136324647155247ea848f3e6493483c0e346571e Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Fri, 18 Aug 2023 10:31:40 -0700 Subject: [PATCH 2/3] Add more test cases --- .../JitBlue/Runtime_71375/Runtime_71375.cs | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs b/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs index 985092640f8f7..187767e11ddd1 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs @@ -24,4 +24,58 @@ public static bool Problem() { return VarArgs(0, 0, 0, 0, 0, 0, Vector128.AllBitsSet, __arglist()) != -1; } + + [Fact] + public static int TestEntryPoint2() + { + if (Case2()) + return 101; + return 100; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int VarArgs2(int arg1, int arg2, int arg3, int arg4, int arg5, Vector128 vecArg, __arglist) => vecArg.GetElement(0); + + [MethodImpl(MethodImplOptions.NoInlining)] + public static bool Case2() + { + // vector is not split: it is passed in registers x6, x7 + return VarArgs2(0, 0, 0, 0, 0, Vector128.AllBitsSet, __arglist()) != -1; + } + + [Fact] + public static int TestEntryPoint3() + { + if (Case3()) + return 101; + return 100; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int VarArgs3(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, Vector128 vecArg, __arglist) => vecArg.GetElement(0); + + [MethodImpl(MethodImplOptions.NoInlining)] + public static bool Case3() + { + // vector is not split: it is passed entirely on the stack + return VarArgs3(0, 0, 0, 0, 0, 0, 0, Vector128.AllBitsSet, __arglist()) != -1; + } + + [Fact] + public static int TestEntryPoint4() + { + if (Case4()) + return 101; + return 100; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int VarArgs4(int arg1, object arg2, int arg3, object arg4, int arg5, object arg6, Vector128 splitArg, __arglist) => splitArg.GetElement(0); + + [MethodImpl(MethodImplOptions.NoInlining)] + public static bool Case4() + { + // Spit the vector but also pass some object types so the GC needs to know about them. + return VarArgs4(0, new object(), 0, new object(), 0, new object(), Vector128.AllBitsSet, __arglist()) != -1; + } } From fc227dc653c5aec8592be1451ba9bc781eba80f0 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Fri, 18 Aug 2023 11:50:10 -0700 Subject: [PATCH 3/3] Update src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs Add additional varargs arguments after the vector Co-authored-by: Jan Kotas --- src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs b/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs index 187767e11ddd1..49d1979953218 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs @@ -76,6 +76,6 @@ public static int TestEntryPoint4() public static bool Case4() { // Spit the vector but also pass some object types so the GC needs to know about them. - return VarArgs4(0, new object(), 0, new object(), 0, new object(), Vector128.AllBitsSet, __arglist()) != -1; + return VarArgs4(0, new object(), 0, new object(), 0, new object(), Vector128.AllBitsSet, __arglist(new object(), 1, new object())) != -1; } }