Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix win-arm64 native varargs ABI #90712

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2979,12 +2979,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
// Change regType to the HFA type when we have a HFA argument
if (varDsc->lvIsHfaRegArg())
{
#if defined(TARGET_ARM64)
if (TargetOS::IsWindows && compiler->info.compIsVarArgs)
if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && compiler->info.compIsVarArgs)
{
assert(!"Illegal incoming HFA arg encountered in Vararg method.");
}
#endif // defined(TARGET_ARM64)
regType = varDsc->GetHfaType();
}

Expand Down Expand Up @@ -3046,7 +3044,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
{
regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
var_types regType;
var_types slotRegType;

#ifdef FEATURE_SIMD
// Assumption 1:
Expand Down Expand Up @@ -3075,15 +3073,15 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere

if (varDsc->lvType == TYP_SIMD12)
{
regType = TYP_DOUBLE;
slotRegType = TYP_DOUBLE;
}
else
#endif
{
regType = compiler->GetEightByteType(structDesc, slotCounter);
slotRegType = compiler->GetEightByteType(structDesc, slotCounter);
}

regArgNum = genMapRegNumToRegArgNum(regNum, regType);
regArgNum = genMapRegNumToRegArgNum(regNum, slotRegType);

if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
(doingFloat && (structDesc.IsSseSlot(slotCounter))))
Expand All @@ -3101,7 +3099,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
// register)
regArgTab[regArgNum].varNum = varNum;
regArgTab[regArgNum].slot = (char)(slotCounter + 1);
regArgTab[regArgNum].type = regType;
regArgTab[regArgNum].type = slotRegType;
slots++;
}
}
Expand All @@ -3120,7 +3118,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
regArgNum = genMapRegNumToRegArgNum(varDsc->GetArgReg(), regType);
slots = 1;

if (TargetArchitecture::IsArm32)
if (TargetArchitecture::IsArm32 ||
(TargetOS::IsWindows && TargetArchitecture::IsArm64 && compiler->info.compIsVarArgs))
{
int lclSize = compiler->lvaLclSize(varNum);
if (lclSize > REGSIZE_BYTES)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2688,6 +2688,12 @@ inline var_types Compiler::mangleVarArgsType(var_types type)
default:
break;
}

if (varTypeIsSIMD(type))
{
// Vectors also get passed in int registers. Use TYP_INT.
return TYP_INT;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the size mismatch here fine?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out the size doesn't matter; the only thing that matters is INT (use general-purpose registers) or non-INT (use floaing-point/SIMD registers).

}
}
#endif // defined(TARGET_ARMARCH)
return type;
Expand Down
17 changes: 9 additions & 8 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
varDsc->SetHfaType(hfaType);
cSlots = varDsc->lvHfaSlots();
}

// The number of slots that must be enregistered if we are to consider this argument enregistered.
// This is normally the same as cSlots, since we normally either enregister the entire object,
// or none of it. For structs on ARM, however, we only need to enregister a single slot to consider
Expand All @@ -682,11 +683,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
if (compFeatureArgSplit())
{
// On arm64 Windows we will need to properly handle the case where a >8byte <=16byte
// struct is split between register r7 and virtual stack slot s[0]
// We will only do this for calls to vararg methods on Windows Arm64
// struct (or vector) is split between register r7 and virtual stack slot s[0].
// We will only do this for calls to vararg methods on Windows Arm64.
// SIMD types (for which `varTypeIsStruct()` returns `true`) are also passed in general-purpose
// registers and can be split between registers and stack with Windows arm64 native varargs.
//
// !!This does not affect the normal arm64 calling convention or Unix Arm64!!
if (this->info.compIsVarArgs && argType == TYP_STRUCT)
if (info.compIsVarArgs && (cSlots > 1))
{
if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register
!varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register
Expand Down Expand Up @@ -775,7 +778,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
codeGen->regSet.rsMaskPreSpillRegArg |= regMask;
}
}
#else // !TARGET_ARM
#endif // TARGET_ARM

#if defined(UNIX_AMD64_ABI)
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
Expand Down Expand Up @@ -817,7 +820,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
}
}
#endif // UNIX_AMD64_ABI
#endif // !TARGET_ARM

// The final home for this incoming register might be our local stack frame.
// For System V platforms the final home will always be on the local stack frame.
Expand Down Expand Up @@ -921,8 +923,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
// On LoongArch64 and RISCV64, if there aren't any remaining floating-point registers to pass the
// argument,
// integer registers (if any) are used instead.
// argument, integer registers (if any) are used instead.
if (!canPassArgInRegisters && varTypeIsFloating(argType))
{
canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister);
Expand Down Expand Up @@ -979,7 +980,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un

if (isHfaArg)
{
// We need to save the fact that this HFA is enregistered
// We need to save the fact that this HFA is enregistered.
// Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics.
// In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector
// register, we need to set the type now. Otherwise, later we'll assume this is passed by reference.
Expand Down
8 changes: 3 additions & 5 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2269,13 +2269,11 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
hfaType = comp->GetHfaType(argSigClass);
isHfaArg = varTypeIsValidHfaType(hfaType);

#if defined(TARGET_ARM64)
if (TargetOS::IsWindows)
if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && callIsVararg)
{
// Make sure for vararg methods isHfaArg is not true.
isHfaArg = callIsVararg ? false : isHfaArg;
isHfaArg = false;
}
#endif // defined(TARGET_ARM64)

if (isHfaArg)
{
Expand Down Expand Up @@ -2614,7 +2612,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
//
if (!isRegArg && (size > 1))
{
// Arm64 windows native varargs allows splitting a 16 byte struct between stack
// Arm64 windows native varargs allows splitting a 16 byte struct (or SIMD type) between stack
// and the last general purpose register.
if (TargetOS::IsWindows && callIsVararg)
{
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/vartype.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,8 @@ inline bool varTypeUsesFloatArgReg(T vt)
{
#ifdef TARGET_ARM64
// Arm64 passes SIMD types in floating point registers.
// Exception: Windows arm64 native varargs passes them using general-purpose (integer) registers or
// by value on the stack, or split between registers and stack.
return varTypeUsesFloatReg(vt);
#else
// Other targets pass them as regular structs - by reference or by value.
Expand Down
60 changes: 56 additions & 4 deletions src/tests/JIT/Regression/JitBlue/Runtime_71375/Runtime_71375.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ public class Runtime_71375
[Fact]
public static int TestEntryPoint()
{
// At the time of writing this test, the calling convention for incoming vector parameters on
// Windows ARM64 was broken, so only the fact that "Problem" compiled without asserts was
// checked. If/once the above is fixed, this test should be changed to actually call "Problem".
RuntimeHelpers.PrepareMethod(typeof(Runtime_71375).GetMethod("Problem").MethodHandle);
if (Problem())
return 101;

return 100;
}
Expand All @@ -26,4 +24,58 @@ public static bool Problem()
{
return VarArgs(0, 0, 0, 0, 0, 0, Vector128<int>.AllBitsSet, __arglist()) != -1;
}

[Fact]
public static int TestEntryPoint2()
{
if (Case2())
return 101;
return 100;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static int VarArgs2(int arg1, int arg2, int arg3, int arg4, int arg5, Vector128<int> vecArg, __arglist) => vecArg.GetElement(0);

[MethodImpl(MethodImplOptions.NoInlining)]
public static bool Case2()
{
// vector is not split: it is passed in registers x6, x7
return VarArgs2(0, 0, 0, 0, 0, Vector128<int>.AllBitsSet, __arglist()) != -1;
}

[Fact]
public static int TestEntryPoint3()
{
if (Case3())
return 101;
return 100;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static int VarArgs3(int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, Vector128<int> vecArg, __arglist) => vecArg.GetElement(0);

[MethodImpl(MethodImplOptions.NoInlining)]
public static bool Case3()
{
// vector is not split: it is passed entirely on the stack
return VarArgs3(0, 0, 0, 0, 0, 0, 0, Vector128<int>.AllBitsSet, __arglist()) != -1;
}

[Fact]
public static int TestEntryPoint4()
{
if (Case4())
return 101;
return 100;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static int VarArgs4(int arg1, object arg2, int arg3, object arg4, int arg5, object arg6, Vector128<int> splitArg, __arglist) => splitArg.GetElement(0);

[MethodImpl(MethodImplOptions.NoInlining)]
public static bool Case4()
{
// Spit the vector but also pass some object types so the GC needs to know about them.
return VarArgs4(0, new object(), 0, new object(), 0, new object(), Vector128<int>.AllBitsSet, __arglist(new object(), 1, new object())) != -1;
}
}