Skip to content

Commit

Permalink
Ensure we emit vzeroupper for JIT helpers that need it
Browse files Browse the repository at this point in the history
  • Loading branch information
tannergooding committed Feb 11, 2024
1 parent f764a47 commit e1b0354
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 9 deletions.
36 changes: 27 additions & 9 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6074,7 +6074,9 @@ void CodeGen::genCall(GenTreeCall* call)
}
#endif // defined(DEBUG) && defined(TARGET_X86)

if (compiler->canUseVexEncoding())
var_types returnType = call->TypeGet();

if (call->IsPInvoke() && compiler->canUseVexEncoding())
{
// The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
// Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
Expand All @@ -6084,15 +6086,32 @@ void CodeGen::genCall(GenTreeCall* call)

bool needsZeroupper = false;

// TODO-XArch-CQ: We should emit vzeroupper before R2R calls if they aren't known to require AVX+
// TODO-XArch-CQ: We should emit vzeroupper before JIT helpers known to use floating-point

if (call->IsPInvoke() && (call->gtCallType != CT_HELPER))
switch (call->gtCallType)
{
// Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
// may use the legacy encoding.
case CT_USER_FUNC:
case CT_INDIRECT:
{
// Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
// may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT

needsZeroupper = true;
break;
}

case CT_HELPER:
{
// Most helpers are well known to not use any floating-point or SIMD logic internally, but
// a few do exist so we need to ensure they are handled. They are identified by taking or
// returning a floating-point or SIMD type, regardless of how it is actually passed/returned.

needsZeroupper = true;
needsZeroupper = call->gtArgs.PassesFloatOrSimd() || varTypeUsesFloatReg(returnType);
break;
}

default:
{
unreached();
}
}

if (needsZeroupper)
Expand Down Expand Up @@ -6120,7 +6139,6 @@ void CodeGen::genCall(GenTreeCall* call)
assert((gcInfo.gcRegByrefSetCur & killMask) == 0);
#endif

var_types returnType = call->TypeGet();
if (returnType != TYP_VOID)
{
#ifdef TARGET_X86
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1490,6 +1490,7 @@ CallArgs::CallArgs()
#ifdef UNIX_X86_ABI
, m_alignmentDone(false)
#endif
, m_passesFloatOrSimd(false)
{
}

Expand Down Expand Up @@ -9782,6 +9783,7 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co
m_hasStackArgs = other->m_hasStackArgs;
m_argsComplete = other->m_argsComplete;
m_needsTemps = other->m_needsTemps;
m_passesFloatOrSimd = other->m_passesFloatOrSimd;

// Unix x86 flags related to stack alignment intentionally not copied as
// they depend on where the call will be inserted.
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -4777,6 +4777,8 @@ class CallArgs
// Updateable flag, set to 'true' after we've done any required alignment.
bool m_alignmentDone : 1;
#endif
// True if we pass any floating-point or SIMD value
bool m_passesFloatOrSimd : 1;

void AddedWellKnownArg(WellKnownArg arg);
void RemovedWellKnownArg(WellKnownArg arg);
Expand Down Expand Up @@ -4846,6 +4848,7 @@ class CallArgs
// Returns the current value of the m_hasRegArgs flag.
bool HasRegArgs() const
{
    return m_hasRegArgs;
}
// Returns the current value of the m_hasStackArgs flag.
bool HasStackArgs() const
{
    return m_hasStackArgs;
}
// Returns the current value of the m_needsTemps flag.
bool NeedsTemps() const
{
    return m_needsTemps;
}
// Returns true if the call passes any floating-point or SIMD value.
// The flag starts out false and is assigned by AddFinalArgsAndDetermineABIInfo,
// which ORs together varTypeUsesFloatReg() over each argument's signature type.
bool PassesFloatOrSimd() const
{
    return m_passesFloatOrSimd;
}

#ifdef UNIX_X86_ABI
void ComputeStackAlignment(unsigned curStackLevelInBytes)
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1967,6 +1967,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call

bool callHasRetBuffArg = HasRetBuffer();
bool callIsVararg = IsVarArgs();
bool passesFloatOrSimd = false;

#ifdef TARGET_ARM
regMaskTP argSkippedRegMask = RBM_NONE;
Expand Down Expand Up @@ -2242,6 +2243,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
const var_types argSigType = arg.GetSignatureType();
const CORINFO_CLASS_HANDLE argSigClass = arg.GetSignatureClassHandle();

passesFloatOrSimd |= varTypeUsesFloatReg(argSigType);

// Setup any HFA information about the argument.
bool isHfaArg = false;
var_types hfaType = TYP_UNDEF;
Expand Down Expand Up @@ -3027,6 +3030,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call
}
#endif

m_passesFloatOrSimd = passesFloatOrSimd;
m_abiInformationDetermined = true;
}

Expand Down

0 comments on commit e1b0354

Please sign in to comment.