Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VPCLMULQDQ intrinsics #109137

Merged
merged 20 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"),
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, "Allows POPCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled")
Expand Down
230 changes: 116 additions & 114 deletions src/coreclr/inc/corinfoinstructionset.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 381fc250-b8f3-4cee-834e-b0bc682a09f2 */
0x381fc250,
0xb8f3,
0x4cee,
{0x83, 0x4e, 0xb0, 0xbc, 0x68, 0x2a, 0x09, 0xf2}
constexpr GUID JITEEVersionIdentifier = { /* 9014d652-5dc7-4edf-9285-6644d0898fb5 */
0x9014d652,
0x5dc7,
0x4edf,
{0x92, 0x85, 0x66, 0x44, 0xd0, 0x89, 0x8f, 0xb5}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Avx10v1_V512=46,
READYTORUN_INSTRUCTION_EVEX=47,
READYTORUN_INSTRUCTION_Apx=48,
READYTORUN_INSTRUCTION_Pclmulqdq_V256=49,
READYTORUN_INSTRUCTION_Pclmulqdq_V512=50,

};

Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6172,6 +6172,12 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ);
}

if (JitConfig.EnableVPCLMULQDQ() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V256);
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V512);
}

if (JitConfig.EnablePOPCNT() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT);
Expand Down
14 changes: 13 additions & 1 deletion src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,19 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const
{
return false;
}
return HasEvexEncoding(ins);

switch (ins)
{
case INS_pclmulqdq:
saucecontrol marked this conversation as resolved.
Show resolved Hide resolved
{
return emitComp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256);
}

default:
{
return HasEvexEncoding(ins);
}
}
}

//------------------------------------------------------------------------
Expand Down
27 changes: 23 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20572,19 +20572,38 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
// EVEX form for its intended lowering instruction.
//
// Return Value:
// true if the intrisic node lowering instruction has an EVEX form
// true if the intrinsic node lowering instruction has an EVEX form
//
bool GenTree::isEvexCompatibleHWIntrinsic() const
bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const
{
return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId());
#if defined(TARGET_XARCH)
if (OperIsHWIntrinsic())
{
NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();

switch (intrinsicId)
{
case NI_PCLMULQDQ_CarrylessMultiply:
{
return comp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256);
}

default:
{
return HWIntrinsicInfo::HasEvexSemantics(intrinsicId);
}
}
}
#endif
return false;
}

//------------------------------------------------------------------------
// isEmbeddedMaskingCompatibleHWIntrinsic : Checks if the intrinsic is compatible
// with the EVEX embedded masking form for its intended lowering instruction.
//
// Return Value:
// true if the intrisic node lowering instruction has an EVEX embedded masking
// true if the intrinsic node lowering instruction has an EVEX embedded masking
//
bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ struct GenTree
bool isCommutativeHWIntrinsic() const;
bool isContainableHWIntrinsic() const;
bool isRMWHWIntrinsic(Compiler* comp);
bool isEvexCompatibleHWIntrinsic() const;
bool isEvexCompatibleHWIntrinsic(Compiler* comp) const;
bool isEmbeddedMaskingCompatibleHWIntrinsic() const;
#else
bool isCommutativeHWIntrinsic() const
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ FIRST_NI_FMA, LAST_NI_FMA },
{ FIRST_NI_LZCNT, LAST_NI_LZCNT },
{ FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ },
{ FIRST_NI_PCLMULQDQ_V256, LAST_NI_PCLMULQDQ_V256 },
{ FIRST_NI_PCLMULQDQ_V512, LAST_NI_PCLMULQDQ_V512 },
{ FIRST_NI_POPCNT, LAST_NI_POPCNT },
{ FIRST_NI_Vector128, LAST_NI_Vector128 },
{ FIRST_NI_Vector256, LAST_NI_Vector256 },
Expand Down Expand Up @@ -822,9 +824,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64
{ FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 },
{ NI_Illegal, NI_Illegal }, // AVXVNNI_X64
{ NI_Illegal, NI_Illegal }, // MOVBE_X64
{ NI_Illegal, NI_Illegal }, // X86Serialize_X64
{ NI_Illegal, NI_Illegal }, // EVEX_X64
{ FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 },
{ NI_Illegal, NI_Illegal }, // AVX512BW_X64
{ NI_Illegal, NI_Illegal }, // AVX512CD_X64
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_AVX10v1_V512:
case InstructionSet_AVX10v1_V512_X64:
case InstructionSet_EVEX:
case InstructionSet_EVEX_X64:
{
genAvxFamilyIntrinsic(node, instOptions);
break;
Expand Down
20 changes: 19 additions & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1524,9 +1524,27 @@ HARDWARE_INTRINSIC(LZCNT_X64, LeadingZeroCount,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ Intrinsics
#define FIRST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ_V256 Intrinsics
#define FIRST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ_V256, CarrylessMultiply, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ_V512 Intrinsics
#define FIRST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
Expand Down
35 changes: 30 additions & 5 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_POPCNT_X64;
case InstructionSet_X86Serialize:
return InstructionSet_X86Serialize_X64;
case InstructionSet_EVEX:
return InstructionSet_EVEX_X64;
default:
return InstructionSet_NONE;
}
Expand Down Expand Up @@ -103,13 +101,32 @@ static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa)
}

//------------------------------------------------------------------------
// V512VersionOfIsa: Gets the corresponding AVX10V512 only InstructionSet for a given InstructionSet
// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet
//
// Arguments:
// isa -- The InstructionSet ID
//
// Return Value:
// The AVX10V512 only InstructionSet associated with isa
// The V256 only InstructionSet associated with isa
static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa)
{
    // PCLMULQDQ is the only instruction set this function maps to a V256
    // counterpart; every other input yields InstructionSet_NONE.
    if (isa == InstructionSet_PCLMULQDQ)
    {
        return InstructionSet_PCLMULQDQ_V256;
    }

    return InstructionSet_NONE;
}

//------------------------------------------------------------------------
// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet
//
// Arguments:
// isa -- The InstructionSet ID
//
// Return Value:
// The V512 only InstructionSet associated with isa
static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
{
switch (isa)
Expand All @@ -118,6 +135,8 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVX10v1_V512;
case InstructionSet_AVX10v1_X64:
return InstructionSet_AVX10v1_V512_X64;
case InstructionSet_PCLMULQDQ:
return InstructionSet_PCLMULQDQ_V512;
default:
return InstructionSet_NONE;
}
Expand Down Expand Up @@ -330,7 +349,11 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className,

if (className[0] == 'V')
{
if (strcmp(className, "V512") == 0)
if (strcmp(className, "V256") == 0)
{
return V256VersionOfIsa(enclosingIsa);
}
else if (strcmp(className, "V512") == 0)
{
return V512VersionOfIsa(enclosingIsa);
}
Expand Down Expand Up @@ -847,6 +870,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
case InstructionSet_LZCNT_X64:
case InstructionSet_PCLMULQDQ:
case InstructionSet_PCLMULQDQ_X64:
case InstructionSet_PCLMULQDQ_V256:
case InstructionSet_PCLMULQDQ_V512:
case InstructionSet_POPCNT:
case InstructionSet_POPCNT_X64:
case InstructionSet_SSE:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE,
INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow
INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation
INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist
INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords
INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords

// SSE4.1
INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ RELEASE_CONFIG_INTEGER(EnableBMI2, "EnableBMI2",
RELEASE_CONFIG_INTEGER(EnableFMA, "EnableFMA", 1) // Allows FMA+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableLZCNT, "EnableLZCNT", 1) // Allows LZCNT+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, "EnablePCLMULQDQ", 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, "EnableVPCLMULQDQ", 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnablePOPCNT, "EnablePOPCNT", 1) // Allows POPCNT+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableSSE, "EnableSSE", 1) // Allows SSE+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableSSE2, "EnableSSE2", 1) // Allows SSE2+ hardware intrinsics to be disabled
Expand Down
Loading
Loading