Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VPCLMULQDQ intrinsics #109137

Merged
merged 20 commits into from
Nov 20, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
@@ -777,6 +777,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"),
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, "Allows POPCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled")
230 changes: 116 additions & 114 deletions src/coreclr/inc/corinfoinstructionset.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 381fc250-b8f3-4cee-834e-b0bc682a09f2 */
0x381fc250,
0xb8f3,
0x4cee,
{0x83, 0x4e, 0xb0, 0xbc, 0x68, 0x2a, 0x09, 0xf2}
// The initializers below spell out the GUID named in the trailing comment:
// one 32-bit group, two 16-bit groups, then the remaining eight bytes.
constexpr GUID JITEEVersionIdentifier = { /* 9014d652-5dc7-4edf-9285-6644d0898fb5 */
0x9014d652,
0x5dc7,
0x4edf,
{0x92, 0x85, 0x66, 0x44, 0xd0, 0x89, 0x8f, 0xb5}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
2 changes: 2 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
@@ -56,6 +56,8 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Avx10v1_V512=46,
READYTORUN_INSTRUCTION_EVEX=47,
READYTORUN_INSTRUCTION_Apx=48,
READYTORUN_INSTRUCTION_Pclmulqdq_V256=49,
READYTORUN_INSTRUCTION_Pclmulqdq_V512=50,

};

6 changes: 6 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
@@ -6172,6 +6172,12 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ);
}

if (JitConfig.EnableVPCLMULQDQ() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V256);
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V512);
}

if (JitConfig.EnablePOPCNT() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT);
14 changes: 13 additions & 1 deletion src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
@@ -254,7 +254,19 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const
{
return false;
}
return HasEvexEncoding(ins);

switch (ins)
{
case INS_pclmulqdq:
saucecontrol marked this conversation as resolved.
Show resolved Hide resolved
{
return emitComp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256);
}

default:
{
return HasEvexEncoding(ins);
}
}
}

//------------------------------------------------------------------------
27 changes: 23 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
@@ -20572,19 +20572,38 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
// EVEX form for its intended lowering instruction.
//
// Return Value:
// true if the intrisic node lowering instruction has an EVEX form
// true if the intrinsic node lowering instruction has an EVEX form
//
bool GenTree::isEvexCompatibleHWIntrinsic() const
bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const
{
return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId());
#if defined(TARGET_XARCH)
if (OperIsHWIntrinsic())
{
NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();

switch (intrinsicId)
{
case NI_PCLMULQDQ_CarrylessMultiply:
{
// This intrinsic is answered by asking the compiler whether it can
// depend on InstructionSet_PCLMULQDQ_V256, rather than by the
// HWIntrinsicInfo::HasEvexSemantics lookup used for everything else.
return comp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256);
}

default:
{
// Every other intrinsic defers to HWIntrinsicInfo::HasEvexSemantics.
return HWIntrinsicInfo::HasEvexSemantics(intrinsicId);
}
}
}
#endif
// Reached when the node is not a hardware intrinsic, or when TARGET_XARCH
// is not defined.
return false;
}

//------------------------------------------------------------------------
// isEmbeddedMaskingCompatibleHWIntrinsic : Checks if the intrinsic is compatible
// with the EVEX embedded masking form for its intended lowering instruction.
//
// Return Value:
// true if the intrisic node lowering instruction has an EVEX embedded masking
// true if the intrinsic node lowering instruction has an EVEX embedded masking form
//
bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const
{
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
@@ -1480,7 +1480,7 @@ struct GenTree
bool isCommutativeHWIntrinsic() const;
bool isContainableHWIntrinsic() const;
bool isRMWHWIntrinsic(Compiler* comp);
bool isEvexCompatibleHWIntrinsic() const;
bool isEvexCompatibleHWIntrinsic(Compiler* comp) const;
bool isEmbeddedMaskingCompatibleHWIntrinsic() const;
#else
bool isCommutativeHWIntrinsic() const
4 changes: 2 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
@@ -781,6 +781,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ FIRST_NI_FMA, LAST_NI_FMA },
{ FIRST_NI_LZCNT, LAST_NI_LZCNT },
{ FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ },
{ FIRST_NI_PCLMULQDQ_V256, LAST_NI_PCLMULQDQ_V256 },
{ FIRST_NI_PCLMULQDQ_V512, LAST_NI_PCLMULQDQ_V512 },
{ FIRST_NI_POPCNT, LAST_NI_POPCNT },
{ FIRST_NI_Vector128, LAST_NI_Vector128 },
{ FIRST_NI_Vector256, LAST_NI_Vector256 },
@@ -822,9 +824,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64
{ FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 },
{ NI_Illegal, NI_Illegal }, // AVXVNNI_X64
{ NI_Illegal, NI_Illegal }, // MOVBE_X64
{ NI_Illegal, NI_Illegal }, // X86Serialize_X64
{ NI_Illegal, NI_Illegal }, // EVEX_X64
{ FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 },
{ NI_Illegal, NI_Illegal }, // AVX512BW_X64
{ NI_Illegal, NI_Illegal }, // AVX512CD_X64
1 change: 0 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
@@ -782,7 +782,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_AVX10v1_V512:
case InstructionSet_AVX10v1_V512_X64:
case InstructionSet_EVEX:
case InstructionSet_EVEX_X64:
{
genAvxFamilyIntrinsic(node, instOptions);
break;
20 changes: 19 additions & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
@@ -1524,9 +1524,27 @@ HARDWARE_INTRINSIC(LZCNT_X64, LeadingZeroCount,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ Intrinsics
#define FIRST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ_V256 Intrinsics
#define FIRST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ_V256, CarrylessMultiply, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ_V512 Intrinsics
#define FIRST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
35 changes: 30 additions & 5 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
@@ -68,8 +68,6 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_POPCNT_X64;
case InstructionSet_X86Serialize:
return InstructionSet_X86Serialize_X64;
case InstructionSet_EVEX:
return InstructionSet_EVEX_X64;
default:
return InstructionSet_NONE;
}
@@ -103,13 +101,32 @@ static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa)
}

//------------------------------------------------------------------------
// V512VersionOfIsa: Gets the corresponding AVX10V512 only InstructionSet for a given InstructionSet
// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet
//
// Arguments:
// isa -- The InstructionSet ID
//
// Return Value:
// The AVX10V512 only InstructionSet associated with isa
// The V256-only InstructionSet associated with isa, or InstructionSet_NONE when isa has no V256 variant
static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa)
{
    // Start from "no V256 variant" and only overwrite it for the ISAs
    // this switch knows how to map.
    CORINFO_InstructionSet v256Isa = InstructionSet_NONE;

    switch (isa)
    {
        case InstructionSet_PCLMULQDQ:
        {
            v256Isa = InstructionSet_PCLMULQDQ_V256;
            break;
        }

        default:
        {
            break;
        }
    }

    return v256Isa;
}

//------------------------------------------------------------------------
// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet
//
// Arguments:
// isa -- The InstructionSet ID
//
// Return Value:
// The V512-only InstructionSet associated with isa, or InstructionSet_NONE when isa has no V512 variant
static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
{
switch (isa)
@@ -118,6 +135,8 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVX10v1_V512;
case InstructionSet_AVX10v1_X64:
return InstructionSet_AVX10v1_V512_X64;
case InstructionSet_PCLMULQDQ:
return InstructionSet_PCLMULQDQ_V512;
default:
return InstructionSet_NONE;
}
@@ -330,7 +349,11 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className,

if (className[0] == 'V')
{
if (strcmp(className, "V512") == 0)
if (strcmp(className, "V256") == 0)
{
return V256VersionOfIsa(enclosingIsa);
}
else if (strcmp(className, "V512") == 0)
{
return V512VersionOfIsa(enclosingIsa);
}
@@ -847,6 +870,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
case InstructionSet_LZCNT_X64:
case InstructionSet_PCLMULQDQ:
case InstructionSet_PCLMULQDQ_X64:
case InstructionSet_PCLMULQDQ_V256:
case InstructionSet_PCLMULQDQ_V512:
case InstructionSet_POPCNT:
case InstructionSet_POPCNT_X64:
case InstructionSet_SSE:
2 changes: 1 addition & 1 deletion src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
@@ -400,7 +400,7 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE,
INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow
INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation
INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist
INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords
INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords

// SSE4.1
INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
@@ -406,6 +406,7 @@ RELEASE_CONFIG_INTEGER(EnableBMI2, "EnableBMI2",
RELEASE_CONFIG_INTEGER(EnableFMA, "EnableFMA", 1) // Allows FMA+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableLZCNT, "EnableLZCNT", 1) // Allows LZCNT+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, "EnablePCLMULQDQ", 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, "EnableVPCLMULQDQ", 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnablePOPCNT, "EnablePOPCNT", 1) // Allows POPCNT+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableSSE, "EnableSSE", 1) // Allows SSE+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableSSE2, "EnableSSE2", 1) // Allows SSE2+ hardware intrinsics to be disabled
Loading
Loading