Skip to content

Commit

Permalink
Add GFNI Intrinsics (#109537)
Browse files Browse the repository at this point in the history
* add GFNI intrinsics

* add tests

* rename file

* add missing tests and AOT handling

* fix build

* fix test result
  • Loading branch information
saucecontrol authored Nov 22, 2024
1 parent 7fe4e4f commit 35f2b13
Show file tree
Hide file tree
Showing 43 changed files with 1,338 additions and 128 deletions.
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled")
Expand Down
162 changes: 108 additions & 54 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,33 +86,37 @@ enum CORINFO_InstructionSet
InstructionSet_APX=41,
InstructionSet_AVX10v2=42,
InstructionSet_AVX10v2_V512=43,
InstructionSet_X86Base_X64=44,
InstructionSet_SSE_X64=45,
InstructionSet_SSE2_X64=46,
InstructionSet_SSE3_X64=47,
InstructionSet_SSSE3_X64=48,
InstructionSet_SSE41_X64=49,
InstructionSet_SSE42_X64=50,
InstructionSet_AVX_X64=51,
InstructionSet_AVX2_X64=52,
InstructionSet_AES_X64=53,
InstructionSet_BMI1_X64=54,
InstructionSet_BMI2_X64=55,
InstructionSet_FMA_X64=56,
InstructionSet_LZCNT_X64=57,
InstructionSet_PCLMULQDQ_X64=58,
InstructionSet_POPCNT_X64=59,
InstructionSet_AVXVNNI_X64=60,
InstructionSet_X86Serialize_X64=61,
InstructionSet_AVX512F_X64=62,
InstructionSet_AVX512BW_X64=63,
InstructionSet_AVX512CD_X64=64,
InstructionSet_AVX512DQ_X64=65,
InstructionSet_AVX512VBMI_X64=66,
InstructionSet_AVX10v1_X64=67,
InstructionSet_AVX10v1_V512_X64=68,
InstructionSet_AVX10v2_X64=69,
InstructionSet_AVX10v2_V512_X64=70,
InstructionSet_GFNI=44,
InstructionSet_GFNI_V256=45,
InstructionSet_GFNI_V512=46,
InstructionSet_X86Base_X64=47,
InstructionSet_SSE_X64=48,
InstructionSet_SSE2_X64=49,
InstructionSet_SSE3_X64=50,
InstructionSet_SSSE3_X64=51,
InstructionSet_SSE41_X64=52,
InstructionSet_SSE42_X64=53,
InstructionSet_AVX_X64=54,
InstructionSet_AVX2_X64=55,
InstructionSet_AES_X64=56,
InstructionSet_BMI1_X64=57,
InstructionSet_BMI2_X64=58,
InstructionSet_FMA_X64=59,
InstructionSet_LZCNT_X64=60,
InstructionSet_PCLMULQDQ_X64=61,
InstructionSet_POPCNT_X64=62,
InstructionSet_AVXVNNI_X64=63,
InstructionSet_X86Serialize_X64=64,
InstructionSet_AVX512F_X64=65,
InstructionSet_AVX512BW_X64=66,
InstructionSet_AVX512CD_X64=67,
InstructionSet_AVX512DQ_X64=68,
InstructionSet_AVX512VBMI_X64=69,
InstructionSet_AVX10v1_X64=70,
InstructionSet_AVX10v1_V512_X64=71,
InstructionSet_AVX10v2_X64=72,
InstructionSet_AVX10v2_V512_X64=73,
InstructionSet_GFNI_X64=74,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -158,33 +162,37 @@ enum CORINFO_InstructionSet
InstructionSet_APX=41,
InstructionSet_AVX10v2=42,
InstructionSet_AVX10v2_V512=43,
InstructionSet_X86Base_X64=44,
InstructionSet_SSE_X64=45,
InstructionSet_SSE2_X64=46,
InstructionSet_SSE3_X64=47,
InstructionSet_SSSE3_X64=48,
InstructionSet_SSE41_X64=49,
InstructionSet_SSE42_X64=50,
InstructionSet_AVX_X64=51,
InstructionSet_AVX2_X64=52,
InstructionSet_AES_X64=53,
InstructionSet_BMI1_X64=54,
InstructionSet_BMI2_X64=55,
InstructionSet_FMA_X64=56,
InstructionSet_LZCNT_X64=57,
InstructionSet_PCLMULQDQ_X64=58,
InstructionSet_POPCNT_X64=59,
InstructionSet_AVXVNNI_X64=60,
InstructionSet_X86Serialize_X64=61,
InstructionSet_AVX512F_X64=62,
InstructionSet_AVX512BW_X64=63,
InstructionSet_AVX512CD_X64=64,
InstructionSet_AVX512DQ_X64=65,
InstructionSet_AVX512VBMI_X64=66,
InstructionSet_AVX10v1_X64=67,
InstructionSet_AVX10v1_V512_X64=68,
InstructionSet_AVX10v2_X64=69,
InstructionSet_AVX10v2_V512_X64=70,
InstructionSet_GFNI=44,
InstructionSet_GFNI_V256=45,
InstructionSet_GFNI_V512=46,
InstructionSet_X86Base_X64=47,
InstructionSet_SSE_X64=48,
InstructionSet_SSE2_X64=49,
InstructionSet_SSE3_X64=50,
InstructionSet_SSSE3_X64=51,
InstructionSet_SSE41_X64=52,
InstructionSet_SSE42_X64=53,
InstructionSet_AVX_X64=54,
InstructionSet_AVX2_X64=55,
InstructionSet_AES_X64=56,
InstructionSet_BMI1_X64=57,
InstructionSet_BMI2_X64=58,
InstructionSet_FMA_X64=59,
InstructionSet_LZCNT_X64=60,
InstructionSet_PCLMULQDQ_X64=61,
InstructionSet_POPCNT_X64=62,
InstructionSet_AVXVNNI_X64=63,
InstructionSet_X86Serialize_X64=64,
InstructionSet_AVX512F_X64=65,
InstructionSet_AVX512BW_X64=66,
InstructionSet_AVX512CD_X64=67,
InstructionSet_AVX512DQ_X64=68,
InstructionSet_AVX512VBMI_X64=69,
InstructionSet_AVX10v1_X64=70,
InstructionSet_AVX10v1_V512_X64=71,
InstructionSet_AVX10v2_X64=72,
InstructionSet_AVX10v2_V512_X64=73,
InstructionSet_GFNI_X64=74,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -354,6 +362,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_AVX10v2_X64);
if (HasInstructionSet(InstructionSet_AVX10v2_V512))
AddInstructionSet(InstructionSet_AVX10v2_V512_X64);
if (HasInstructionSet(InstructionSet_GFNI))
AddInstructionSet(InstructionSet_GFNI_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
Expand Down Expand Up @@ -542,6 +552,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512))
resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512_X64);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_GFNI_X64))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -618,6 +632,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX))
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
Expand Down Expand Up @@ -736,6 +760,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX))
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
Expand Down Expand Up @@ -984,6 +1018,14 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX10v2_V512";
case InstructionSet_AVX10v2_V512_X64 :
return "AVX10v2_V512_X64";
case InstructionSet_GFNI :
return "GFNI";
case InstructionSet_GFNI_X64 :
return "GFNI_X64";
case InstructionSet_GFNI_V256 :
return "GFNI_V256";
case InstructionSet_GFNI_V512 :
return "GFNI_V512";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -1072,6 +1114,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX10v2";
case InstructionSet_AVX10v2_V512 :
return "AVX10v2_V512";
case InstructionSet_GFNI :
return "GFNI";
case InstructionSet_GFNI_V256 :
return "GFNI_V256";
case InstructionSet_GFNI_V512 :
return "GFNI_V512";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -1147,6 +1195,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2;
case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512;
case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI;
case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256;
case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand Down Expand Up @@ -1189,6 +1240,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2;
case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512;
case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI;
case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256;
case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512;
#endif // TARGET_X86

default:
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 9ed85c09-d33d-4855-80ea-e3b7330e8173 */
0x9ed85c09,
0xd33d,
0x4855,
{0x80, 0xea, 0xe3, 0xb7, 0x33, 0x0e, 0x81, 0x73}
// Compile-time GUID constant naming the current JIT-EE interface version.
// The inline comment on the first line is the canonical textual form of the
// GUID; the initializers below spell out the same value field by field
// (32-bit group, two 16-bit groups, then the trailing eight bytes).
// NOTE(review): presumably this value has to be regenerated whenever the
// JIT-EE interface changes (this commit replaces the previous value alongside
// the CORINFO_InstructionSet renumbering) — confirm against the project's
// interface-update procedure.
constexpr GUID JITEEVersionIdentifier = { /* 64146448-11b1-4f94-b1f2-edce91fbcb33 */
0x64146448,
0x11b1,
0x4f94,
{0xb1, 0xf2, 0xed, 0xce, 0x91, 0xfb, 0xcb, 0x33}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Pclmulqdq_V512=50,
READYTORUN_INSTRUCTION_Avx10v2=51,
READYTORUN_INSTRUCTION_Avx10v2_V512=52,
READYTORUN_INSTRUCTION_Gfni=53,
READYTORUN_INSTRUCTION_Gfni_V256=54,
READYTORUN_INSTRUCTION_Gfni_V512=55,

};

Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6162,6 +6162,13 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
instructionSetFlags.AddInstructionSet(InstructionSet_FMA);
}

if (JitConfig.EnableGFNI() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_GFNI);
instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V256);
instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V512);
}

if (JitConfig.EnableLZCNT() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT);
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,12 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
return false;
}

case INS_gf2p8affineinvqb:
case INS_gf2p8affineqb:
{
return TakesVexPrefix(ins);
}

default:
{
unreached();
Expand Down Expand Up @@ -19836,6 +19842,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vpdpwssd:
case INS_vpdpbusds:
case INS_vpdpwssds:
case INS_gf2p8affineinvqb:
case INS_gf2p8affineqb:
case INS_gf2p8mulb:
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_5C;
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,9 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // APX
{ NI_Illegal, NI_Illegal }, // AVX10v2
{ NI_Illegal, NI_Illegal }, // AVX10v2_V512
{ FIRST_NI_GFNI, LAST_NI_GFNI },
{ FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 },
{ FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 },
{ FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 },
{ FIRST_NI_SSE_X64, LAST_NI_SSE_X64 },
{ FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 },
Expand Down Expand Up @@ -836,6 +839,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64
{ NI_Illegal, NI_Illegal }, // AVX10v2_X64
{ NI_Illegal, NI_Illegal }, // AVX10v2_V512_X64
{ NI_Illegal, NI_Illegal }, // GFNI_X64
#elif defined (TARGET_ARM64)
{ FIRST_NI_ArmBase, LAST_NI_ArmBase },
{ FIRST_NI_AdvSimd, LAST_NI_AdvSimd },
Expand Down
Loading

0 comments on commit 35f2b13

Please sign in to comment.