Skip to content

Commit

Permalink
[arm64] Add RCPC ISA (8.3+) and use ldap for volatile reads (#67384)
Browse files Browse the repository at this point in the history
Co-authored-by: Adeel Mujahid <3840695+am11@users.noreply.github.com>
  • Loading branch information
EgorBo and am11 authored Apr 12, 2022
1 parent 3635e0f commit e9915df
Show file tree
Hide file tree
Showing 16 changed files with 69 additions and 15 deletions.
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Dp, W("EnableArm64Dp"), 1
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rdm, W("EnableArm64Rdm"), 1, "Allows Arm64 Rdm+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha1, W("EnableArm64Sha1"), 1, "Allows Arm64 Sha1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha256, W("EnableArm64Sha256"), 1, "Allows Arm64 Sha256+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc, W("EnableArm64Rcpc"), 1, "Allows Arm64 Rcpc+ hardware intrinsics to be disabled")
#endif

///
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ enum CORINFO_InstructionSet
InstructionSet_Rdm_Arm64=18,
InstructionSet_Sha1_Arm64=19,
InstructionSet_Sha256_Arm64=20,
InstructionSet_Rcpc=21,
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -486,6 +487,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector128";
case InstructionSet_Dczva :
return "Dczva";
case InstructionSet_Rcpc :
return "Rcpc";
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case InstructionSet_X86Base :
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* b2d3c86f-87fd-4724-9e5d-4c44905eba91 */
0xb2d3c86f,
0x87fd,
0x4724,
{0x9e, 0x5d, 0x4c, 0x44, 0x90, 0x5e, 0xba, 0x91}
constexpr GUID JITEEVersionIdentifier = { /* 206a7aa6-9f5c-47c1-b63b-54f4cb169ee3 */
0x206a7aa6,
0x9f5c,
0x47c1,
{0xb6, 0x3b, 0x54, 0xf4, 0xcb, 0x16, 0x9e, 0xe3}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5233,6 +5233,12 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14);
theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15);

// ldapr Rt, [reg]
theEmitter->emitIns_R_R(INS_ldapr, EA_8BYTE, REG_R9, REG_R8);
theEmitter->emitIns_R_R(INS_ldapr, EA_4BYTE, REG_R7, REG_R10);
theEmitter->emitIns_R_R(INS_ldaprb, EA_4BYTE, REG_R5, REG_R11);
theEmitter->emitIns_R_R(INS_ldaprh, EA_4BYTE, REG_R5, REG_R12);

// ldaxr Rt, [reg]
theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8);
theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10);
Expand Down
10 changes: 7 additions & 3 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1884,17 +1884,21 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
bool addrIsInReg = tree->Addr()->isUsedFromReg();
bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);

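// On ARMv8.3+ (FEAT_LRCPC) we can use the ldapr* load-acquire instructions instead of ldar*.
// They still provide acquire semantics, but unlike ldar* they are not required to be ordered
// after an earlier stlr (RCpc rather than RCsc), which avoids a full-barrier-like cost when
// volatile loads are mixed with STLR stores.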
bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);

if ((ins == INS_ldrb) && addrIsInReg)
{
ins = INS_ldarb;
ins = hasRcpc ? INS_ldaprb : INS_ldarb;
}
else if ((ins == INS_ldrh) && addrIsInReg && addrIsAligned)
{
ins = INS_ldarh;
ins = hasRcpc ? INS_ldaprh : INS_ldarh;
}
else if ((ins == INS_ldr) && addrIsInReg && addrIsAligned && genIsValidIntReg(targetReg))
{
ins = INS_ldar;
ins = hasRcpc ? INS_ldapr : INS_ldar;
}
else
#endif // TARGET_ARM64
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
{
case INS_ldxrb:
case INS_ldarb:
case INS_ldaprb:
case INS_ldaxrb:
case INS_stxrb:
case INS_stlrb:
Expand All @@ -1145,6 +1146,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)

case INS_ldxrh:
case INS_ldarh:
case INS_ldaprh:
case INS_ldaxrh:
case INS_stxrh:
case INS_stlrh:
Expand Down Expand Up @@ -1181,6 +1183,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)

case INS_ldxr:
case INS_ldar:
case INS_ldapr:
case INS_ldaxr:
case INS_stxr:
case INS_stlr:
Expand Down Expand Up @@ -1212,6 +1215,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
switch (ins)
{
case INS_ldarb:
case INS_ldaprb:
case INS_stlrb:
case INS_ldrb:
case INS_strb:
Expand All @@ -1223,6 +1227,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
break;

case INS_ldarh:
case INS_ldaprh:
case INS_stlrh:
case INS_ldrh:
case INS_strh:
Expand All @@ -1247,6 +1252,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
break;

case INS_ldar:
case INS_ldapr:
case INS_stlr:
case INS_ldr:
case INS_str:
Expand Down Expand Up @@ -4460,6 +4466,7 @@ void emitter::emitIns_R_R(
break;

case INS_ldar:
case INS_ldapr:
case INS_ldaxr:
case INS_ldxr:
case INS_stlr:
Expand All @@ -4468,9 +4475,11 @@ void emitter::emitIns_R_R(
FALLTHROUGH;

case INS_ldarb:
case INS_ldaprb:
case INS_ldaxrb:
case INS_ldxrb:
case INS_ldarh:
case INS_ldaprh:
case INS_ldaxrh:
case INS_ldxrh:
case INS_stlrb:
Expand Down Expand Up @@ -14206,7 +14215,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;

case IF_LS_2A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (no immediate)
// ldar, ldarb, ldarh, ldxr, ldxrb, ldxrh,
// ldar, ldarb, ldarh, ldapr, ldaprb, ldaprh, ldxr, ldxrb, ldxrh,
// ldaxr, ldaxrb, ldaxrh, stlr, stlrb, stlrh

result.insThroughput = PERFSCORE_THROUGHPUT_1C;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/instrsarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,17 @@ INST1(ldarb, "ldarb", LD, IF_LS_2A, 0x08DFFC00)
INST1(ldarh, "ldarh", LD, IF_LS_2A, 0x48DFFC00)
// ldarh Rt,[Xn] LS_2A 0100100011011111 111111nnnnnttttt 48DF FC00


INST1(ldapr, "ldapr", LD, IF_LS_2A, 0xB8BFC000)
// ldapr Rt,[Xn] LS_2A 1X11100010111111 110000nnnnnttttt B8BF C000 Rt Rn ARMv8.3 LRCPC

INST1(ldaprb, "ldaprb", LD, IF_LS_2A, 0x38BFC000)
// ldaprb Rt,[Xn] LS_2A 0011100010111111 110000nnnnnttttt 38BF C000 Rt Rn ARMv8.3 LRCPC

INST1(ldaprh, "ldaprh", LD, IF_LS_2A, 0x78BFC000)
// ldaprh Rt,[Xn] LS_2A 0111100010111111 110000nnnnnttttt 78BF C000 Rt Rn ARMv8.3 LRCPC


INST1(ldxr, "ldxr", LD, IF_LS_2A, 0x885F7C00)
// ldxr Rt,[Xn] LS_2A 1X00100001011111 011111nnnnnttttt 885F 7C00

Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/nativeaot/Runtime/IntrinsicConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ enum ARM64IntrinsicConstants
ARM64IntrinsicConstants_Sha256 = 0x0100,
ARM64IntrinsicConstants_Atomics = 0x0200,
ARM64IntrinsicConstants_Vector64 = 0x0400,
ARM64IntrinsicConstants_Vector128 = 0x0800
ARM64IntrinsicConstants_Vector128 = 0x0800,
ARM64IntrinsicConstants_Rcpc = 0x1000
};
#endif //HOST_ARM64

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1219,8 +1219,8 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags)
// *flags |= ARM64IntrinsicConstants_???;
#endif
#ifdef HWCAP_LRCPC
// if (hwCap & HWCAP_LRCPC)
// *flags |= ARM64IntrinsicConstants_???;
if (hwCap & HWCAP_LRCPC)
*flags |= ARM64IntrinsicConstants_Rcpc;
#endif
#ifdef HWCAP_PMULL
// if (hwCap & HWCAP_PMULL)
Expand Down
11 changes: 8 additions & 3 deletions src/coreclr/pal/src/misc/jitsupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ static const CpuCapability CpuCapabilities[] = {
#endif
//{ "jscvt", HWCAP_JSCVT },
//{ "fcma", HWCAP_FCMA },
//{ "lrcpc", HWCAP_LRCPC },
#ifdef HWCAP_LRCPC
{ "lrcpc", HWCAP_LRCPC },
#endif
//{ "dcpop", HWCAP_DCPOP },
//{ "sha3", HWCAP_SHA3 },
//{ "sm3", HWCAP_SM3 },
Expand Down Expand Up @@ -208,8 +210,8 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)
// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_JSCVT);
#endif
#ifdef HWCAP_LRCPC
// if (hwCap & HWCAP_LRCPC)
// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_LRCPC);
if (hwCap & HWCAP_LRCPC)
flags->Set(InstructionSet_Rcpc);
#endif
#ifdef HWCAP_PMULL
// if (hwCap & HWCAP_PMULL)
Expand Down Expand Up @@ -280,6 +282,9 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)

if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
flags->Set(InstructionSet_Atomics);

if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
flags->Set(InstructionSet_Rcpc);
#endif // HAVE_SYSCTLBYNAME
// CoreCLR SIMD and FP support is included in ARM64 baseline
// On exceptional basis platforms may leave out support, but CoreCLR does not
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public static class ReadyToRunInstructionSetHelper
case InstructionSet.ARM64_Vector64: return null;
case InstructionSet.ARM64_Vector128: return null;
case InstructionSet.ARM64_Dczva: return null;
case InstructionSet.ARM64_Rcpc: return null;

default: throw new Exception("Unknown instruction set");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public enum InstructionSet
ARM64_Rdm_Arm64 = InstructionSet_ARM64.Rdm_Arm64,
ARM64_Sha1_Arm64 = InstructionSet_ARM64.Sha1_Arm64,
ARM64_Sha256_Arm64 = InstructionSet_ARM64.Sha256_Arm64,
ARM64_Rcpc = InstructionSet_ARM64.Rcpc,
X64_X86Base = InstructionSet_X64.X86Base,
X64_SSE = InstructionSet_X64.SSE,
X64_SSE2 = InstructionSet_X64.SSE2,
Expand Down Expand Up @@ -136,6 +137,7 @@ public enum InstructionSet_ARM64
Rdm_Arm64 = 18,
Sha1_Arm64 = 19,
Sha256_Arm64 = 20,
Rcpc = 21,
}

public enum InstructionSet_X64
Expand Down Expand Up @@ -740,6 +742,7 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
yield return new InstructionSetInfo("Vector64", "", InstructionSet.ARM64_Vector64, false);
yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false);
yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false);
yield return new InstructionSetInfo("Rcpc", "", InstructionSet.ARM64_Rcpc, false);
break;

case TargetArchitecture.X64:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ instructionset64bit,ARM64 ,Dp
instructionset64bit,ARM64 ,Rdm
instructionset64bit,ARM64 ,Sha1
instructionset64bit,ARM64 ,Sha256
instructionset ,ARM64 , , , ,Rcpc ,

vectorinstructionset,ARM64,Vector64
vectorinstructionset,ARM64,Vector128
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ private static class Arm64IntrinsicConstants
public const int Atomics = 0x0200;
public const int Vector64 = 0x0400;
public const int Vector128 = 0x0800;
public const int Rcpc = 0x1000;

public static int FromHardwareIntrinsicId(string id)
{
Expand All @@ -207,6 +208,7 @@ public static int FromHardwareIntrinsicId(string id)
"Atomics" => Atomics,
"Vector64" => Vector64,
"Vector128" => Vector128,
"Rcpc" => Rcpc,
_ => throw new NotSupportedException(),
};
}
Expand All @@ -231,6 +233,7 @@ public static int FromInstructionSetFlags(InstructionSetFlags instructionSets)
InstructionSet.ARM64_Atomics => Atomics,
InstructionSet.ARM64_Vector64 => Vector64,
InstructionSet.ARM64_Vector128 => Vector128,
InstructionSet.ARM64_Rcpc => Rcpc,
_ => throw new NotSupportedException()
};
}
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/tools/aot/ILCompiler/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ private int Run(string[] args)
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc");
}

optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _,
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/vm/codeman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1592,6 +1592,11 @@ void EEJitManager::SetCpuInfo()
CPUCompileFlags.Clear(InstructionSet_Atomics);
}

if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc))
{
CPUCompileFlags.Clear(InstructionSet_Rcpc);
}

if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32))
{
CPUCompileFlags.Clear(InstructionSet_Crc32);
Expand Down

0 comments on commit e9915df

Please sign in to comment.