Skip to content

Commit

Permalink
Add feature detection for ARM/MacOS (#41924)
Browse files Browse the repository at this point in the history
  • Loading branch information
gbaraldi authored Feb 15, 2022
1 parent cc345f6 commit 9542577
Showing 1 changed file with 59 additions and 19 deletions.
78 changes: 59 additions & 19 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
# undef USE_DYN_GETAUXVAL
# include <sys/auxv.h>
# endif
#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_
#include <sys/sysctl.h>
#include <string.h>
#endif

namespace ARM {
Expand Down Expand Up @@ -160,6 +163,8 @@ enum class CPU : uint32_t {
apple_a11,
apple_a12,
apple_a13,
apple_a14,
apple_m1,
apple_s4,
apple_s5,

Expand Down Expand Up @@ -240,6 +245,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2);
constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm);
constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2);
constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint);
constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2);
constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);

// For ARM cores, the features required can be found in the technical reference manual
Expand Down Expand Up @@ -342,6 +348,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
// Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
// and sysctl -a hw.optional
constexpr auto apple_s4 = apple_a12;
constexpr auto apple_s5 = apple_a12;

Expand Down Expand Up @@ -420,6 +430,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11},
{"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
{"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
{"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
{"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
{"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
{"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
{"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
Expand Down Expand Up @@ -662,13 +674,47 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2},
{"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3},
{"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4},
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000,
Feature::samsung_exynos_m5},
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5},
{"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7},
};
#endif
// Number of entries in the `cpus` table, computed at compile time from the array's size.
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);

// Look up a CPU by its numeric id.
// Forwards to the global ::find_cpu, supplying this file's `cpus` table and its
// entry count `ncpu_names`.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
{
    auto spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}

// Look up a CPU by its name.
// Forwards to the global ::find_cpu, supplying this file's `cpus` table and its
// entry count `ncpu_names`.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
{
    auto spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}

static inline const char *find_cpu_name(uint32_t cpu)
{
return ::find_cpu_name(cpu, cpus, ncpu_names);
}

#if defined _CPU_AARCH64_ && defined _OS_DARWIN_

// Host CPU detection for AArch64 macOS.
//
// Queries the `machdep.cpu.brand_string` sysctl and maps the reported brand to
// a (CPU id, feature list) pair. Every brand string this function knows about
// ("Apple M1", "Apple M1 Pro", "Apple M1 Max") maps to CPU::apple_m1, and an
// unrecognised or unreadable brand string falls back to CPU::apple_m1 as well,
// so the function never fails.
static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
{
    // Zero-initialise and hand the kernel one byte less than the buffer size so
    // the string is always NUL-terminated, whatever sysctl writes.
    char brand[128] = {0};
    size_t brand_len = sizeof(brand) - 1;
    if (sysctlbyname("machdep.cpu.brand_string", brand, &brand_len, NULL, 0) != 0) {
        // The query failed (unknown name, buffer too small, ...). The buffer
        // contents are not trustworthy, so treat the brand as unknown instead
        // of comparing against garbage.
        brand[0] = '\0';
    }

    // Prefix match covers the whole M1 family ("Apple M1", "Apple M1 Pro",
    // "Apple M1 Max") with a single comparison.
    static const char m1_prefix[] = "Apple M1";
    if (strncmp(brand, m1_prefix, sizeof(m1_prefix) - 1) == 0)
        return std::make_pair(static_cast<uint32_t>(CPU::apple_m1), Feature::apple_m1);

    // Unknown brand: keep the previous behaviour of assuming an M1-class core.
    return std::make_pair(static_cast<uint32_t>(CPU::apple_m1), Feature::apple_m1);
}

#else

// auxval reader

#ifndef AT_HWCAP
Expand Down Expand Up @@ -974,7 +1020,7 @@ static CPU get_cpu_name(CPUID cpuid)
default: return CPU::generic;
}
case 0x61: // 'a': Apple
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
// https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
switch (cpuid.part) {
case 0x0: // Swift
return CPU::apple_swift;
Expand Down Expand Up @@ -1002,6 +1048,12 @@ static CPU get_cpu_name(CPUID cpuid)
case 0x12: // Lightning
case 0x13: // Thunder
return CPU::apple_a13;
case 0x20: // Icestorm
case 0x21: // Firestorm
return CPU::apple_a14;
case 0x22: // Icestorm m1
case 0x23: // Firestorm m1
return CPU::apple_m1;
default: return CPU::generic;
}
case 0x68: // 'h': Huaxintong Semiconductor
Expand All @@ -1019,6 +1071,9 @@ static CPU get_cpu_name(CPUID cpuid)
}
}




namespace {

struct arm_arch {
Expand Down Expand Up @@ -1062,21 +1117,6 @@ static arm_arch get_elf_arch(void)
#endif
}

// Look up a CPU by its numeric id.
// Forwards to the global ::find_cpu, supplying this file's `cpus` table and its
// entry count `ncpu_names`.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
{
    auto spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}

// Look up a CPU by its name.
// Forwards to the global ::find_cpu, supplying this file's `cpus` table and its
// entry count `ncpu_names`.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
{
    auto spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}

static inline const char *find_cpu_name(uint32_t cpu)
{
return ::find_cpu_name(cpu, cpus, ncpu_names);
}

static arm_arch feature_arch_version(const FeatureList<feature_sz> &feature)
{
#ifdef _CPU_AARCH64_
Expand Down Expand Up @@ -1303,9 +1343,9 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
}
// Ignore feature bits that we are not interested in.
mask_features(feature_masks, &features[0]);

return std::make_pair(cpu, features);
}
#endif

static inline const std::pair<uint32_t,FeatureList<feature_sz>> &get_host_cpu()
{
Expand Down

0 comments on commit 9542577

Please sign in to comment.