[OpenMP][host runtime] Add initial hybrid CPU support
Detect different core types through CPUID leaf 0x1A and report them to the
user through the KMP_AFFINITY=verbose mechanism. Offer __kmp_is_hybrid_cpu()
so future runtime optimizations can know whether they are running on a hybrid
system or not.

Differential Revision: https://reviews.llvm.org/D110435
jpeyton52 committed Oct 14, 2021
1 parent b840d3a commit acb3b18
Showing 5 changed files with 120 additions and 1 deletion.
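
For readers unfamiliar with the CPUID leaves involved: the change uses CPUID.(EAX=07H,ECX=0):EDX[15] to detect a hybrid part and CPUID.(EAX=1AH,ECX=0):EAX to classify the core the calling thread is running on. A standalone sketch of the same queries (my illustration, not code from this commit; assumes GCC/Clang <cpuid.h> on x86-64 and a thread already pinned to one logical processor):

#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned eax, ebx, ecx, edx;
  // CPUID.(EAX=07H, ECX=0):EDX bit 15 -> processor is a hybrid part
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return 0; // leaf 7 not supported
  bool hybrid = (edx >> 15) & 1;
  printf("hybrid: %s\n", hybrid ? "yes" : "no");
  // CPUID.(EAX=1AH, ECX=0):EAX[31:24] -> core type of the *current* core
  // (0x20 = Atom, 0x40 = Core), EAX[23:0] -> native model id; this is why
  // the runtime requires the thread to be pinned before asking.
  if (hybrid && __get_cpuid_count(0x1a, 0, &eax, &ebx, &ecx, &edx)) {
    unsigned core_type = (eax >> 24) & 0xff;
    unsigned native_model_id = eax & 0xffffff;
    printf("core type: 0x%x, native model id: 0x%x\n", core_type,
           native_model_id);
  }
  return 0;
}
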
1 change: 1 addition & 0 deletions openmp/runtime/src/i18n/en_US.txt
@@ -360,6 +360,7 @@ OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the d
OmpNoAllocator "Allocator %1$s is not available, will use default allocator."
TopologyGeneric "%1$s: %2$s (%3$d total cores)"
AffGranularityBad "%1$s: granularity setting: %2$s does not exist in topology. Using granularity=%3$s instead."
TopologyHybrid "%1$s: hybrid core type detected: %2$d %3$s cores."

# --- OpenMP errors detected at runtime ---
#
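
For context, the new TopologyHybrid catalog entry is emitted via KMP_INFORM under KMP_AFFINITY=verbose, once per detected core type (see the kmp_affinity.cpp change below). Illustratively, with hypothetical core counts and the info number elided, the extra output on a hybrid part would look like:

OMP: Info #xxx: KMP_AFFINITY: hybrid core type detected: 8 Intel(R) Core(TM) processor cores.
OMP: Info #xxx: KMP_AFFINITY: hybrid core type detected: 8 Intel Atom(R) processor cores.
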
6 changes: 5 additions & 1 deletion openmp/runtime/src/kmp.h
Expand Up @@ -1222,7 +1222,8 @@ typedef struct kmp_cpuid {
typedef struct kmp_cpuinfo_flags_t {
unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
unsigned reserved : 30; // Ensure size of 32 bits
unsigned hybrid : 1;
unsigned reserved : 29; // Ensure size of 32 bits
} kmp_cpuinfo_flags_t;

typedef struct kmp_cpuinfo {
@@ -2984,6 +2985,9 @@ extern int __kmp_storage_map_verbose_specified;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
extern kmp_cpuinfo_t __kmp_cpuinfo;
static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
#else
static inline bool __kmp_is_hybrid_cpu() { return false; }
#endif

extern volatile int __kmp_init_serial;
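
The commit message positions __kmp_is_hybrid_cpu() as a hook for later work rather than something exploited here. A minimal sketch of a hypothetical future call site (not part of this change; assumes __kmp_query_cpuid() has already populated __kmp_cpuinfo.flags):

  if (__kmp_is_hybrid_cpu()) {
    // Hybrid system: e.g. factor core types into affinity or scheduling decisions.
  } else {
    // Homogeneous system: keep the existing behavior.
  }

On non-x86 builds the function simply returns false, so a branch like this can be folded away by the compiler.
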
75 changes: 75 additions & 0 deletions openmp/runtime/src/kmp_affinity.cpp
@@ -123,6 +123,20 @@ const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
return ((plural) ? "unknowns" : "unknown");
}

const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
switch (type) {
case KMP_HW_CORE_TYPE_UNKNOWN:
return "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
case KMP_HW_CORE_TYPE_ATOM:
return "Intel Atom(R) processor";
case KMP_HW_CORE_TYPE_CORE:
return "Intel(R) Core(TM) processor";
#endif
}
return "unknown";
}

////////////////////////////////////////////////////////////////////////////////
// kmp_hw_thread_t methods
int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
@@ -174,6 +188,9 @@ void kmp_hw_thread_t::print() const {
for (int i = 0; i < depth; ++i) {
printf("%4d ", ids[i]);
}
if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
printf(" (%s)", __kmp_hw_get_core_type_string(core_type));
}
printf("\n");
}

@@ -298,13 +315,20 @@ void kmp_topology_t::_set_last_level_cache() {
void kmp_topology_t::_gather_enumeration_information() {
int previous_id[KMP_HW_LAST];
int max[KMP_HW_LAST];
int previous_core_id = kmp_hw_thread_t::UNKNOWN_ID;

for (int i = 0; i < depth; ++i) {
previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
max[i] = 0;
count[i] = 0;
ratio[i] = 0;
}
if (__kmp_is_hybrid_cpu()) {
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
core_types_count[i] = 0;
core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
}
}
for (int i = 0; i < num_hw_threads; ++i) {
kmp_hw_thread_t &hw_thread = hw_threads[i];
for (int layer = 0; layer < depth; ++layer) {
@@ -326,6 +350,15 @@ void kmp_topology_t::_gather_enumeration_information() {
for (int layer = 0; layer < depth; ++layer) {
previous_id[layer] = hw_thread.ids[layer];
}
// Figure out the number of each core type for hybrid CPUs
if (__kmp_is_hybrid_cpu()) {
int core_level = get_level(KMP_HW_CORE);
if (core_level != -1) {
if (hw_thread.ids[core_level] != previous_core_id)
_increment_core_type(hw_thread.core_type);
previous_core_id = hw_thread.ids[core_level];
}
}
}
for (int layer = 0; layer < depth; ++layer) {
if (max[layer] > ratio[layer])
@@ -478,6 +511,19 @@ void kmp_topology_t::dump() const {
}
printf("\n");

printf("* core_types:\n");
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
printf(" %d %s core%c\n", core_types_count[i],
__kmp_hw_get_core_type_string(core_types[i]),
((core_types_count[i] > 1) ? 's' : ' '));
} else {
if (i == 0)
printf("No hybrid information available\n");
break;
}
}

printf("* equivalent map:\n");
KMP_FOREACH_HW_TYPE(i) {
const char *key = __kmp_hw_get_keyword(i);
@@ -571,6 +617,15 @@ void kmp_topology_t::print(const char *env_var) const {
}
KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);

if (__kmp_is_hybrid_cpu()) {
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
break;
KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
__kmp_hw_get_core_type_string(core_types[i]));
}
}

if (num_hw_threads <= 0) {
__kmp_str_buf_free(&buf);
return;
@@ -585,6 +640,9 @@ void kmp_topology_t::print(const char *env_var) const {
__kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
__kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
}
if (__kmp_is_hybrid_cpu())
__kmp_str_buf_print(
&buf, "(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
}

@@ -1782,6 +1840,16 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
return true;
}

// Hybrid cpu detection using CPUID.1A
// Thread should be pinned to processor already
static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
unsigned *native_model_id) {
kmp_cpuid buf;
__kmp_x86_cpuid(0x1a, 0, &buf);
*type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
*native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
}

// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on CPUID.B or CPUID.1F
@@ -2051,6 +2119,13 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
}
}
// Hybrid information
if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
kmp_hw_core_type_t type;
unsigned native_model_id;
__kmp_get_hybrid_info(&type, &native_model_id);
hw_thread.core_type = type;
}
hw_thread_index++;
}
KMP_ASSERT(hw_thread_index > 0);
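
__kmp_get_hybrid_info() above leans on the runtime's __kmp_extract_bits helper. A standalone equivalent (my sketch, under the assumption that __kmp_extract_bits<lo, hi>(v) returns bits lo..hi of v inclusive, shifted down to bit 0):

#include <cstdint>

// Returns bits [lo, hi] (inclusive) of v, shifted down to bit 0.
template <int lo, int hi> static inline uint32_t extract_bits(uint32_t v) {
  static_assert(0 <= lo && lo <= hi && hi <= 31, "bit range must fit in 32 bits");
  return (v & (~0u >> (31 - hi))) >> lo;
}

// With CPUID.1A's EAX in hand:
//   extract_bits<24, 31>(eax)  -> core type (0x20 = Atom, 0x40 = Core)
//   extract_bits<0, 23>(eax)   -> native model id
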
33 changes: 33 additions & 0 deletions openmp/runtime/src/kmp_affinity.h
@@ -598,6 +598,17 @@ class KMPNativeAffinity : public KMPAffinity {
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

typedef enum kmp_hw_core_type_t {
KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
KMP_HW_CORE_TYPE_ATOM = 0x20,
KMP_HW_CORE_TYPE_CORE = 0x40,
KMP_HW_MAX_NUM_CORE_TYPES = 3,
#else
KMP_HW_MAX_NUM_CORE_TYPES = 1,
#endif
} kmp_hw_core_type_t;

class kmp_hw_thread_t {
public:
static const int UNKNOWN_ID = -1;
@@ -607,11 +618,14 @@ class kmp_hw_thread_t {
int sub_ids[KMP_HW_LAST];
bool leader;
int os_id;
kmp_hw_core_type_t core_type;

void print() const;
void clear() {
for (int i = 0; i < (int)KMP_HW_LAST; ++i)
ids[i] = UNKNOWN_ID;
leader = false;
core_type = KMP_HW_CORE_TYPE_UNKNOWN;
}
};

@@ -637,6 +651,11 @@ class kmp_topology_t {
// Storage containing the absolute number of each topology layer
int *count;

// Storage containing the core types and the number of
// each core type for hybrid processors
kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
int core_types_count[KMP_HW_MAX_NUM_CORE_TYPES];

// The hardware threads array
// hw_threads is num_hw_threads long
// Each hw_thread's ids and sub_ids are depth deep
@@ -675,6 +694,20 @@ class kmp_topology_t {
// Set the last level cache equivalent type
void _set_last_level_cache();

// Increments the number of cores of type 'type'
void _increment_core_type(kmp_hw_core_type_t type) {
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN) {
core_types[i] = type;
core_types_count[i] = 1;
break;
} else if (core_types[i] == type) {
core_types_count[i]++;
break;
}
}
}

public:
// Force use of allocate()/deallocate()
kmp_topology_t() = delete;
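
To make the core_types/core_types_count bookkeeping concrete, here is a standalone re-implementation of _increment_core_type() with a hypothetical enumeration order of 6 Core-type cores followed by 8 Atom-type cores (my sketch, not runtime code):

#include <cstdio>

enum core_type_t { UNKNOWN = 0x0, ATOM = 0x20, CORE = 0x40 };
const int MAX_TYPES = 3; // mirrors KMP_HW_MAX_NUM_CORE_TYPES on x86
core_type_t core_types[MAX_TYPES] = {UNKNOWN, UNKNOWN, UNKNOWN};
int core_types_count[MAX_TYPES] = {0, 0, 0};

// Same logic as kmp_topology_t::_increment_core_type(): first-seen order,
// one slot per distinct core type.
void increment_core_type(core_type_t type) {
  for (int i = 0; i < MAX_TYPES; ++i) {
    if (core_types[i] == UNKNOWN) {
      core_types[i] = type;
      core_types_count[i] = 1;
      break;
    } else if (core_types[i] == type) {
      core_types_count[i]++;
      break;
    }
  }
}

int main() {
  for (int i = 0; i < 6; ++i) increment_core_type(CORE);
  for (int i = 0; i < 8; ++i) increment_core_type(ATOM);
  // Prints "6 cores of type 0x40" then "8 cores of type 0x20".
  for (int i = 0; i < MAX_TYPES && core_types[i] != UNKNOWN; ++i)
    printf("%d cores of type 0x%x\n", core_types_count[i],
           (unsigned)core_types[i]);
  return 0;
}

The dump() and print() loops in kmp_affinity.cpp stop at the first UNKNOWN slot, so the array effectively behaves as a terminated list.
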
6 changes: 6 additions & 0 deletions openmp/runtime/src/kmp_utility.cpp
@@ -248,13 +248,19 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
}
#endif
p->flags.rtm = 0;
p->flags.hybrid = 0;
if (max_arg > 7) {
/* RTM bit CPUID.07:EBX, bit 11 */
/* HYBRID bit CPUID.07:EDX, bit 15 */
__kmp_x86_cpuid(7, 0, &buf);
p->flags.rtm = (buf.ebx >> 11) & 1;
p->flags.hybrid = (buf.edx >> 15) & 1;
if (p->flags.rtm) {
KA_TRACE(trace_level, (" RTM"));
}
if (p->flags.hybrid) {
KA_TRACE(trace_level, (" HYBRID"));
}
}
}

