Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ruapu detection only on Windows ARM; enable CPU power info with the MinGW compiler #5593

Merged
merged 4 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 21 additions & 20 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,9 @@
#include <emscripten/threading.h>
#endif

#if defined _WIN32 && !(defined __MINGW32__)
#if defined _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <powerbase.h>
#endif

#if defined __ANDROID__ || defined __linux__
Expand Down Expand Up @@ -129,8 +128,10 @@
#include <immintrin.h>
#endif

#if (defined _WIN32 && (__aarch64__ || __arm__))
#define RUAPU_IMPLEMENTATION
#include "ruapu.h"
#endif

// topology info
static int g_cpucount;
Expand Down Expand Up @@ -596,9 +597,6 @@ static int get_cpu_support_x86_avx2()

static int get_cpu_support_x86_avx_vnni()
{
#if __APPLE__
return ruapu_supports("avxvnni");
#else
unsigned int cpu_info[4] = {0};
x86_cpuid(0, cpu_info);

Expand All @@ -617,13 +615,16 @@ static int get_cpu_support_x86_avx_vnni()

x86_cpuid_sublevel(7, 1, cpu_info);
return cpu_info[0] & (1u << 4);
#endif
}

static int get_cpu_support_x86_avx512()
{
#if __APPLE__
return ruapu_supports("avx512f") && ruapu_supports("avx512bw") && ruapu_supports("avx512cd") && ruapu_supports("avx512dq") && ruapu_supports("avx512vl");
return get_hw_capability("hw.optional.avx512f")
&& get_hw_capability("hw.optional.avx512bw")
&& get_hw_capability("hw.optional.avx512cd")
&& get_hw_capability("hw.optional.avx512dq")
&& get_hw_capability("hw.optional.avx512vl");
#else
unsigned int cpu_info[4] = {0};
x86_cpuid(0, cpu_info);
Expand Down Expand Up @@ -653,7 +654,7 @@ static int get_cpu_support_x86_avx512()
static int get_cpu_support_x86_avx512_vnni()
{
#if __APPLE__
return ruapu_supports("avx512vnni");
return get_hw_capability("hw.optional.avx512vnni");
#else
unsigned int cpu_info[4] = {0};
x86_cpuid(0, cpu_info);
Expand Down Expand Up @@ -683,7 +684,7 @@ static int get_cpu_support_x86_avx512_vnni()
static int get_cpu_support_x86_avx512_bf16()
{
#if __APPLE__
return ruapu_supports("avx512bf16");
return get_hw_capability("hw.optional.avx512bf16");
#else
unsigned int cpu_info[4] = {0};
x86_cpuid(0, cpu_info);
Expand All @@ -709,7 +710,7 @@ static int get_cpu_support_x86_avx512_bf16()
static int get_cpu_support_x86_avx512_fp16()
{
#if __APPLE__
return ruapu_supports("avx512fp16");
return get_hw_capability("hw.optional.avx512fp16");
#else
unsigned int cpu_info[4] = {0};
x86_cpuid(0, cpu_info);
Expand Down Expand Up @@ -745,7 +746,7 @@ static int get_cpucount()
count = emscripten_num_logical_cores();
else
count = 1;
#elif (defined _WIN32 && !(defined __MINGW32__))
#elif defined _WIN32
SYSTEM_INFO system_info;
GetSystemInfo(&system_info);
count = system_info.dwNumberOfProcessors;
Expand Down Expand Up @@ -812,7 +813,7 @@ static int get_thread_siblings(int cpuid)
static int get_physical_cpucount()
{
int count = 0;
#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
if (glpi == NULL)
Expand Down Expand Up @@ -1050,7 +1051,7 @@ static int get_big_cpu_data_cache_size(int level)
static int get_cpu_level2_cachesize()
{
int size = 0;
#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
if (glpi != NULL)
Expand Down Expand Up @@ -1120,7 +1121,7 @@ static int get_cpu_level2_cachesize()
static int get_cpu_level3_cachesize()
{
int size = 0;
#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
if (glpi != NULL)
Expand Down Expand Up @@ -1167,7 +1168,7 @@ static int get_cpu_level3_cachesize()
return size;
}

#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
static ncnn::CpuSet get_smt_cpu_mask()
{
ncnn::CpuSet smt_cpu_mask;
Expand Down Expand Up @@ -1261,7 +1262,7 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask)

return 0;
}
#endif // (defined _WIN32 && !(defined __MINGW32__))
#endif // defined _WIN32

#if defined __ANDROID__ || defined __linux__
static int get_max_freq_khz(int cpuid)
Expand Down Expand Up @@ -1435,7 +1436,7 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp
mask_all.enable(i);
}

#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
// get max freq mhz for all cores
int max_freq_mhz_min = INT_MAX;
int max_freq_mhz_max = 0;
Expand Down Expand Up @@ -1867,7 +1868,7 @@ static void initialize_global_cpu_info()
g_powersave = 0;
initialize_cpu_thread_affinity_mask(g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big);

#if (defined _WIN32 && (__aarch64__ || __arm__)) || __APPLE__
#if (defined _WIN32 && (__aarch64__ || __arm__))
if (!is_being_debugged())
{
ruapu_init();
Expand Down Expand Up @@ -1944,7 +1945,7 @@ static inline void try_initialize_global_cpu_info()

namespace ncnn {

#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
CpuSet::CpuSet()
{
disable_all();
Expand Down Expand Up @@ -2685,7 +2686,7 @@ const CpuSet& get_cpu_thread_affinity_mask(int powersave)
int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask)
{
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__ || (defined _WIN32 && !(defined __MINGW32__))
#if defined __ANDROID__ || defined __linux__ || defined _WIN32
#ifdef _OPENMP
int num_threads = thread_affinity_mask.num_enabled();

Expand Down
4 changes: 2 additions & 2 deletions src/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <stddef.h>

#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
Expand All @@ -40,7 +40,7 @@ class NCNN_EXPORT CpuSet
int num_enabled() const;

public:
#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
ULONG_PTR mask;
#endif
#if defined __ANDROID__ || defined __linux__
Expand Down
8 changes: 4 additions & 4 deletions src/platform.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
#ifdef __cplusplus

#if NCNN_THREADS
#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <process.h>
Expand All @@ -86,7 +86,7 @@
namespace ncnn {

#if NCNN_THREADS
#if (defined _WIN32 && !(defined __MINGW32__))
#if defined _WIN32
class NCNN_EXPORT Mutex
{
public:
Expand Down Expand Up @@ -141,7 +141,7 @@ public:
private:
DWORD key;
};
#else // (defined _WIN32 && !(defined __MINGW32__))
#else // defined _WIN32
class NCNN_EXPORT Mutex
{
public:
Expand Down Expand Up @@ -186,7 +186,7 @@ public:
private:
pthread_key_t key;
};
#endif // (defined _WIN32 && !(defined __MINGW32__))
#endif // defined _WIN32
#else // NCNN_THREADS
class NCNN_EXPORT Mutex
{
Expand Down
Loading