diff --git a/src/cpu.cpp b/src/cpu.cpp index b1afbba3f65..f9e64a1cc75 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -46,10 +46,9 @@ #include #endif -#if defined _WIN32 && !(defined __MINGW32__) +#if defined _WIN32 #define WIN32_LEAN_AND_MEAN #include -#include #endif #if defined __ANDROID__ || defined __linux__ @@ -129,8 +128,10 @@ #include #endif +#if (defined _WIN32 && (__aarch64__ || __arm__)) #define RUAPU_IMPLEMENTATION #include "ruapu.h" +#endif // topology info static int g_cpucount; @@ -596,9 +597,6 @@ static int get_cpu_support_x86_avx2() static int get_cpu_support_x86_avx_vnni() { -#if __APPLE__ - return ruapu_supports("avxvnni"); -#else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -617,13 +615,16 @@ static int get_cpu_support_x86_avx_vnni() x86_cpuid_sublevel(7, 1, cpu_info); return cpu_info[0] & (1u << 4); -#endif } static int get_cpu_support_x86_avx512() { #if __APPLE__ - return ruapu_supports("avx512f") && ruapu_supports("avx512bw") && ruapu_supports("avx512cd") && ruapu_supports("avx512dq") && ruapu_supports("avx512vl"); + return get_hw_capability("hw.optional.avx512f") + && get_hw_capability("hw.optional.avx512bw") + && get_hw_capability("hw.optional.avx512cd") + && get_hw_capability("hw.optional.avx512dq") + && get_hw_capability("hw.optional.avx512vl"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -653,7 +654,7 @@ static int get_cpu_support_x86_avx512() static int get_cpu_support_x86_avx512_vnni() { #if __APPLE__ - return ruapu_supports("avx512vnni"); + return get_hw_capability("hw.optional.avx512vnni"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -683,7 +684,7 @@ static int get_cpu_support_x86_avx512_vnni() static int get_cpu_support_x86_avx512_bf16() { #if __APPLE__ - return ruapu_supports("avx512bf16"); + return get_hw_capability("hw.optional.avx512bf16"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -709,7 +710,7 @@ static int get_cpu_support_x86_avx512_bf16() static int get_cpu_support_x86_avx512_fp16() { #if __APPLE__ - return ruapu_supports("avx512fp16"); + return get_hw_capability("hw.optional.avx512fp16"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -745,7 +746,7 @@ static int get_cpucount() count = emscripten_num_logical_cores(); else count = 1; -#elif (defined _WIN32 && !(defined __MINGW32__)) +#elif defined _WIN32 SYSTEM_INFO system_info; GetSystemInfo(&system_info); count = system_info.dwNumberOfProcessors; @@ -812,7 +813,7 @@ static int get_thread_siblings(int cpuid) static int get_physical_cpucount() { int count = 0; -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi == NULL) @@ -1050,7 +1051,7 @@ static int get_big_cpu_data_cache_size(int level) static int get_cpu_level2_cachesize() { int size = 0; -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi != NULL) @@ -1120,7 +1121,7 @@ static int get_cpu_level2_cachesize() static int get_cpu_level3_cachesize() { int size = 0; -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi != NULL) @@ -1167,7 +1168,7 @@ static int get_cpu_level3_cachesize() return size; } -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 static ncnn::CpuSet get_smt_cpu_mask() { ncnn::CpuSet smt_cpu_mask; @@ -1261,7 +1262,7 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) return 0; } -#endif // (defined _WIN32 && !(defined __MINGW32__)) +#endif // defined _WIN32 #if defined __ANDROID__ || defined __linux__ static int get_max_freq_khz(int cpuid) @@ -1435,7 +1436,7 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp mask_all.enable(i); } -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 // get max freq mhz for all cores int max_freq_mhz_min = INT_MAX; int max_freq_mhz_max = 0; @@ -1867,7 +1868,7 @@ static void initialize_global_cpu_info() g_powersave = 0; initialize_cpu_thread_affinity_mask(g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big); -#if (defined _WIN32 && (__aarch64__ || __arm__)) || __APPLE__ +#if (defined _WIN32 && (__aarch64__ || __arm__)) if (!is_being_debugged()) { ruapu_init(); @@ -1944,7 +1945,7 @@ static inline void try_initialize_global_cpu_info() namespace ncnn { -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 CpuSet::CpuSet() { disable_all(); @@ -2685,7 +2686,7 @@ const CpuSet& get_cpu_thread_affinity_mask(int powersave) int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask) { try_initialize_global_cpu_info(); -#if defined __ANDROID__ || defined __linux__ || (defined _WIN32 && !(defined __MINGW32__)) +#if defined __ANDROID__ || defined __linux__ || defined _WIN32 #ifdef _OPENMP int num_threads = thread_affinity_mask.num_enabled(); diff --git a/src/cpu.h b/src/cpu.h index 7d6bfce1108..2ae6b8c3ffe 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -17,7 +17,7 @@ #include -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 #define WIN32_LEAN_AND_MEAN #include #endif @@ -40,7 +40,7 @@ class NCNN_EXPORT CpuSet int num_enabled() const; public: -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 ULONG_PTR mask; #endif #if defined __ANDROID__ || defined __linux__ diff --git a/src/platform.h.in b/src/platform.h.in index a0f17f39e31..50a9454b7da 100644 --- a/src/platform.h.in +++ b/src/platform.h.in @@ -70,7 +70,7 @@ #ifdef __cplusplus #if NCNN_THREADS -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 #define WIN32_LEAN_AND_MEAN #include #include @@ -86,7 +86,7 @@ namespace ncnn { #if NCNN_THREADS -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 class NCNN_EXPORT Mutex { public: @@ -141,7 +141,7 @@ public: private: DWORD key; }; -#else // (defined _WIN32 && !(defined __MINGW32__)) +#else // defined _WIN32 class NCNN_EXPORT Mutex { public: @@ -186,7 +186,7 @@ public: private: pthread_key_t key; }; -#endif // (defined _WIN32 && !(defined __MINGW32__)) +#endif // defined _WIN32 #else // NCNN_THREADS class NCNN_EXPORT Mutex {