diff --git a/libs/pika/affinity/src/affinity_data.cpp b/libs/pika/affinity/src/affinity_data.cpp
index 148bb265aa..21b6ce9ae2 100644
--- a/libs/pika/affinity/src/affinity_data.cpp
+++ b/libs/pika/affinity/src/affinity_data.cpp
@@ -71,7 +71,7 @@ namespace pika::detail {
 
         init_cached_pu_nums(num_system_pus);
 
-        auto const& topo = threads::detail::create_topology();
+        auto const& topo = threads::detail::get_topology();
 
         if (affinity_description == "none")
         {
diff --git a/libs/pika/affinity/src/parse_affinity_options.cpp b/libs/pika/affinity/src/parse_affinity_options.cpp
index bf784e7f76..3072e73d94 100644
--- a/libs/pika/affinity/src/parse_affinity_options.cpp
+++ b/libs/pika/affinity/src/parse_affinity_options.cpp
@@ -86,7 +86,7 @@ namespace pika::detail {
    {
        if (!use_process_mask) { return true; }
 
-       threads::detail::mask_type proc_mask = t.get_cpubind_mask();
+       threads::detail::mask_type proc_mask = t.get_cpubind_mask_main_thread();
        threads::detail::mask_type pu_mask = t.init_thread_affinity_mask(num_core, num_pu);
 
        return threads::detail::bit_and(proc_mask, pu_mask);
@@ -97,7 +97,7 @@ namespace pika::detail {
    {
        if (use_process_mask)
        {
-           threads::detail::mask_type proc_mask = t.get_cpubind_mask();
+           threads::detail::mask_type proc_mask = t.get_cpubind_mask_main_thread();
            std::size_t num_pus_proc_mask = threads::detail::count(proc_mask);
 
            if (num_threads > num_pus_proc_mask)
@@ -481,9 +481,7 @@ namespace pika::detail {
        parse_mappings(spec, mappings, ec);
        if (ec) return;
 
-       // We need to instantiate a new topology object as the runtime has not
-       // been initialized yet
-       threads::detail::topology& t = threads::detail::create_topology();
+       threads::detail::topology& t = threads::detail::get_topology();
 
        decode_distribution(mappings, t, affinities, used_cores, max_cores, num_threads, num_pus,
            use_process_mask, ec);
diff --git a/libs/pika/command_line_handling/src/command_line_handling.cpp b/libs/pika/command_line_handling/src/command_line_handling.cpp
index 4a8b612fe8..ae51aceb7f 100644
--- a/libs/pika/command_line_handling/src/command_line_handling.cpp
+++ b/libs/pika/command_line_handling/src/command_line_handling.cpp
@@ -195,21 +195,21 @@ namespace pika::detail {
    {
        if (use_process_mask)
        {
-           threads::detail::topology& top = threads::detail::create_topology();
-           return threads::detail::count(top.get_cpubind_mask());
+           threads::detail::topology& top = threads::detail::get_topology();
+           return threads::detail::count(top.get_cpubind_mask_main_thread());
        }
        else { return threads::detail::hardware_concurrency(); }
    }
 
    std::size_t get_number_of_default_cores(bool use_process_mask)
    {
-       threads::detail::topology& top = threads::detail::create_topology();
+       threads::detail::topology& top = threads::detail::get_topology();
 
        std::size_t num_cores = top.get_number_of_cores();
        if (use_process_mask)
        {
-           threads::detail::mask_type proc_mask = top.get_cpubind_mask();
+           threads::detail::mask_type proc_mask = top.get_cpubind_mask_main_thread();
            std::size_t num_cores_proc_mask = 0;
 
            for (std::size_t num_core = 0; num_core < num_cores; ++num_core)
diff --git a/libs/pika/resource_partitioner/src/detail_partitioner.cpp b/libs/pika/resource_partitioner/src/detail_partitioner.cpp
index 89fc994fa9..39f233e9b9 100644
--- a/libs/pika/resource_partitioner/src/detail_partitioner.cpp
+++ b/libs/pika/resource_partitioner/src/detail_partitioner.cpp
@@ -182,7 +182,7 @@ namespace pika::resource::detail {
      : rtcfg_()
      , first_core_(std::size_t(-1))
      , mode_(mode_default)
-     , topo_(threads::detail::create_topology())
+     , topo_(threads::detail::get_topology())
      , default_scheduler_mode_(threads::scheduler_mode::default_mode)
    {
        // allow only one partitioner instance
@@ -358,7 +358,8 @@ namespace pika::resource::detail {
        std::string process_mask_message = affinity_data_.using_process_mask() ?
            fmt::format("pika is using a process mask: {}.",
-               pika::threads::detail::to_string(get_topology().get_cpubind_mask())) :
+               pika::threads::detail::to_string(
+                   get_topology().get_cpubind_mask_main_thread())) :
            "pika is not using a process mask.";
 
        auto omp_proc_bind = std::getenv("OMP_PROC_BIND");
        std::string omp_proc_bind_message = omp_proc_bind ?
diff --git a/libs/pika/runtime/src/runtime.cpp b/libs/pika/runtime/src/runtime.cpp
index 350a90ab98..c8718af5ee 100644
--- a/libs/pika/runtime/src/runtime.cpp
+++ b/libs/pika/runtime/src/runtime.cpp
@@ -998,7 +998,7 @@ namespace pika { namespace detail {
 
    void handle_print_bind(std::size_t num_threads)
    {
-       threads::detail::topology& top = threads::detail::create_topology();
+       threads::detail::topology& top = threads::detail::get_topology();
 
        auto const& rp = pika::resource::get_partitioner();
        auto const& tm = get_runtime().get_thread_manager();
diff --git a/libs/pika/schedulers/include/pika/schedulers/local_priority_queue_scheduler.hpp b/libs/pika/schedulers/include/pika/schedulers/local_priority_queue_scheduler.hpp
index 6d1d074448..096792b43e 100644
--- a/libs/pika/schedulers/include/pika/schedulers/local_priority_queue_scheduler.hpp
+++ b/libs/pika/schedulers/include/pika/schedulers/local_priority_queue_scheduler.hpp
@@ -1084,7 +1084,7 @@ namespace pika::threads::detail {
            queues_[num_thread].data_->on_start_thread(num_thread);
 
            std::size_t num_threads = num_queues_;
-           auto const& topo = ::pika::threads::detail::create_topology();
+           auto const& topo = ::pika::threads::detail::get_topology();
 
            // get NUMA domain masks of all queues...
            std::vector<::pika::threads::detail::mask_type> numa_masks(num_threads);
diff --git a/libs/pika/schedulers/include/pika/schedulers/local_queue_scheduler.hpp b/libs/pika/schedulers/include/pika/schedulers/local_queue_scheduler.hpp
index aefb3cabb0..e468e01c2e 100644
--- a/libs/pika/schedulers/include/pika/schedulers/local_queue_scheduler.hpp
+++ b/libs/pika/schedulers/include/pika/schedulers/local_queue_scheduler.hpp
@@ -98,9 +98,9 @@ namespace pika::threads::detail {
          , steals_in_numa_domain_()
          , steals_outside_numa_domain_()
          , numa_domain_masks_(init.num_queues_,
-               ::pika::threads::detail::create_topology().get_machine_affinity_mask())
+               ::pika::threads::detail::get_topology().get_machine_affinity_mask())
          , outside_numa_domain_masks_(init.num_queues_,
-               ::pika::threads::detail::create_topology().get_machine_affinity_mask())
+               ::pika::threads::detail::get_topology().get_machine_affinity_mask())
        {
            ::pika::threads::detail::resize(
                steals_in_numa_domain_, threads::detail::hardware_concurrency());
@@ -781,7 +781,7 @@ namespace pika::threads::detail {
 
            queues_[num_thread]->on_start_thread(num_thread);
 
-           auto const& topo = ::pika::threads::detail::create_topology();
+           auto const& topo = ::pika::threads::detail::get_topology();
 
            // pre-calculate certain constants for the given thread number
            std::size_t num_pu = affinity_data_.get_pu_num(num_thread);
diff --git a/libs/pika/schedulers/include/pika/schedulers/shared_priority_queue_scheduler.hpp b/libs/pika/schedulers/include/pika/schedulers/shared_priority_queue_scheduler.hpp
index 0a74c17310..ef41361c65 100644
--- a/libs/pika/schedulers/include/pika/schedulers/shared_priority_queue_scheduler.hpp
+++ b/libs/pika/schedulers/include/pika/schedulers/shared_priority_queue_scheduler.hpp
@@ -903,7 +903,7 @@ namespace pika::threads::detail {
            using namespace pika::debug::detail;
            PIKA_DETAIL_DP(spq_deb<5>, debug(str<>("start_thread"), "local_thread", local_thread));
 
-           auto const& topo = ::pika::threads::detail::create_topology();
+           auto const& topo = ::pika::threads::detail::get_topology();
            // the main initialization can be done by any one thread
            std::unique_lock lock(init_mutex);
            if (!initialized_)
diff --git a/libs/pika/thread_pools/include/pika/thread_pools/scheduled_thread_pool_impl.hpp b/libs/pika/thread_pools/include/pika/thread_pools/scheduled_thread_pool_impl.hpp
index 4be6a57f8e..4aca1229cf 100644
--- a/libs/pika/thread_pools/include/pika/thread_pools/scheduled_thread_pool_impl.hpp
+++ b/libs/pika/thread_pools/include/pika/thread_pools/scheduled_thread_pool_impl.hpp
@@ -283,7 +283,7 @@ namespace pika::threads::detail {
            std::make_shared(pool_threads + 1);
 
        try
        {
-           topology const& topo = create_topology();
+           topology const& topo = get_topology();
 
            for (/**/; thread_num != pool_threads; ++thread_num)
            {
@@ -392,7 +392,7 @@ pika::threads::detail::scheduled_thread_pool::thread_func(std::size_t thread_num,
    std::size_t global_thread_num, std::shared_ptr startup)
 {
-   topology const& topo = create_topology();
+   topology const& topo = get_topology();
 
    // Set the affinity for the current thread.
    threads::detail::mask_cref_type mask = affinity_data_.get_pu_mask(topo, global_thread_num);
diff --git a/libs/pika/threading_base/src/thread_pool_base.cpp b/libs/pika/threading_base/src/thread_pool_base.cpp
index 0a5c6117ef..e6376508f5 100644
--- a/libs/pika/threading_base/src/thread_pool_base.cpp
+++ b/libs/pika/threading_base/src/thread_pool_base.cpp
@@ -38,7 +38,7 @@ namespace pika::threads::detail {
    ///////////////////////////////////////////////////////////////////////////
    mask_type thread_pool_base::get_used_processing_units() const
    {
-       auto const& topo = create_topology();
+       auto const& topo = get_topology();
        auto const sched = get_scheduler();
 
        mask_type used_processing_units = mask_type();
@@ -58,7 +58,7 @@ namespace pika::threads::detail {
 
    hwloc_bitmap_ptr thread_pool_base::get_numa_domain_bitmap() const
    {
-       auto const& topo = create_topology();
+       auto const& topo = get_topology();
        mask_type used_processing_units = get_used_processing_units();
        return topo.cpuset_to_nodeset(used_processing_units);
    }
diff --git a/libs/pika/topology/include/pika/topology/topology.hpp b/libs/pika/topology/include/pika/topology/topology.hpp
index 48526e4152..bd75c4b23e 100644
--- a/libs/pika/topology/include/pika/topology/topology.hpp
+++ b/libs/pika/topology/include/pika/topology/topology.hpp
@@ -244,6 +244,7 @@ namespace pika::threads::detail {
        std::size_t get_pu_number(
            std::size_t num_core, std::size_t num_pu, error_code& ec = throws) const;
 
+       mask_type get_cpubind_mask_main_thread(error_code& ec = throws) const;
        mask_type get_cpubind_mask(error_code& ec = throws) const;
        mask_type get_cpubind_mask(std::thread& handle, error_code& ec = throws) const;
 
@@ -361,12 +362,13 @@ namespace pika::threads::detail {
        std::vector<mask_type> numa_node_affinity_masks_;
        std::vector<mask_type> core_affinity_masks_;
        std::vector<mask_type> thread_affinity_masks_;
+       mask_type main_thread_affinity_mask_;
    };
 
 #include
 
    ///////////////////////////////////////////////////////////////////////////
-   PIKA_EXPORT topology& create_topology();
+   PIKA_EXPORT topology& get_topology();
 
    [[nodiscard]] PIKA_EXPORT unsigned int hardware_concurrency() noexcept;
diff --git a/libs/pika/topology/src/topology.cpp b/libs/pika/topology/src/topology.cpp
index 208d5fb8f0..32fcab126a 100644
--- a/libs/pika/topology/src/topology.cpp
+++ b/libs/pika/topology/src/topology.cpp
@@ -52,12 +52,6 @@
 #endif
 
 namespace pika::threads::detail {
-   std::size_t hwloc_hardware_concurrency()
-   {
-       threads::detail::topology& top = threads::detail::create_topology();
-       return top.get_number_of_pus();
-   }
-
    void write_to_log(char const* valuename, std::size_t value)
    {
        LTM_(debug).format("topology: {}: {}", valuename, value);    //-V128
@@ -183,6 +177,23 @@ namespace pika::threads::detail {
    }
 
    ///////////////////////////////////////////////////////////////////////////
+   // We use a function-local static for the topology object so that we don't depend on static
+   // initialization order across translation units, and so that the object is guaranteed to
+   // exist before first access. However, we also want to initialize the topology object early,
+   // so that we can read the CPU mask of the main thread before e.g. an OpenMP runtime resets
+   // it. We therefore also have a global object whose constructor calls get_topology, so that
+   // we don't depend on others calling get_topology early for us.
+   topology& get_topology()
+   {
+       static topology topo;
+       return topo;
+   }
+
+   static struct init_topology_t
+   {
+       init_topology_t() { get_topology(); }
+   } init_topology{};
+
 #if !defined(PIKA_HAVE_MAX_CPU_COUNT)
    mask_type topology::empty_mask = mask_type(hardware_concurrency());
 #else
@@ -193,6 +204,7 @@ namespace pika::threads::detail {
      : topo(nullptr)
      , use_pus_as_cores_(false)
      , machine_affinity_mask_(0)
+     , main_thread_affinity_mask_(0)
    {    // {{{
        int err = hwloc_topology_init(&topo);
        if (err != 0)
@@ -287,6 +299,10 @@ namespace pika::threads::detail {
        {
            thread_affinity_masks_.push_back(init_thread_affinity_mask(i));
        }
+
+       // We assume here that the topology object is created in a global constructor on the main
+       // thread (get_cpubind_mask returns the mask of the current thread).
+       main_thread_affinity_mask_ = get_cpubind_mask();
    }    // }}}
 
    void topology::write_to_log() const
@@ -1119,6 +1135,11 @@ namespace pika::threads::detail {
    std::size_t topology::get_number_of_pus() const { return num_of_pus_; }
 
    ///////////////////////////////////////////////////////////////////////////
+   mask_type topology::get_cpubind_mask_main_thread(error_code&) const
+   {
+       return main_thread_affinity_mask_;
+   }
+
    mask_type topology::get_cpubind_mask(error_code& ec) const
    {
        hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
@@ -1421,32 +1442,13 @@ namespace pika::threads::detail {
        //print_vector(os, pu_numbers_);
    }
 
-   ///////////////////////////////////////////////////////////////////////////
-   topology& create_topology()
-   {
-       static topology topo;
-       return topo;
-   }
-
-   ///////////////////////////////////////////////////////////////////////////
-   struct hw_concurrency
-   {
-       hw_concurrency() noexcept
+   unsigned int hardware_concurrency() noexcept
    {
 #if defined(__ANDROID__) && defined(ANDROID)
-         : num_of_cores_(::android_getCpuCount())
+       static auto concurrency = ::android_getCpuCount();
 #else
-         : num_of_cores_(hwloc_hardware_concurrency())
+       static auto concurrency = get_topology().get_number_of_pus();
 #endif
-       {
-           if (num_of_cores_ == 0) num_of_cores_ = 1;
-       }
-
-       std::size_t num_of_cores_;
-   };
-
-   unsigned int hardware_concurrency() noexcept
-   {
-       static detail::hw_concurrency hwc;
-       return static_cast<unsigned int>(hwc.num_of_cores_);
+       return static_cast<unsigned int>(concurrency);
    }
 } // namespace pika::threads::detail
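
Note: the heart of this patch is the pattern added to topology.cpp: a function-local static (so access never observes an uninitialized object and doesn't depend on static-initialization order across translation units) combined with an eager namespace-scope initializer (so the object is nevertheless constructed on the main thread before main() runs). A minimal, self-contained sketch of that pattern follows; the names topology_like, get_instance, and eager_init are hypothetical stand-ins, not pika API:

    #include <cstdio>

    struct topology_like
    {
        // In the real code this loads the hwloc topology and records the
        // calling thread's binding mask.
        topology_like() { std::puts("topology constructed (on the main thread)"); }
    };

    // Function-local static: constructed on first call, thread-safe since C++11.
    topology_like& get_instance()
    {
        static topology_like instance;
        return instance;
    }

    // Eager initializer: a static-storage object whose constructor forces the
    // singleton to exist during static initialization, before main() runs,
    // without relying on anyone else calling get_instance() early.
    static struct eager_init_t
    {
        eager_init_t() { get_instance(); }
    } eager_init{};

    int main()
    {
        get_instance();    // already constructed; prints nothing new
    }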
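
The eager construction matters because of the new main_thread_affinity_mask_ member: the topology constructor calls get_cpubind_mask(), which reports the binding of whichever thread runs it, so it must run on the main thread before e.g. an OpenMP runtime rebinds that thread. A standalone sketch of the underlying hwloc query (real hwloc calls; error handling mostly omitted):

    #include <hwloc.h>

    #include <cstdio>
    #include <cstdlib>

    int main()
    {
        hwloc_topology_t topo;
        hwloc_topology_init(&topo);
        hwloc_topology_load(topo);

        // HWLOC_CPUBIND_THREAD queries the binding of the calling thread only;
        // capturing it this early is what makes the cached value the *main*
        // thread's mask rather than that of some later worker thread.
        hwloc_bitmap_t set = hwloc_bitmap_alloc();
        if (hwloc_get_cpubind(topo, set, HWLOC_CPUBIND_THREAD) == 0)
        {
            char* str = nullptr;
            hwloc_bitmap_asprintf(&str, set);
            std::printf("main thread cpubind mask: %s\n", str);
            std::free(str);
        }

        hwloc_bitmap_free(set);
        hwloc_topology_destroy(topo);
    }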
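
Finally, hardware_concurrency() now memoizes the PU count directly in a function-local static instead of going through the removed hw_concurrency helper struct. The same idiom in isolation, using std::thread as a stand-in for the topology query (note that the old helper clamped a reported 0 to 1; this sketch keeps that fallback):

    #include <thread>

    unsigned int cached_hardware_concurrency() noexcept
    {
        // Computed exactly once, on first call, then cached for the lifetime
        // of the process.
        static unsigned int const n = []() noexcept {
            unsigned int c = std::thread::hardware_concurrency();
            return c == 0 ? 1u : c;
        }();
        return n;
    }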