Add option for runtime element distribution
macedo22 authored and knelli2 committed Nov 28, 2023
1 parent 7b65d74 commit d9e16ad
Showing 68 changed files with 393 additions and 61 deletions.
33 changes: 33 additions & 0 deletions src/Domain/ElementDistribution.cpp
@@ -27,6 +27,9 @@
#include "NumericalAlgorithms/Spectral/LogicalCoordinates.hpp"
#include "NumericalAlgorithms/Spectral/Mesh.hpp"
#include "NumericalAlgorithms/Spectral/Quadrature.hpp"
#include "Options/Options.hpp"
#include "Options/ParseError.hpp"
#include "Options/ParseOptions.hpp"
#include "Utilities/Algorithm.hpp"
#include "Utilities/ConstantExpressions.hpp"
#include "Utilities/ErrorHandling/Assert.hpp"
@@ -75,6 +78,19 @@ double get_num_points_and_grid_spacing_cost(
}
} // namespace

std::ostream& operator<<(std::ostream& os, ElementWeight weight) {
switch (weight) {
case ElementWeight::Uniform:
return os << "Uniform";
case ElementWeight::NumGridPoints:
return os << "NumGridPoints";
case ElementWeight::NumGridPointsAndGridSpacing:
return os << "NumGridPointsAndGridSpacing";
default:
ERROR("Unknown ElementWeight type");
}
}

template <size_t Dim>
std::unordered_map<ElementId<Dim>, double> get_element_costs(
const std::vector<Block<Dim>>& blocks,
@@ -327,3 +343,20 @@ GENERATE_INSTANTIATIONS(INSTANTIATION, (1, 2, 3))
#undef GET_DIM
#undef INSTANTIATION
} // namespace domain

template <>
domain::ElementWeight
Options::create_from_yaml<domain::ElementWeight>::create<void>(
const Options::Option& options) {
const auto ordering = options.parse_as<std::string>();
if (ordering == "Uniform") {
return domain::ElementWeight::Uniform;
} else if (ordering == "NumGridPoints") {
return domain::ElementWeight::NumGridPoints;
} else if (ordering == "NumGridPointsAndGridSpacing") {
return domain::ElementWeight::NumGridPointsAndGridSpacing;
}
PARSE_ERROR(options.context(),
"ElementWeight must be 'Uniform', 'NumGridPoints', or "
"'NumGridPointsAndGridSpacing'");
}
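
For reference, the accepted strings map one-to-one onto the enum values, so the YAML side is simply one of the following (a hypothetical input-file fragment; the option name and its grouping come from the option tag added later in this diff):

ElementDistribution: Uniform
ElementDistribution: NumGridPoints
ElementDistribution: NumGridPointsAndGridSpacing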
19 changes: 19 additions & 0 deletions src/Domain/ElementDistribution.hpp
@@ -11,6 +11,8 @@
#include <utility>
#include <vector>

#include "Options/Options.hpp"

/// \cond
template <size_t Dim>
class Block;
@@ -41,6 +43,8 @@ enum class ElementWeight {
NumGridPointsAndGridSpacing
};

std::ostream& operator<<(std::ostream& os, ElementWeight weight);

/// \brief Get the cost of each `Element` in a list of `Block`s where
/// `element_weight` specifies which weight distribution scheme to use
///
@@ -133,6 +137,8 @@ std::unordered_map<ElementId<Dim>, double> get_element_costs(
*/
template <size_t Dim>
struct BlockZCurveProcDistribution {
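/// Default construction yields an empty distribution; the allocators in
/// this diff default-construct one and only fill it in when an element
/// weight (and hence the Z-curve distribution) is actually in use.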
BlockZCurveProcDistribution() = default;

/// The `number_of_procs_with_elements` argument represents how many procs
/// will have elements. This is not necessarily equal to the total number of
/// procs because some global procs may be ignored by the sixth argument
@@ -166,3 +172,16 @@ struct BlockZCurveProcDistribution {
block_element_distribution_;
};
} // namespace domain

template <>
struct Options::create_from_yaml<domain::ElementWeight> {
template <typename Metavariables>
static domain::ElementWeight create(const Options::Option& options) {
return create<void>(options);
}
};

template <>
domain::ElementWeight
Options::create_from_yaml<domain::ElementWeight>::create<void>(
const Options::Option& options);
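
The split above follows a common pattern for making an enum parsable from YAML: the header provides a generic `create<Metavariables>` that forwards to an explicit `create<void>` specialization, whose definition lives in the .cpp (shown earlier in this diff) so the string matching is compiled once. A minimal self-contained sketch of the same pattern for a hypothetical enum `MyEnum` (all names illustrative, not part of this commit):

// MyEnum.hpp (hypothetical)
#include "Options/Options.hpp"

enum class MyEnum { A, B };

template <>
struct Options::create_from_yaml<MyEnum> {
  template <typename Metavariables>
  static MyEnum create(const Options::Option& options) {
    // Every Metavariables instantiation forwards to the single
    // explicit specialization defined in the .cpp.
    return create<void>(options);
  }
};

template <>
MyEnum Options::create_from_yaml<MyEnum>::create<void>(
    const Options::Option& options);

// MyEnum.cpp (hypothetical)
#include <string>
#include "Options/ParseError.hpp"
#include "Options/ParseOptions.hpp"

template <>
MyEnum Options::create_from_yaml<MyEnum>::create<void>(
    const Options::Option& options) {
  const auto value = options.parse_as<std::string>();
  if (value == "A") {
    return MyEnum::A;
  } else if (value == "B") {
    return MyEnum::B;
  }
  PARSE_ERROR(options.context(), "MyEnum must be 'A' or 'B'");
}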
1 change: 1 addition & 0 deletions src/Domain/Tags/CMakeLists.txt
@@ -5,6 +5,7 @@ spectre_target_headers(
${LIBRARY}
INCLUDE_DIRECTORY ${CMAKE_SOURCE_DIR}/src
HEADERS
ElementDistribution.hpp
FaceNormal.hpp
Faces.hpp
SurfaceJacobian.hpp
41 changes: 41 additions & 0 deletions src/Domain/Tags/ElementDistribution.hpp
@@ -0,0 +1,41 @@
// Distributed under the MIT License.
// See LICENSE.txt for details.

#pragma once

#include <cstddef>
#include <memory>
#include <optional>

#include "DataStructures/DataBox/Tag.hpp"
#include "Domain/Domain.hpp"
#include "Domain/ElementDistribution.hpp"
#include "Options/Auto.hpp"
#include "Options/String.hpp"
#include "Utilities/TMPL.hpp"

namespace domain {
namespace OptionTags {
/// \ingroup OptionTagsGroup
/// \ingroup ComputationalDomainGroup
struct ElementDistribution {
struct RoundRobin {};
using type = Options::Auto<ElementWeight, RoundRobin>;
static constexpr Options::String help = {
"Weighting pattern to use for ZCurve element distribution. Specify "
"RoundRobin to just place each element on the next core."};
};
} // namespace OptionTags

namespace Tags {
struct ElementDistribution : db::SimpleTag {
using type = std::optional<ElementWeight>;
using option_tags = tmpl::list<OptionTags::ElementDistribution>;

static constexpr bool pass_metavariables = false;
static type create_from_options(const type& element_distribution) {
return element_distribution;
}
};
} // namespace Tags
} // namespace domain
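
Because the option type is `Options::Auto<ElementWeight, RoundRobin>`, a user either names one of the weighting schemes or writes `RoundRobin` to opt out of the Z-curve distribution entirely. A hypothetical input-file fragment (the exact grouping depends on the executable):

ElementDistribution: NumGridPointsAndGridSpacing
# or, to place each element on the next available core:
ElementDistribution: RoundRobin

Writing `RoundRobin` makes `Tags::ElementDistribution` hold `std::nullopt`, which the allocators changed below use to select their round-robin branch.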
73 changes: 53 additions & 20 deletions src/Elliptic/DiscontinuousGalerkin/DgElementArray.hpp
Original file line number Diff line number Diff line change
@@ -23,6 +23,8 @@
#include "Domain/Structure/ElementId.hpp"
#include "Domain/Structure/InitialElementIds.hpp"
#include "Domain/Tags.hpp"
#include "Domain/Tags/ElementDistribution.hpp"
#include "Elliptic/DiscontinuousGalerkin/Tags.hpp"
#include "Parallel/Algorithms/AlgorithmArray.hpp"
#include "Parallel/GlobalCache.hpp"
#include "Parallel/Info.hpp"
@@ -70,6 +72,8 @@ struct DefaultElementsAllocator
Parallel::get_parallel_component<ParallelComponent>(local_cache);
const auto& initial_extents =
get<domain::Tags::InitialExtents<Dim>>(initialization_items);
const auto& quadrature =
Parallel::get<elliptic::dg::Tags::Quadrature>(local_cache);

const auto& domain = Parallel::get<domain::Tags::Domain<Dim>>(local_cache);
const auto& initial_refinement_levels =
@@ -83,39 +87,67 @@

const auto& blocks = domain.blocks();

const std::unordered_map<ElementId<Dim>, double> element_costs =
domain::get_element_costs(
blocks, initial_refinement_levels, initial_extents,
domain::ElementWeight::NumGridPoints, std::nullopt);
const domain::BlockZCurveProcDistribution<Dim> element_distribution{
element_costs, num_of_procs_to_use, blocks, initial_refinement_levels,
initial_extents, procs_to_ignore};
const std::optional<domain::ElementWeight>& element_weight =
get<domain::Tags::ElementDistribution>(local_cache);

domain::BlockZCurveProcDistribution<Dim> element_distribution{};
if (element_weight.has_value()) {
const std::unordered_map<ElementId<Dim>, double> element_costs =
domain::get_element_costs(blocks, initial_refinement_levels,
initial_extents, element_weight.value(),
quadrature);
element_distribution = domain::BlockZCurveProcDistribution<Dim>{
element_costs, num_of_procs_to_use,
blocks, initial_refinement_levels,
initial_extents, procs_to_ignore};
}

// Will be used to print domain diagnostic info
std::vector<size_t> elements_per_core(number_of_procs, 0_st);
std::vector<size_t> elements_per_node(number_of_nodes, 0_st);
std::vector<size_t> grid_points_per_core(number_of_procs, 0_st);
std::vector<size_t> grid_points_per_node(number_of_nodes, 0_st);

size_t which_proc = 0;
for (const auto& block : blocks) {
const size_t grid_points_per_element = alg::accumulate(
initial_extents[block.id()], 1_st, std::multiplies<size_t>());

const std::vector<ElementId<Dim>> element_ids = initial_element_ids(
block.id(), initial_refinement_levels[block.id()]);

for (const auto& element_id : element_ids) {
const size_t target_proc =
element_distribution.get_proc_for_element(element_id);
element_array(element_id)
.insert(global_cache, initialization_items, target_proc);

const size_t target_node =
Parallel::node_of<size_t>(target_proc, local_cache);
++elements_per_core[target_proc];
++elements_per_node[target_node];
grid_points_per_core[target_proc] += grid_points_per_element;
grid_points_per_node[target_node] += grid_points_per_element;
if (element_weight.has_value()) {
for (const auto& element_id : element_ids) {
const size_t target_proc =
element_distribution.get_proc_for_element(element_id);
element_array(element_id)
.insert(global_cache, initialization_items, target_proc);

const size_t target_node =
Parallel::node_of<size_t>(target_proc, local_cache);
++elements_per_core[target_proc];
++elements_per_node[target_node];
grid_points_per_core[target_proc] += grid_points_per_element;
grid_points_per_node[target_node] += grid_points_per_element;
}
} else {
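// Round-robin fallback: no element weight was specified, so place each
// element on the next proc in order, skipping any procs in
// procs_to_ignore and wrapping around at number_of_procs.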
for (const auto& element_id : element_ids) {
while (procs_to_ignore.find(which_proc) != procs_to_ignore.end()) {
which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1;
}

element_array(element_id)
.insert(global_cache, initialization_items, which_proc);

const size_t target_node =
Parallel::node_of<size_t>(which_proc, local_cache);
++elements_per_core[which_proc];
++elements_per_node[target_node];
grid_points_per_core[which_proc] += grid_points_per_element;
grid_points_per_node[target_node] += grid_points_per_element;

which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1;
}
}
}
element_array.doneInserting();
@@ -153,7 +185,8 @@ struct DgElementArray {
using phase_dependent_action_list = PhaseDepActionList;
using array_index = ElementId<volume_dim>;

using const_global_cache_tags = tmpl::list<domain::Tags::Domain<volume_dim>>;
using const_global_cache_tags = tmpl::list<domain::Tags::Domain<volume_dim>,
domain::Tags::ElementDistribution>;

using array_allocation_tags =
typename ElementsAllocator::template array_allocation_tags<
40 changes: 18 additions & 22 deletions src/Evolution/DiscontinuousGalerkin/DgElementArray.hpp
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@
#include "Domain/ElementDistribution.hpp"
#include "Domain/Structure/ElementId.hpp"
#include "Domain/Structure/InitialElementIds.hpp"
#include "Domain/Tags/ElementDistribution.hpp"
#include "Evolution/DiscontinuousGalerkin/Initialization/QuadratureTag.hpp"
#include "Parallel/Algorithms/AlgorithmArray.hpp"
#include "Parallel/GlobalCache.hpp"
@@ -71,7 +72,8 @@ struct DgElementArray {
using phase_dependent_action_list = PhaseDepActionList;
using array_index = ElementId<volume_dim>;

using const_global_cache_tags = tmpl::list<domain::Tags::Domain<volume_dim>>;
using const_global_cache_tags = tmpl::list<domain::Tags::Domain<volume_dim>,
domain::Tags::ElementDistribution>;

using simple_tags_from_options = Parallel::get_simple_tags_from_options<
Parallel::get_initialization_actions_list<phase_dependent_action_list>>;
@@ -110,33 +112,26 @@ void DgElementArray<Metavariables, PhaseDepActionList>::allocate_array(
get<domain::Tags::InitialExtents<volume_dim>>(initialization_items);
const auto& quadrature =
get<evolution::dg::Tags::Quadrature>(initialization_items);

bool use_z_order_distribution = true;
if constexpr (detail::has_use_z_order_distribution_v<Metavariables>) {
use_z_order_distribution = Metavariables::use_z_order_distribution;
}

bool local_time_stepping = false;
if constexpr (detail::has_local_time_stepping_v<Metavariables>) {
local_time_stepping = Metavariables::local_time_stepping;
}
const std::optional<domain::ElementWeight>& element_weight =
Parallel::get<domain::Tags::ElementDistribution>(local_cache);

const size_t number_of_procs = Parallel::number_of_procs<size_t>(local_cache);
const size_t number_of_nodes = Parallel::number_of_nodes<size_t>(local_cache);
const size_t num_of_procs_to_use = number_of_procs - procs_to_ignore.size();

const auto& blocks = domain.blocks();

const std::unordered_map<ElementId<volume_dim>, double> element_costs =
domain::get_element_costs(
blocks, initial_refinement_levels, initial_extents,
local_time_stepping
? domain::ElementWeight::NumGridPointsAndGridSpacing
: domain::ElementWeight::NumGridPoints,
quadrature);
const domain::BlockZCurveProcDistribution<volume_dim> element_distribution{
element_costs, num_of_procs_to_use, blocks, initial_refinement_levels,
initial_extents, procs_to_ignore};
// The Z-curve distribution is only needed when an element weight was
// specified; otherwise elements are distributed round-robin.
domain::BlockZCurveProcDistribution<volume_dim> element_distribution{};
if (element_weight.has_value()) {
const std::unordered_map<ElementId<volume_dim>, double> element_costs =
domain::get_element_costs(blocks, initial_refinement_levels,
initial_extents, element_weight.value(),
quadrature);
element_distribution = domain::BlockZCurveProcDistribution<volume_dim>{
element_costs, num_of_procs_to_use, blocks, initial_refinement_levels,
initial_extents, procs_to_ignore};
}

// Will be used to print domain diagnostic info
std::vector<size_t> elements_per_core(number_of_procs, 0_st);
@@ -153,7 +148,8 @@
const std::vector<ElementId<volume_dim>> element_ids =
initial_element_ids(block.id(), initial_ref_levs);

if (use_z_order_distribution) {
// A value selects the Z-curve distribution; std::nullopt selects round-robin.
if (element_weight.has_value()) {
for (const auto& element_id : element_ids) {
const size_t target_proc =
element_distribution.get_proc_for_element(element_id);
(62 more changed files not shown in this view)
