Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more Kokkos and nvcc support #6213

Merged
merged 11 commits into from
Aug 14, 2024
14 changes: 12 additions & 2 deletions cmake/AddSpectreExecutable.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,21 @@ function(add_spectre_executable TARGET_NAME)
PUBLIC
${SPECTRE_ALLOCATOR_LIBRARY}
SpectreAllocator
)
)

# Capture any link launcher already attached to the target so that it is
# preserved when RULE_LAUNCH_LINK is overwritten with our wrapper script.
set(SPECTRE_KOKKOS_LAUNCHER "")
if(SPECTRE_KOKKOS)
  # We need to make sure we don't drop the Kokkos link wrapper
  get_target_property(
    SPECTRE_KOKKOS_LAUNCHER
    ${TARGET_NAME}
    RULE_LAUNCH_LINK)
  # get_target_property() stores "<VAR>-NOTFOUND" when the property is not
  # set on the target. Reset to empty so that literal is never appended to
  # the link command as a bogus argument.
  if(NOT SPECTRE_KOKKOS_LAUNCHER)
    set(SPECTRE_KOKKOS_LAUNCHER "")
  endif()
endif()
set_target_properties(
${TARGET_NAME}
PROPERTIES
RULE_LAUNCH_LINK "${CMAKE_BINARY_DIR}/tmp/WrapExecutableLinker.sh"
RULE_LAUNCH_LINK
"${CMAKE_BINARY_DIR}/tmp/WrapExecutableLinker.sh ${SPECTRE_KOKKOS_LAUNCHER}"
LINK_DEPENDS "${CMAKE_BINARY_DIR}/tmp/WrapExecutableLinker.sh"
# Expose readable symbol names in backtrace (adds flags like -rdynamic)
ENABLE_EXPORTS ON
Expand Down
4 changes: 3 additions & 1 deletion cmake/SetupKokkos.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ if(SPECTRE_KOKKOS)

find_package(Kokkos REQUIRED)

if (TARGET Kokkos::kokkos AND Kokkos_ENABLE_CUDA)
if (TARGET Kokkos::kokkos
AND Kokkos_ENABLE_CUDA
AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set_property(TARGET Kokkos::kokkos
APPEND PROPERTY
INTERFACE_COMPILE_OPTIONS
Expand Down
1 change: 1 addition & 0 deletions cmake/SetupPch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ target_link_libraries(
Charmxx::pup
HDF5::HDF5
SpectreFlags
SpectreKokkos
)

# Targets can reuse the PCH generated for this library. They must also link
Expand Down
15 changes: 12 additions & 3 deletions cmake/SpectreAddLibraries.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,23 @@ function(ADD_SPECTRE_LIBRARY LIBRARY_NAME)
PUBLIC
${SPECTRE_ALLOCATOR_LIBRARY}
SpectreAllocator
)
)

# Capture any link launcher already attached to the library so that it is
# preserved when RULE_LAUNCH_LINK is overwritten with our wrapper script.
set(SPECTRE_KOKKOS_LAUNCHER "")
if(SPECTRE_KOKKOS)
  # We need to make sure we don't drop the Kokkos link wrapper
  get_target_property(
    SPECTRE_KOKKOS_LAUNCHER
    ${LIBRARY_NAME}
    RULE_LAUNCH_LINK)
  # get_target_property() stores "<VAR>-NOTFOUND" when the property is not
  # set on the target. Reset to empty so that literal is never appended to
  # the link command as a bogus argument.
  if(NOT SPECTRE_KOKKOS_LAUNCHER)
    set(SPECTRE_KOKKOS_LAUNCHER "")
  endif()
endif()
set_target_properties(
${LIBRARY_NAME}
PROPERTIES
RULE_LAUNCH_LINK "${CMAKE_BINARY_DIR}/tmp/WrapLibraryLinker.sh"
RULE_LAUNCH_LINK
"${CMAKE_BINARY_DIR}/tmp/WrapLibraryLinker.sh ${SPECTRE_KOKKOS_LAUNCHER}"
LINK_DEPENDS "${CMAKE_BINARY_DIR}/tmp/WrapLibraryLinker.sh"
)
)
endif (NOT ${LIBRARY_TYPE} STREQUAL INTERFACE_LIBRARY)
if (NOT "${LIBRARY_NAME}" MATCHES "^SpectrePch"
AND NOT ${LIBRARY_IS_IMPORTED}
Expand Down
22 changes: 11 additions & 11 deletions src/Domain/BlockLogicalCoordinates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
#include "Utilities/ErrorHandling/Error.hpp"
#include "Utilities/GenerateInstantiations.hpp"

template <size_t Dim, typename Frame>
template <size_t Dim, typename Fr>
std::optional<tnsr::I<double, Dim, ::Frame::BlockLogical>>
block_logical_coordinates_single_point(
const tnsr::I<double, Dim, Frame>& input_point, const Block<Dim>& block,
const tnsr::I<double, Dim, Fr>& input_point, const Block<Dim>& block,
const double time, const domain::FunctionsOfTimeMap& functions_of_time) {
std::optional<tnsr::I<double, Dim, ::Frame::BlockLogical>> logical_point{};
if (block.is_time_dependent()) {
if constexpr (std::is_same_v<Frame, ::Frame::Inertial>) {
if constexpr (std::is_same_v<Fr, ::Frame::Inertial>) {
// Point is in the inertial frame, so we need to map to the grid
// frame and then the logical frame.
const auto moving_inv = block.moving_mesh_grid_to_inertial_map().inverse(
Expand All @@ -35,7 +35,7 @@ block_logical_coordinates_single_point(
// logical to grid map is time-independent.
logical_point =
block.moving_mesh_logical_to_grid_map().inverse(moving_inv.value());
} else if constexpr (std::is_same_v<Frame, ::Frame::Distorted>) {
} else if constexpr (std::is_same_v<Fr, ::Frame::Distorted>) {
// Point is in the distorted frame, so we need to map to the grid
// frame and then the logical frame.
if (not block.has_distorted_frame()) {
Expand Down Expand Up @@ -78,24 +78,24 @@ block_logical_coordinates_single_point(
// frames in the block, so make sure Fr is
// ::Frame::Grid. (The Inertial and Distorted cases were
// handled above.)
static_assert(std::is_same_v<Frame, ::Frame::Grid>,
static_assert(std::is_same_v<Fr, ::Frame::Grid>,
"Cannot convert from given frame to Grid frame");

// Point is in the grid frame, just map to logical frame.
logical_point =
block.moving_mesh_logical_to_grid_map().inverse(input_point);
}
} else { // not block.is_time_dependent()
if constexpr (std::is_same_v<Frame, ::Frame::Inertial>) {
if constexpr (std::is_same_v<Fr, ::Frame::Inertial>) {
logical_point = block.stationary_map().inverse(input_point);
} else {
// If the map is time-independent, then the grid, distorted, and
// inertial frames are the same. So if we are in the grid
// or distorted frames, convert to the inertial frame
// (this conversion is just a type conversion).
// Otherwise throw a static_assert.
static_assert(std::is_same_v<Frame, ::Frame::Grid> or
std::is_same_v<Frame, ::Frame::Distorted>,
static_assert(std::is_same_v<Fr, ::Frame::Grid> or
std::is_same_v<Fr, ::Frame::Distorted>,
"Cannot convert from given frame to Inertial frame");
tnsr::I<double, Dim, ::Frame::Inertial> x_inertial(0.0);
for (size_t d = 0; d < Dim; ++d) {
Expand Down Expand Up @@ -133,14 +133,14 @@ block_logical_coordinates_single_point(
return logical_point;
}

template <size_t Dim, typename Frame>
template <size_t Dim, typename Fr>
std::vector<BlockLogicalCoords<Dim>> block_logical_coordinates(
const Domain<Dim>& domain, const tnsr::I<DataVector, Dim, Frame>& x,
const Domain<Dim>& domain, const tnsr::I<DataVector, Dim, Fr>& x,
const double time, const domain::FunctionsOfTimeMap& functions_of_time) {
const size_t num_pts = get<0>(x).size();
std::vector<BlockLogicalCoords<Dim>> block_coord_holders(num_pts);
for (size_t s = 0; s < num_pts; ++s) {
tnsr::I<double, Dim, Frame> x_frame(0.0);
tnsr::I<double, Dim, Fr> x_frame(0.0);
for (size_t d = 0; d < Dim; ++d) {
x_frame.get(d) = x.get(d)[s];
}
Expand Down
8 changes: 4 additions & 4 deletions src/Domain/BlockLogicalCoordinates.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,17 @@ using BlockLogicalCoords = std::optional<
/// typical use cases. This means that `block_logical_coordinates`
/// does not assume that grid and distorted frames are equal in
/// `Block`s that lack a distorted frame.
template <size_t Dim, typename Frame>
template <size_t Dim, typename Fr>
auto block_logical_coordinates(
const Domain<Dim>& domain, const tnsr::I<DataVector, Dim, Frame>& x,
const Domain<Dim>& domain, const tnsr::I<DataVector, Dim, Fr>& x,
double time = std::numeric_limits<double>::signaling_NaN(),
const domain::FunctionsOfTimeMap& functions_of_time = {})
-> std::vector<BlockLogicalCoords<Dim>>;

template <size_t Dim, typename Frame>
template <size_t Dim, typename Fr>
std::optional<tnsr::I<double, Dim, ::Frame::BlockLogical>>
block_logical_coordinates_single_point(
const tnsr::I<double, Dim, Frame>& input_point, const Block<Dim>& block,
const tnsr::I<double, Dim, Fr>& input_point, const Block<Dim>& block,
double time = std::numeric_limits<double>::signaling_NaN(),
const domain::FunctionsOfTimeMap& functions_of_time = {});
/// @}
5 changes: 5 additions & 0 deletions src/Domain/FunctionsOfTime/PiecewisePolynomial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ void PiecewisePolynomial<MaxDeriv>::pup(PUP::er& p) {
namespace {
template <size_t MaxDerivPlusOne>
struct LegacyStoredInfo {
LegacyStoredInfo() = default;
LegacyStoredInfo(double in_time,
std::array<DataVector, 1> in_stored_quantities)
: time(in_time), stored_quantities(in_stored_quantities) {}

double time{std::numeric_limits<double>::signaling_NaN()};
std::array<DataVector, MaxDerivPlusOne> stored_quantities;

Expand Down
5 changes: 5 additions & 0 deletions src/Domain/FunctionsOfTime/QuaternionFunctionOfTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@

namespace {
struct LegacyStoredInfo {
LegacyStoredInfo() = default;
LegacyStoredInfo(double in_time,
std::array<DataVector, 1> in_stored_quantities)

Check failure on line 121 in src/Domain/FunctionsOfTime/QuaternionFunctionOfTime.cpp

View workflow job for this annotation

GitHub Actions / Clang-tidy (Debug)

pass by value and use std::move

Check failure on line 121 in src/Domain/FunctionsOfTime/QuaternionFunctionOfTime.cpp

View workflow job for this annotation

GitHub Actions / Clang-tidy (Release)

pass by value and use std::move
: time(in_time), stored_quantities(in_stored_quantities) {}

Check failure on line 122 in src/Domain/FunctionsOfTime/QuaternionFunctionOfTime.cpp

View workflow job for this annotation

GitHub Actions / Clang-tidy (Debug)

parameter 'in_stored_quantities' is passed by value and only copied once; consider moving it to avoid unnecessary copies

Check failure on line 122 in src/Domain/FunctionsOfTime/QuaternionFunctionOfTime.cpp

View workflow job for this annotation

GitHub Actions / Clang-tidy (Release)

parameter 'in_stored_quantities' is passed by value and only copied once; consider moving it to avoid unnecessary copies

double time{std::numeric_limits<double>::signaling_NaN()};
std::array<DataVector, 1> stored_quantities;

Expand Down
2 changes: 1 addition & 1 deletion src/Domain/FunctionsOfTime/ThreadsafeList.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class ThreadsafeList {
class iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = decltype(ThreadsafeList{}(double{}));
using value_type = IntervalInfo;
using reference = value_type;
using pointer = std::optional<value_type>;
using difference_type = std::ptrdiff_t;
Expand Down
12 changes: 9 additions & 3 deletions src/Domain/FunctionsOfTime/ThreadsafeList.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@ namespace domain::FunctionsOfTime::FunctionOfTimeHelpers {
namespace ThreadsafeList_detail {
template <typename T>
struct Interval {
double expiration;
T data;
std::unique_ptr<Interval> previous;
Interval() = default;
Interval(double in_expiration, T in_data,
std::unique_ptr<Interval> in_previous)
: expiration(in_expiration),
data(std::move(in_data)),
previous(std::move(in_previous)) {}
double expiration{};
T data{};
std::unique_ptr<Interval> previous{};

void pup(PUP::er& p);
};
Expand Down
4 changes: 4 additions & 0 deletions src/IO/H5/Cce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ Cce::~Cce() {
// internals of CHECK_H5), so older compilers that we support may not have
// fixed this bug.
for (const auto& name_and_dataset : bondi_datasets_) {
#ifdef __CUDACC__
// nvcc warns that 'name' is unused
[[maybe_unused]]
#endif
const auto& name = name_and_dataset.first;
const auto& dataset = name_and_dataset.second;
CHECK_H5(H5Dclose(dataset.id), "Failed to close dataset " << name);
Expand Down
1 change: 1 addition & 0 deletions src/IO/Importers/Actions/ReadVolumeData.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "Domain/Structure/ElementId.hpp"
#include "IO/H5/AccessType.hpp"
#include "IO/H5/File.hpp"
#include "IO/H5/TensorData.hpp"
#include "IO/H5/VolumeData.hpp"
#include "IO/Importers/ObservationSelector.hpp"
#include "IO/Importers/Tags.hpp"
Expand Down
8 changes: 4 additions & 4 deletions src/NumericalAlgorithms/FiniteDifference/Reconstruct.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@

namespace fd::reconstruction {
namespace detail {
template <size_t Index, size_t DimToReplace, size_t... Is,
template <size_t IndexToSet, size_t DimToReplace, size_t... Is,
size_t Dim = sizeof...(Is)>
auto generate_index_for_u_to_reconstruct_impl(
const std::array<size_t, sizeof...(Is)>& indices,
std::index_sequence<Is...>) -> ::Index<Dim> {
return ::Index<Dim>{(DimToReplace != Is ? indices[Is] : Index)...};
return ::Index<Dim>{(DimToReplace != Is ? indices[Is] : IndexToSet)...};
}

template <size_t Index, size_t DimToReplace, size_t NumberOfNeighborCells,
template <size_t IndexToSet, size_t DimToReplace, size_t NumberOfNeighborCells,
size_t... Is, size_t Dim = sizeof...(Is)>
auto generate_upper_volume_index_for_u_to_reconstruct_impl(
const std::array<size_t, sizeof...(Is)>& indices,
Expand All @@ -36,7 +36,7 @@ auto generate_upper_volume_index_for_u_to_reconstruct_impl(
return ::Index<Dim>{
(DimToReplace != Is
? indices[Is]
: (volume_extents[Is] - (NumberOfNeighborCells - Index)))...};
: (volume_extents[Is] - (NumberOfNeighborCells - IndexToSet)))...};
}

template <Side UpperLower, size_t DimToReplace, size_t Dim,
Expand Down
23 changes: 11 additions & 12 deletions src/NumericalAlgorithms/SphericalHarmonics/TagsTypeAliases.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,20 @@ class DataVector;
namespace ylm::Tags {
/// Defines type aliases used in Strahlkorper-related Tags.
namespace aliases {
template <typename Frame>
template <typename Fr>
using Jacobian =
Tensor<DataVector, tmpl::integral_list<std::int32_t, 2, 1>,
index_list<SpatialIndex<3, UpLo::Up, Frame>,
SpatialIndex<2, UpLo::Lo, ::Frame::Spherical<Frame>>>>;
template <typename Frame>
index_list<SpatialIndex<3, UpLo::Up, Fr>,
SpatialIndex<2, UpLo::Lo, ::Frame::Spherical<Fr>>>>;
template <typename Fr>
using InvJacobian =
Tensor<DataVector, tmpl::integral_list<std::int32_t, 2, 1>,
index_list<SpatialIndex<2, UpLo::Up, ::Frame::Spherical<Frame>>,
SpatialIndex<3, UpLo::Lo, Frame>>>;
template <typename Frame>
using InvHessian =
Tensor<DataVector, tmpl::integral_list<std::int32_t, 3, 2, 1>,
index_list<SpatialIndex<2, UpLo::Up, ::Frame::Spherical<Frame>>,
SpatialIndex<3, UpLo::Lo, Frame>,
SpatialIndex<3, UpLo::Lo, Frame>>>;
index_list<SpatialIndex<2, UpLo::Up, ::Frame::Spherical<Fr>>,
SpatialIndex<3, UpLo::Lo, Fr>>>;
template <typename Fr>
using InvHessian = Tensor<
DataVector, tmpl::integral_list<std::int32_t, 3, 2, 1>,
index_list<SpatialIndex<2, UpLo::Up, ::Frame::Spherical<Fr>>,
SpatialIndex<3, UpLo::Lo, Fr>, SpatialIndex<3, UpLo::Lo, Fr>>>;
} // namespace aliases
} // namespace ylm::Tags
6 changes: 6 additions & 0 deletions tools/BlazeExceptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@

#include <csignal>

#ifdef __CUDA_ARCH__
// When building for Nvidia GPUs we need to disable the use of vector
// intrinsics.
#define BLAZE_USE_VECTORIZATION 0
#endif

#ifndef SPECTRE_BLAZE_EXCEPTIONS_HPP
#define SPECTRE_BLAZE_EXCEPTIONS_HPP

Expand Down
Loading