From e6076aecc5266cc060a6162ba1d0e2ac76c4183b Mon Sep 17 00:00:00 2001 From: "romin.tomasetti" Date: Thu, 28 Mar 2024 14:45:19 +0000 Subject: [PATCH] graph: allow access to native graph object The native graph objects should be accessible for advanced users. --- cmake/kokkos_arch.cmake | 2 + core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp | 3 + core/src/HIP/Kokkos_HIP_Graph_Impl.hpp | 3 + core/src/Kokkos_Graph.hpp | 41 ++++- core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp | 3 + core/unit_test/CMakeLists.txt | 23 +++ core/unit_test/TestGraph.hpp | 10 ++ .../unit_test/cuda/TestCuda_InterOp_Graph.cpp | 144 ++++++++++++++++++ core/unit_test/hip/TestHIP_InterOp_Graph.cpp | 119 +++++++++++++++ .../unit_test/sycl/TestSYCL_InterOp_Graph.cpp | 104 +++++++++++++ 10 files changed, 451 insertions(+), 1 deletion(-) create mode 100644 core/unit_test/cuda/TestCuda_InterOp_Graph.cpp create mode 100644 core/unit_test/hip/TestHIP_InterOp_Graph.cpp create mode 100644 core/unit_test/sycl/TestSYCL_InterOp_Graph.cpp diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index 799b2e300fa..0c204b2fb75 100644 --- a/cmake/kokkos_arch.cmake +++ b/cmake/kokkos_arch.cmake @@ -762,6 +762,8 @@ if(KOKKOS_ENABLE_SYCL) compiler_specific_flags(DEFAULT -fsycl-device-code-split=off -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED) endif() endif() + + check_cxx_symbol_exists(SYCL_EXT_ONEAPI_GRAPH "sycl/sycl.hpp" KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_GRAPH) endif() set(CUDA_ARCH_ALREADY_SPECIFIED "") diff --git a/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp b/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp index c73e5f25053..8e800e756d2 100644 --- a/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp @@ -206,6 +206,9 @@ struct GraphImpl { m_execution_space, _graph_node_kernel_ctor_tag{}, aggregate_kernel_impl_t{}); } + + cudaGraph_t cuda_graph() { return m_graph; } + cudaGraphExec_t cuda_graph_exec() { return m_graph_exec; } }; } // end namespace Impl diff --git a/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp b/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp index 8c7484a1c69..4f97214ca68 100644 --- a/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp +++ b/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp @@ -79,6 +79,9 @@ class GraphImpl { KOKKOS_ENSURES(m_graph_exec); } + hipGraph_t hip_graph() { return m_graph; } + hipGraphExec_t hip_graph_exec() { return m_graph_exec; } + private: Kokkos::HIP m_execution_space; hipGraph_t m_graph = nullptr; diff --git a/core/src/Kokkos_Graph.hpp b/core/src/Kokkos_Graph.hpp index 43758462c50..64c52b81005 100644 --- a/core/src/Kokkos_Graph.hpp +++ b/core/src/Kokkos_Graph.hpp @@ -97,6 +97,10 @@ struct [[nodiscard]] Graph { } void submit() const { submit(get_execution_space()); } + + decltype(auto) native_graph(); + + decltype(auto) native_graph_exec(); }; // end Graph }}}1 @@ -168,6 +172,42 @@ create_graph(Closure&& arg_closure) { // end create_graph }}}1 //============================================================================== +template +decltype(auto) Graph::native_graph() { + KOKKOS_EXPECTS(bool(m_impl_ptr)); +#if defined(KOKKOS_ENABLE_CUDA) + if constexpr (std::is_same_v) { + return m_impl_ptr->cuda_graph(); + } +#elif defined(KOKKOS_ENABLE_HIP) && defined(KOKKOS_IMPL_HIP_NATIVE_GRAPH) + if constexpr (std::is_same_v) { + return m_impl_ptr->hip_graph(); + } +#elif defined(KOKKOS_ENABLE_SYCL) && defined(SYCL_EXT_ONEAPI_GRAPH) + if constexpr (std::is_same_v) { + return m_impl_ptr->sycl_graph(); + } +#endif +} + +template +decltype(auto) Graph::native_graph_exec() { + KOKKOS_EXPECTS(bool(m_impl_ptr)); +#if defined(KOKKOS_ENABLE_CUDA) + if constexpr (std::is_same_v) { + return m_impl_ptr->cuda_graph_exec(); + } +#elif defined(KOKKOS_ENABLE_HIP) && defined(KOKKOS_IMPL_HIP_NATIVE_GRAPH) + if constexpr (std::is_same_v) { + return m_impl_ptr->hip_graph_exec(); + } +#elif defined(KOKKOS_ENABLE_SYCL) && defined(SYCL_EXT_ONEAPI_GRAPH) + if constexpr (std::is_same_v) { + return m_impl_ptr->sycl_graph_exec(); + } +#endif +} + } // end namespace Experimental } // namespace Kokkos @@ -179,7 +219,6 @@ create_graph(Closure&& arg_closure) { #include #include #if defined(KOKKOS_ENABLE_HIP) -// The implementation of hipGraph in ROCm 5.2 is bugged, so we cannot use it. #if defined(KOKKOS_IMPL_HIP_NATIVE_GRAPH) #include #endif diff --git a/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp b/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp index 2e7b771ca12..dc63052dd7a 100644 --- a/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp +++ b/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp @@ -76,6 +76,9 @@ class GraphImpl { m_graph_exec = m_graph.finalize(); } + auto& sycl_graph() { return m_graph; } + auto& sycl_graph_exec() { return m_graph_exec; } + private: Kokkos::SYCL m_execution_space; sycl::ext::oneapi::experimental::command_graph< diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index 66dacef7639..17c8c649f89 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -730,6 +730,13 @@ if(Kokkos_ENABLE_CUDA) kokkos_add_executable_and_test( CoreUnitTest_CudaInterOpStreamsMultiGPU SOURCES UnitTestMainInit.cpp cuda/TestCuda_InterOp_StreamsMultiGPU.cpp ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + CoreUnitTest_CudaInterOpGraph + SOURCES + UnitTestMainInit.cpp + cuda/TestCuda_InterOp_Graph.cpp + ) endif() if(Kokkos_ENABLE_HIP) @@ -749,6 +756,13 @@ if(Kokkos_ENABLE_HIP) kokkos_add_executable_and_test( CoreUnitTest_HIPInterOpStreams SOURCES UnitTestMain.cpp hip/TestHIP_InterOp_Streams.cpp ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + CoreUnitTest_HIPInterOpGraph + SOURCES + UnitTestMainInit.cpp + hip/TestHIP_InterOp_Graph.cpp + ) endif() if(Kokkos_ENABLE_SYCL) @@ -787,6 +801,15 @@ if(Kokkos_ENABLE_SYCL) kokkos_add_executable_and_test( CoreUnitTest_SYCLInterOpStreamsMultiGPU SOURCES UnitTestMainInit.cpp sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp ) + + if(KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_GRAPH) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + CoreUnitTest_SYCLInterOpGraph + SOURCES + UnitTestMainInit.cpp + sycl/TestSYCL_InterOp_Graph.cpp + ) + endif() endif() set(DEFAULT_DEVICE_SOURCES diff --git a/core/unit_test/TestGraph.hpp b/core/unit_test/TestGraph.hpp index fc2a57a3b70..0ab5ff37717 100644 --- a/core/unit_test/TestGraph.hpp +++ b/core/unit_test/TestGraph.hpp @@ -510,6 +510,16 @@ TEST_F(TEST_CATEGORY_FIXTURE(graph), force_global_launch) { #endif } +// Ensure that an empty graph on the default host execution space +// can be submitted. +TEST_F(TEST_CATEGORY_FIXTURE(graph), empty_graph_default_host_exec) { + auto graph = + Kokkos::Experimental::create_graph(Kokkos::DefaultHostExecutionSpace{}); + graph.instantiate(); + graph.submit(); + graph.get_execution_space().fence(); +} + template struct FetchValuesAndContribute { static_assert(std::is_same_v +#include +#include + +#include +#include +#include + +#include + +namespace { + +template +struct Increment { + ViewType data; + + KOKKOS_FUNCTION + void operator()(const int) const { ++data(); } +}; + +class TEST_CATEGORY_FIXTURE(GraphInterOp) : public ::testing::Test { + public: + using execution_space = Kokkos::Cuda; + using view_t = + Kokkos::View>; + using graph_t = Kokkos::Experimental::Graph; + + void SetUp() override { + data = view_t(Kokkos::view_alloc(exec, "witness")); + + graph = Kokkos::Experimental::create_graph(exec); + + auto root = Kokkos::Impl::GraphAccess::create_root_ref(*graph); + + root.then_parallel_for(1, Increment{data}); + } + + protected: + const execution_space exec{}; + view_t data; + std::optional graph; +}; + +// This test checks the promises of Kokkos::Graph against its +// underlying Cuda native objects. +TEST_F(TEST_CATEGORY_FIXTURE(GraphInterOp), promises_on_native_objects) { + // Before instantiation, the Cuda graph is valid, but the Cuda executable + // graph is still null. + cudaGraph_t cuda_graph = graph->native_graph(); + + ASSERT_NE(cuda_graph, nullptr); + ASSERT_EQ(graph->native_graph_exec(), nullptr); + + // After instantiation, both native objects are valid. + graph->instantiate(); + + cudaGraphExec_t cuda_graph_exec = graph->native_graph_exec(); + + ASSERT_EQ(graph->native_graph(), cuda_graph); + ASSERT_NE(cuda_graph_exec, nullptr); + + // Submission should not affect the underlying objects. + graph->submit(); + + ASSERT_EQ(graph->native_graph(), cuda_graph); + ASSERT_EQ(graph->native_graph_exec(), cuda_graph_exec); +} + +// Count the number of nodes. This is useful to ensure no spurious +// (possibly empty) node is added. +TEST_F(TEST_CATEGORY_FIXTURE(GraphInterOp), count_nodes) { + graph->instantiate(); + + size_t num_nodes = 0; + + KOKKOS_IMPL_CUDA_SAFE_CALL( + cudaGraphGetNodes(graph->native_graph(), nullptr, &num_nodes)); + + ASSERT_EQ(num_nodes, 2u); +} + +// Use native Cuda graph to generate a DOT representation. +TEST_F(TEST_CATEGORY_FIXTURE(GraphInterOp), debug_dot_print) { +#if CUDA_VERSION < 11600 + GTEST_SKIP() << "Export a graph to DOT requires Cuda 11.6."; +#else + graph->instantiate(); + + const auto dot = std::filesystem::temp_directory_path() / "cuda_graph.dot"; + + // Convert path to string then to const char * to make it work on Windows. + KOKKOS_IMPL_CUDA_SAFE_CALL( + cudaGraphDebugDotPrint(graph->native_graph(), dot.string().c_str(), + cudaGraphDebugDotFlagsVerbose)); + + ASSERT_TRUE(std::filesystem::exists(dot)); + ASSERT_GT(std::filesystem::file_size(dot), 0u); + + // We could write a check against the full kernel's function signature, but + // it would make the test rely too much on internal implementation details. + // Therefore, we just look for the functor and policy. Note that the + // signature is mangled in the 'dot' output. + const std::string expected("[A-Za-z0-9_]+Increment[A-Za-z0-9_]+RangePolicy"); + + std::stringstream buffer; + buffer << std::ifstream(dot).rdbuf(); + + ASSERT_TRUE(std::regex_search(buffer.str(), std::regex(expected))) + << "Could not find expected signature regex " << std::quoted(expected) + << " in " << dot; +#endif +} + +// Ensure that the graph has been instantiated with the default flag. +TEST_F(TEST_CATEGORY_FIXTURE(GraphInterOp), instantiation_flags) { +#if CUDA_VERSION < 12000 + GTEST_SKIP() << "Graph instantiation flag inspection requires Cuda 12."; +#else + unsigned long long flags = + Kokkos::Experimental::finite_max_v; + KOKKOS_IMPL_CUDA_SAFE_CALL( + cudaGraphExecGetFlags(graph->native_graph_exec(), &flags)); + + ASSERT_EQ(flags, 0u); +#endif +} + +} // namespace diff --git a/core/unit_test/hip/TestHIP_InterOp_Graph.cpp b/core/unit_test/hip/TestHIP_InterOp_Graph.cpp new file mode 100644 index 00000000000..68e51f9b2d7 --- /dev/null +++ b/core/unit_test/hip/TestHIP_InterOp_Graph.cpp @@ -0,0 +1,119 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include + +#include +#include +#include + +#include + +namespace { + +template +struct Increment { + ViewType data; + + KOKKOS_FUNCTION + void operator()(const int) const { ++data(); } +}; + +// This test checks the promises of Kokkos::Graph against its +// underlying HIP native objects. +TEST(TEST_CATEGORY, graph_promises_on_native_objects) { +#if !defined(KOKKOS_IMPL_HIP_NATIVE_GRAPH) + GTEST_SKIP() << "This test will not work without native graph support"; +#else + auto graph = Kokkos::Experimental::create_graph(); + + auto root = Kokkos::Impl::GraphAccess::create_root_ref(graph); + + // Before instantiation, the HIP graph is valid, but the HIP executable + // graph is still null. + hipGraph_t hip_graph = graph.native_graph(); + + ASSERT_NE(hip_graph, nullptr); + ASSERT_EQ(graph.native_graph_exec(), nullptr); + + // After instantiation, both native objects are valid. + graph.instantiate(); + + hipGraphExec_t hip_graph_exec = graph.native_graph_exec(); + + ASSERT_EQ(graph.native_graph(), hip_graph); + ASSERT_NE(hip_graph_exec, nullptr); + + // Submission should not affect the underlying objects. + graph.submit(); + + ASSERT_EQ(graph.native_graph(), hip_graph); + ASSERT_EQ(graph.native_graph_exec(), hip_graph_exec); +#endif +} + +// Use native HIP graph to generate a DOT representation. +TEST(TEST_CATEGORY, graph_instantiate_and_debug_dot_print) { +#if !defined(KOKKOS_IMPL_HIP_NATIVE_GRAPH) + GTEST_SKIP() << "This test will not work without native graph support"; +#else + using view_t = Kokkos::View; + + const Kokkos::HIP exec{}; + + view_t data(Kokkos::view_alloc(exec, "witness")); + + auto graph = Kokkos::Experimental::create_graph(exec); + + auto root = Kokkos::Impl::GraphAccess::create_root_ref(graph); + + root.then_parallel_for(1, Increment{data}); + + graph.instantiate(); + + size_t num_nodes = 0; + + KOKKOS_IMPL_HIP_SAFE_CALL( + hipGraphGetNodes(graph.native_graph(), nullptr, &num_nodes)); + + ASSERT_EQ(num_nodes, 2u); + + const auto dot = std::filesystem::temp_directory_path() / "hip_graph.dot"; + + KOKKOS_IMPL_HIP_SAFE_CALL(hipGraphDebugDotPrint( + graph.native_graph(), dot.c_str(), hipGraphDebugDotFlagsVerbose)); + + ASSERT_TRUE(std::filesystem::exists(dot)); + ASSERT_GT(std::filesystem::file_size(dot), 0u); + + // We could write a check against the full kernel's function signature, but + // it would make the test rely too much on internal implementation details. + // Therefore, we just look for the functor and policy. Note that the + // signature is mangled in the 'dot' output. + const std::string expected("[A-Za-z0-9_]+Increment[A-Za-z0-9_]+RangePolicy"); + + std::stringstream buffer; + buffer << std::ifstream(dot).rdbuf(); + + ASSERT_TRUE(std::regex_search(buffer.str(), std::regex(expected))) + << "Could not find expected signature regex " << std::quoted(expected) + << " in " << dot; +#endif +} + +} // namespace diff --git a/core/unit_test/sycl/TestSYCL_InterOp_Graph.cpp b/core/unit_test/sycl/TestSYCL_InterOp_Graph.cpp new file mode 100644 index 00000000000..749deee27d2 --- /dev/null +++ b/core/unit_test/sycl/TestSYCL_InterOp_Graph.cpp @@ -0,0 +1,104 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include + +#include +#include +#include + +#include + +namespace { + +template +struct Increment { + ViewType data; + + KOKKOS_FUNCTION + void operator()(const int) const { ++data(); } +}; + +TEST(TEST_CATEGORY, graph_get_native_return_types_are_references) { + using graph_t = Kokkos::Experimental::Graph; + static_assert( + std::is_reference_v().native_graph())>); + static_assert(std::is_reference_v< + decltype(std::declval().native_graph_exec())>); +} + +// This test checks the promises of Kokkos::Graph against its +// underlying SYCL native objects. +TEST(TEST_CATEGORY, graph_promises_on_native_objects) { + auto graph = Kokkos::Experimental::create_graph(); + + auto root = Kokkos::Impl::GraphAccess::create_root_ref(graph); + + // Before instantiation, the SYCL graph is valid, but the SYCL executable + // graph is still null. Since the SYCL command graph is a regular object, + // no check is needed. + // However, the executable SYCL command graph is stored as an optional, + // so let's check it is empty for now. + ASSERT_FALSE(graph.native_graph_exec().has_value()); + + // After instantiation, both native objects are valid. + graph.instantiate(); + + ASSERT_TRUE(graph.native_graph_exec().has_value()); +} + +// Use native SYCL graph to generate a DOT representation. +TEST(TEST_CATEGORY, graph_instantiate_and_debug_dot_print) { + using view_t = Kokkos::View; + + const Kokkos::SYCL exec{}; + + view_t data(Kokkos::view_alloc(exec, "witness")); + + auto graph = Kokkos::Experimental::create_graph(exec); + + auto root = Kokkos::Impl::GraphAccess::create_root_ref(graph); + + root.then_parallel_for(1, Increment{data}); + + graph.instantiate(); + + ASSERT_EQ(graph.native_graph().get_nodes().size(), 2u); + + const auto dot = std::filesystem::temp_directory_path() / "sycl_graph.dot"; + + graph.native_graph().print_graph(dot, true); + + ASSERT_TRUE(std::filesystem::exists(dot)); + ASSERT_GT(std::filesystem::file_size(dot), 0u); + + // We could write a check against the full kernel's function signature, but + // it would make the test rely too much on internal implementation details. + // Therefore, we just look for the functor and policy. Note that the + // signature is mangled in the 'dot' output. + const std::string expected("[A-Za-z0-9_]+Increment[A-Za-z0-9_]+RangePolicy"); + + std::stringstream buffer; + buffer << std::ifstream(dot).rdbuf(); + + ASSERT_TRUE(std::regex_search(buffer.str(), std::regex(expected))) + << "Could not find expected signature regex " << std::quoted(expected) + << " in " << dot; +} + +} // namespace