Skip to content

Commit

Permalink
Adding kokkos support.
Browse files Browse the repository at this point in the history
  • Loading branch information
khuck committed Aug 7, 2020
1 parent f220829 commit bfc461e
Show file tree
Hide file tree
Showing 3 changed files with 291 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/apex/CMakeLists.standalone
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ endif(APEX_WITH_CUDA)
SET(all_SOURCE
${CUPTI_SOURCE}
apex.cpp
apex_kokkos.cpp
apex_options.cpp
apex_policies.cpp
${bfd_SOURCE}
Expand Down
278 changes: 278 additions & 0 deletions src/apex/apex_kokkos.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
/* https://github.com/kokkos/kokkos-tools/wiki/Profiling-Hooks
* This page documents the interface between Kokkos and the profiling library.
* Every function prototype on this page is an interface hook. Profiling
* libraries may define any subset of the hooks listed here; hooks which are
* not defined by the library will be silently ignored by Kokkos. The hooks
* have C linkage, and we emphasize this with the extern "C" required to define
* such symbols in C++. If the profiling library is written in C, the
* extern "C" should be omitted.
*/

#include "apex_kokkos.hpp"
#include "apex_api.hpp"
#include <sstream>
#include <unordered_map>
#include <mutex>
#include <stack>
#include <vector>

static std::mutex memory_mtx;
static std::unordered_map<void*,apex::profiler*>& memory_map() {
static std::unordered_map<void*,apex::profiler*> themap;
return themap;
}
static std::stack<apex::profiler*>& timer_stack() {
static APEX_NATIVE_TLS std::stack<apex::profiler*> thestack;
return thestack;
}
static std::mutex section_mtx;
static std::vector<std::string>& sections() {
static std::vector<std::string> thevector;
return thevector;
}
static std::unordered_map<uint32_t, apex::profiler*>& active_sections() {
static std::unordered_map<uint32_t, apex::profiler*> themap;
return themap;
}

/* This function will be called only once, prior to calling any other hooks
* in the profiling library. Currently the only argument which is non-zero
* is version, which will specify the version of the interface (which will
* allow future changes to the interface). The version is an integer encoding
* a date as ((year*100)+month)*100, and the current interface version is
* 20150628.
*/
extern "C" void kokkosp_init_library(int loadseq, uint64_t version,
uint32_t ndevinfos, KokkosPDeviceInfo* devinfos) {
APEX_UNUSED(loadseq);
APEX_UNUSED(version);
APEX_UNUSED(ndevinfos);
APEX_UNUSED(devinfos);
apex::init("APEX Kokkos handler", 0, 1);
}

/* This function will be called only once, after all other calls to
* profiling hooks.
*/
extern "C" void kokkosp_finalize_library() {
apex::finalize();
}

/* These functions are called before their respective parallel constructs
* execute (Kokkos::parallel_for, Kokkos::parallel_reduce,
* Kokkos::parallel_scan). The name argument is the name given by the user
* to the parallel construct, or in the case no name was given it is the
* compiler-dependent type name of the functor or lambda given to the construct.
* Currently devid is always zero. kernid is an output variable: the profiling
* library assigns a value to this, and that value will be given to the
* corresponding kokkosp_end_parallel_* call at the end of the parallel
* construct.
*/
extern "C" void kokkosp_begin_parallel_for(const char* name,
uint32_t devid, uint64_t* kernid) {
std::stringstream ss;
ss << "Kokkos for, Dev: " << devid << ", " << name;
std::string tmp{ss.str()};
// Start a new profiler, with no known parent
// (current timer on stack, if exists)
auto p = apex::start(tmp);
// save the task wrapper in the kernid
*(kernid) = (uint64_t)p;
}

extern "C" void kokkosp_begin_parallel_reduce(const char* name,
uint32_t devid, uint64_t* kernid) {
std::stringstream ss;
ss << "Kokkos reduce, Dev: " << devid << ", " << name;
std::string tmp{ss.str()};
// Start a new profiler, with no known parent
// (current timer on stack, if exists)
auto p = apex::start(tmp);
// save the task wrapper in the kernid
*(kernid) = (uint64_t)p;
}

extern "C" void kokkosp_begin_parallel_scan(const char* name,
uint32_t devid, uint64_t* kernid) {
std::stringstream ss;
ss << "Kokkos scan, Dev: " << devid << ", " << name;
std::string tmp{ss.str()};
// Start a new profiler, with no known parent
// (current timer on stack, if exists)
auto p = apex::start(tmp);
// save the task wrapper in the kernid
*(kernid) = (uint64_t)p;
}

extern "C" void kokkosp_end_parallel_for(uint64_t kernid) {
apex::profiler * p = (apex::profiler*)(kernid);
apex::stop(p);
}

extern "C" void kokkosp_end_parallel_reduce(uint64_t kernid) {
apex::profiler * p = (apex::profiler*)(kernid);
apex::stop(p);
}

extern "C" void kokkosp_end_parallel_scan(uint64_t kernid) {
apex::profiler * p = (apex::profiler*)(kernid);
apex::stop(p);
}

/* This function will be called by
* Kokkos::Profiling::pushRegion(const std::string& cpp_name). The name passed
* to it is the C equivalent of the cpp_name given to
* Kokkos::Profiling::pushRegion. As the function names imply, regions are meant
* to be treated in a stack fashion, ideally consistent with the calling stack
* of the application. One natural way to use them is to call pushRegion at the
* beginning of an application function, and call popRegion at the end of the
* application function. This helps the profiling library group other events
* like parallel_for calls and memory allocations, and organize them according
* to the higher-level flow of the application for better presentation to the
* user.
*/
extern "C" void kokkosp_push_profile_region(const char* name) {
std::stringstream ss;
ss << "Kokkos region, " << name;
std::string tmp{ss.str()};
// Start a new profiler, with no known parent
// (current timer on stack, if exists)
auto p = apex::start(tmp);
timer_stack().push(p);
}

/* This function will be called by Kokkos::Profiling::popRegion(). In
* accordance with the stack convention, the region being popped is the one
* named by the last call to pushRegion.
*/
extern "C" void kokkosp_pop_profile_region() {
apex::profiler * p = timer_stack().top();
apex::stop(p);
timer_stack().pop();
}

/* This function will be called whenever a shared allocation is created to
* support a Kokkos::View. The handle refers to the Kokkos MemorySpace where
* the memory resides, the name is the name given by the user to the View. The
* ptr and size parameters describe the block of memory as its starting pointer
* and size in bytes.
*/
extern "C" void kokkosp_allocate_data(SpaceHandle_t handle, const char* name,
void* ptr, uint64_t size) {
std::stringstream ss;
ss << "Kokkos " << handle.name << " data, " << name;
std::string tmp{ss.str()};
auto p = apex::start(tmp);
memory_mtx.lock();
memory_map().insert(std::pair<void*,apex::profiler*>(ptr, p));
memory_mtx.unlock();
ss << ": Bytes";
std::string tmp2{ss.str()};
double bytes = (double)(size);
apex::sample_value(tmp2, bytes);
}

/* This function will be called whenever a shared allocation is destroyed. The
* handle refers to the Kokkos MemorySpace where the memory resides, the name is
* the name given by the user to the View. The ptr and size parameters describe
* the block of memory as its starting pointer and size in bytes.
*/
extern "C" void kokkosp_deallocate_data(SpaceHandle handle, const char* name,
void* ptr, uint64_t size) {
APEX_UNUSED(handle);
APEX_UNUSED(name);
APEX_UNUSED(size);
memory_mtx.lock();
auto p = memory_map()[ptr];
memory_map().erase(ptr);
memory_mtx.unlock();
apex::stop(p);
}

/* This function will be called whenever a Kokkos::deep_copy function is
* called on a contiguous view (i.e. it is not a remapping operation from for
* example one layout to another). The dst_handle and src_handle refer to the
* MemorySpace of the destination and source allocations respectively. dst_name
* and src_name are the user provided names of the allocations, while dst_ptr
* and src_ptr are the respective data pointers. size is the size in bytes of
* the allocations.
*/
extern "C" void kokkosp_begin_deep_copy(
SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
SpaceHandle src_handle, const char* src_name, const void* src_ptr,
uint64_t size) {
std::stringstream ss;
ss << "Kokkos deep copy: " << src_handle.name << " " << src_name
<< " -> " << dst_handle.name << " " << dst_name;
std::string tmp{ss.str()};
auto p = apex::start(tmp);
timer_stack().push(p);
ss << ": Bytes";
std::string tmp2{ss.str()};
double bytes = (double)(size);
apex::sample_value(tmp2, bytes);
APEX_UNUSED(src_ptr);
APEX_UNUSED(dst_ptr);
}

/* This function marks the end of a Kokkos::deep_copy call following a
* kokkosp_begind_deep_copy call.
*/
extern "C" void kokkosp_end_deep_copy() {
auto p = timer_stack().top();
apex::stop(p);
timer_stack().pop();
}

/* Create a profiling section handle. Sections can overlap with each other
* in contrast to Regions which behave like a stack. name is a user provided
* name for the section and sec_id is used to return a section identifier to
* the user. Note that sec_id should be unique for an open section, but name
* may not be unique. Hence there may be multiple sections with the same name.
*/
extern "C" void kokkosp_create_profile_section( const char* name,
uint32_t* sec_id) {
std::string tmp{name};
section_mtx.lock();
*sec_id = (uint32_t)(sections().size());
sections().push_back(tmp);
section_mtx.unlock();
}

/* Start a profiling section using a previously created section id. A
* profiling section can be started multiple times, assuming it was first
* stopped each time.
*/
extern "C" void kokkosp_start_profile_section( uint32_t sec_id) {
section_mtx.lock();
auto name = sections()[sec_id];
section_mtx.unlock();
auto p = apex::start(name);
section_mtx.lock();
active_sections().insert(std::pair<uint32_t, apex::profiler*>(sec_id, p));
section_mtx.unlock();
}

/* Stop a profiling section using a previously created section id.
*/
extern "C" void kokkosp_stop_profile_section( uint32_t sec_id) {
section_mtx.lock();
auto p = active_sections()[sec_id];
section_mtx.unlock();
apex::stop(p);
}

/* Destroy a previously created profiling section.
*/
extern "C" void kokkosp_destroy_profile_section( uint32_t sec_id) {
section_mtx.lock();
active_sections().erase(sec_id);
section_mtx.unlock();
}

/* Marks an event during an application with a user provided name.
*/
extern "C" void kokkosp_profile_event( const char* name ) {
apex::sample_value(name, 0.0);
}

12 changes: 12 additions & 0 deletions src/apex/apex_kokkos.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include <cstdint>

typedef struct KokkosPDeviceInfo {
uint32_t deviceID;
} KokkosPDeviceInfo_t;

/* This handle describes a Kokkos memory space. The name member is a
* zero-padded string which currently can take the values "Host" or "Cuda".
*/
typedef struct SpaceHandle {
char name[64];
} SpaceHandle_t;

0 comments on commit bfc461e

Please sign in to comment.