Merge branch 'develop' of github.com:khuck/xpress-apex into develop
khuck committed Aug 11, 2020
2 parents 9cea99f + 5373cf4 commit 2bc65bb
Showing 7 changed files with 168 additions and 69 deletions.
149 changes: 88 additions & 61 deletions src/apex/activity_trace_async.cpp
@@ -284,7 +284,7 @@ static void memoryActivity(CUpti_Activity *record) {
CUpti_ActivityMemcpy *memcpy = (CUpti_ActivityMemcpy *) record;
std::string name{getMemcpyKindString(memcpy->copyKind)};
store_profiler_data(name, memcpy->correlationId, memcpy->start,
memcpy->end, memcpy->deviceId, memcpy->contextId, 0);
memcpy->end, memcpy->deviceId, memcpy->contextId, memcpy->streamId);
if (apex::apex_options::use_cuda_counters()) {
store_counter_data("GPU: Bytes", name, memcpy->end,
memcpy->bytes, true);
@@ -307,6 +307,8 @@ static void unifiedMemoryActivity(CUpti_Activity *record) {
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_HTOD
|| memcpy->counterKind ==
CUPTI_ACTIVITY_UNIFIED_MEMORY_COUNTER_KIND_BYTES_TRANSFER_DTOH) {
// The context isn't available, and the streamID isn't valid
// (per CUPTI documentation)
store_profiler_data(name, 0, memcpy->start, memcpy->end,
device, 0, 0);
if (apex::apex_options::use_cuda_counters()) {
@@ -661,6 +663,75 @@ bool getBytesIfMalloc(CUpti_CallbackId id, const void* params, std::string conte
return true;
}

void register_new_context(const void *params) {
//printf("New Context\n");
APEX_UNUSED(params);
#if 0
CUpti_ResourceData * rd = (CUpti_ResourceData*)(params);
/* Register for async activity ON THIS CONTEXT! */
CUPTI_CALL(cuptiActivityEnableContext(rd->context,
CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); // 10
CUPTI_CALL(cuptiActivityEnableContext(rd->context,
CUPTI_ACTIVITY_KIND_MEMCPY)); // 1
CUPTI_CALL(cuptiActivityEnableContext(rd->context,
CUPTI_ACTIVITY_KIND_MEMCPY2)); // 22
CUPTI_CALL(cuptiActivityEnableContext(rd->context,
CUPTI_ACTIVITY_KIND_MEMSET)); // 2
#else
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); // 10
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); // 1
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY2)); // 22
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET)); // 2
#endif
//CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENACC_DATA)); // 33
//CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH)); // 34
#if 0
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); // 3 <- disables concurrency
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER)); // 4
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); // 5
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EVENT)); // 6
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_METRIC)); // 7
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE)); // 8
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONTEXT)); // 9
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NAME)); // 11
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MARKER)); // 12
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MARKER_DATA)); // 13
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR)); // 14
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS)); // 15
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_BRANCH)); // 16
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD)); // 17
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CDP_KERNEL)); // 18
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PREEMPTION)); // 19
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_ENVIRONMENT)); // 20
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EVENT_INSTANCE)); // 21
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_METRIC_INSTANCE)); // 23
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION)); // 24
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); // 25
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_FUNCTION)); // 26
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MODULE)); // 27
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE)); // 28
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_SHARED_ACCESS)); // 29
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING)); // 30
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO)); // 31
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION)); // 32
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENACC_OTHER)); // 35
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CUDA_EVENT)); // 36
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_STREAM)); // 37
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_SYNCHRONIZATION)); // 38
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION)); // 39
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NVLINK)); // 40
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT)); // 41
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT_INSTANCE)); // 42
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC)); // 43
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC_INSTANCE)); // 44
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMORY)); // 45
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PCIE)); // 46
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENMP)); // 47
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API)); // 48
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_COUNT)); // 49
#endif
}

void apex_cupti_callback_dispatch(void *ud, CUpti_CallbackDomain domain,
CUpti_CallbackId id, const void *params) {
static bool initialized = initialize_first_time();
@@ -677,6 +748,13 @@ void apex_cupti_callback_dispatch(void *ud, CUpti_CallbackDomain domain,
APEX_UNUSED(domain);
if (!apex::thread_instance::is_worker()) { return; }
if (params == NULL) { return; }

if (domain == CUPTI_CB_DOMAIN_RESOURCE &&
id == CUPTI_CBID_RESOURCE_CONTEXT_CREATED) {
register_new_context(params);
return;
}

CUpti_CallbackData * cbdata = (CUpti_CallbackData*)(params);

if (cbdata->callbackSite == CUPTI_API_ENTER) {
@@ -747,72 +825,19 @@ void initTrace() {
if (apex::apex_options::use_cuda_driver_api()) {
CUPTI_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_DRIVER_API));
}
// Make sure we see CUPTI_CBID_RESOURCE_CONTEXT_CREATED events!
CUPTI_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE));

/* These events aren't begin/end callbacks, so no need to support them. */
//CUPTI_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_SYNCHRONIZE));
//CUPTI_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE));
//CUPTI_CALL(cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_NVTX));

/* Register for async activity */

CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); // 10
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); // 1
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY2)); // 22
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET)); // 2
//CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENACC_DATA)); // 33
//CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH)); // 34
#if 0
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); // 3 <- disables concurrency
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER)); // 4
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); // 5
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EVENT)); // 6
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_METRIC)); // 7
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE)); // 8
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONTEXT)); // 9
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NAME)); // 11
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MARKER)); // 12
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MARKER_DATA)); // 13
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR)); // 14
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_GLOBAL_ACCESS)); // 15
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_BRANCH)); // 16
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD)); // 17
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CDP_KERNEL)); // 18
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PREEMPTION)); // 19
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_ENVIRONMENT)); // 20
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EVENT_INSTANCE)); // 21
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_METRIC_INSTANCE)); // 23
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTRUCTION_EXECUTION)); // 24
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_UNIFIED_MEMORY_COUNTER)); // 25
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_FUNCTION)); // 26
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MODULE)); // 27
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE)); // 28
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_SHARED_ACCESS)); // 29
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING)); // 30
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO)); // 31
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTRUCTION_CORRELATION)); // 32
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENACC_OTHER)); // 35
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CUDA_EVENT)); // 36
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_STREAM)); // 37
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_SYNCHRONIZATION)); // 38
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION)); // 39
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NVLINK)); // 40
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT)); // 41
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_EVENT_INSTANCE)); // 42
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC)); // 43
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INSTANTANEOUS_METRIC_INSTANCE)); // 44
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMORY)); // 45
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PCIE)); // 46
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OPENMP)); // 47
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API)); // 48
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_COUNT)); // 49
#endif

// synchronize timestamps
startTimestampCPU = apex::profiler::get_time_ns();
cuptiGetTimestamp(&startTimestampGPU);
// assume CPU timestamp is greater than GPU
deltaTimestamp = (int64_t)(startTimestampCPU) - (int64_t)(startTimestampGPU);
printf("Delta computed to be: %ld\n", deltaTimestamp);
//printf("Delta computed to be: %ld\n", deltaTimestamp);
}

/* This is the global "shutdown" method for flushing the buffer. This is
Expand All @@ -821,10 +846,12 @@ void initTrace() {
namespace apex {
void flushTrace(void) {
if ((num_buffers_processed + 10) < num_buffers) {
flushing = true;
std::cout << "Flushing remaining " << std::fixed
<< num_buffers-num_buffers_processed << " of " << num_buffers
<< " CUDA/CUPTI buffers..." << std::endl;
if (apex::instance()->get_node_id() == 0) {
//flushing = true;
std::cout << "Flushing remaining " << std::fixed
<< num_buffers-num_buffers_processed << " of " << num_buffers
<< " CUDA/CUPTI buffers..." << std::endl;
}
}
cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_NONE);
if (flushing) {
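A note on the change above: activity_trace_async.cpp now enables the CUPTI activity kinds from a resource callback that fires when a CUDA context is created (CUPTI_CBID_RESOURCE_CONTEXT_CREATED), and initTrace() enables CUPTI_CB_DOMAIN_RESOURCE so that callback is actually delivered. The following is a minimal, self-contained sketch of that pattern, not APEX's implementation; it uses only CUPTI calls that already appear in the diff, the names resource_cb and subscribe_for_context_events are illustrative, and error checking is omitted.

#include <cupti.h>

// Enable async activity collection the moment a CUDA context is created,
// mirroring the #else branch of register_new_context() above.
static void CUPTIAPI resource_cb(void *userdata, CUpti_CallbackDomain domain,
                                 CUpti_CallbackId id, const void *params) {
    (void)userdata; (void)params;
    if (domain == CUPTI_CB_DOMAIN_RESOURCE &&
        id == CUPTI_CBID_RESOURCE_CONTEXT_CREATED) {
        cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL);
        cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY);
        cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY2);
        cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET);
    }
}

void subscribe_for_context_events(void) {
    CUpti_SubscriberHandle subscriber;
    cuptiSubscribe(&subscriber, (CUpti_CallbackFunc)resource_cb, nullptr);
    // Without enabling the resource domain, the context-created callback is
    // never delivered, which is what the new line in initTrace() addresses.
    cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_RESOURCE);
}

CUPTI allows only one subscriber per process, which is presumably why the commit routes the resource event through the existing apex_cupti_callback_dispatch callback rather than registering a second one.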
16 changes: 16 additions & 0 deletions src/apex/apex.cpp
@@ -78,6 +78,10 @@ thread_instance::get_id(), __func__, __LINE__); fflush(stdout);
#define FUNCTION_EXIT
#endif

#if defined(APEX_HAVE_MPI)
#include "mpi.h"
#endif

APEX_NATIVE_TLS bool _registered = false;
APEX_NATIVE_TLS bool _exited = false;
static bool _initialized = false;
@@ -2051,6 +2055,18 @@ extern "C" {
return hardware_concurrency();
}

/* When running with MPI and OTF (or other event unification at the end of
* execution) we need to finalize APEX before MPI_Finalize() is called, so
* that we can use MPI for the wrap-up. We can override the weak MPI
* implementation of Finalize, and do what we need to. */
#if defined(APEX_HAVE_MPI)
int MPI_Finalize(void) {
apex::finalize();
int retval = PMPI_Finalize();
apex::cleanup();
return retval;
}
#endif

} // extern "C"

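The MPI_Finalize override added to apex.cpp uses the standard MPI profiling interface: the MPI library exports name-shifted PMPI_ entry points (and typically weak MPI_ symbols), so a tool can define MPI_Finalize itself, run its shutdown while MPI is still alive, and then forward to the real implementation. Below is a stripped-down sketch of that pattern, with a placeholder printf where APEX calls apex::finalize(); it is an illustration, not the commit's code.

#include <mpi.h>
#include <cstdio>

// Intercept MPI_Finalize, do work that still needs MPI, then forward to the
// name-shifted PMPI_Finalize provided by the MPI library.
extern "C" int MPI_Finalize(void) {
    int rank = 0;
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);  // MPI is still fully usable here
    std::printf("tool shutdown on rank %d before the real finalize\n", rank);
    return PMPI_Finalize();
}

In the commit itself the body is apex::finalize(), then PMPI_Finalize(), then apex::cleanup(), which, per the comment above, is what lets end-of-run event unification (e.g. for OTF2 output) still use MPI.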
16 changes: 15 additions & 1 deletion src/apex/otf2_listener.cpp
@@ -1519,6 +1519,7 @@ namespace apex {
// first, output our number of threads.
//metric_file << thread_instance::get_num_threads() << endl;
metric_file << _event_threads.size() << endl;
metric_file << saved_end_timestamp << endl;
// then iterate over the metrics and write them out.
for (auto const &i : global_metric_indices) {
string id = i.first;
@@ -1587,6 +1588,12 @@ namespace apex {
std::getline(metric_file, metric_line);
std::string::size_type sz; // alias of size_t
rank_thread_map[i] = std::stoi(metric_line,&sz);
// get the last timestamp
std::getline(metric_file, metric_line);
uint64_t tmp_timestamp = std::stol(metric_line, &sz);
if (saved_end_timestamp < tmp_timestamp) {
saved_end_timestamp = tmp_timestamp;
}
// read the map from that rank
while (std::getline(metric_file, metric_line)) {
rank_metric_map[i] = rank_metric_map[i] + 1;
@@ -1935,6 +1942,7 @@ namespace apex {
// first, output our number of threads.
//metric_file << thread_instance::get_num_threads() << endl;
metric_file << _event_threads.size() << endl;
metric_file << saved_end_timestamp << endl;
// then iterate over the metrics and write them out.
for (auto const &i : global_metric_indices) {
string id = i.first;
@@ -1986,6 +1994,12 @@ namespace apex {
std::getline(metric_file, metric_line);
std::string::size_type sz; // alias of size_t
rank_thread_map[i] = std::stoi(metric_line,&sz);
// get the last timestamp
std::getline(metric_file, metric_line);
uint64_t tmp_timestamp = std::stol(metric_line, &sz);
if (saved_end_timestamp < tmp_timestamp) {
saved_end_timestamp = tmp_timestamp;
}
// read the map from that rank
while (std::getline(metric_file, metric_line)) {
rank_metric_map[i] = rank_metric_map[i] + 1;
@@ -2159,7 +2173,7 @@ namespace apex {
<< ".\nIgnoring event " << p->tt_ptr->task_id->get_name()
<< " with timestamp of " << stamp << " after last event "
<< "with timestamp of " << last << std::endl;
*/
*/
return;
}
// don't close the archive on us!
9 changes: 8 additions & 1 deletion src/apex/profiler_listener.cpp
@@ -1114,6 +1114,10 @@ node_color * get_node_color(double v,double vmin,double vmax)
if (apex_options::use_tau()) {
tau_listener::Tau_start_wrapper("profiler_listener::process_profiles");
}
/*
static auto prof = new_task(__func__);
start(prof);
*/

std::shared_ptr<profiler> p;
task_dependency* td;
@@ -1206,6 +1210,7 @@ node_color * get_node_color(double v,double vmin,double vmax)
*/
#endif

//stop(prof);
if (apex_options::use_tau()) {
tau_listener::Tau_stop_wrapper("profiler_listener::process_profiles");
}
@@ -1406,7 +1411,9 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl
if (ignored > 100000) {
std::cerr << "done." << std::endl;
}
finalize_profiles(data);
if (apex_options::process_async_state()) {
finalize_profiles(data);
}
}
if (apex_options::use_taskgraph_output() && node_id == 0)
{
15 changes: 11 additions & 4 deletions src/unit_tests/CUDA/CMakeLists.txt
@@ -1,5 +1,12 @@
if(MPI_CXX_FOUND)
set(APEX_CUDA_CXX_FLAGS ${MPI_COMPILE_FLAGS})
set(APEX_CUDA_C_FLAGS ${MPI_COMPILE_FLAGS})
set(APEX_CUDA_EXTRA_INCLUDE ${MPI_CXX_INCLUDE_PATH})
set(APEX_CUDA_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS} ${MPI_CXX_LIBRARIES})
endif()

# Make sure the compiler can find include files from our Apex library.
include_directories (${APEX_SOURCE_DIR}/src/apex)
include_directories (${APEX_SOURCE_DIR}/src/apex ${APEX_CUDA_EXTRA_INCLUDE})

# Make sure the linker can find the Apex library once it is built.
link_directories (${APEX_BINARY_DIR}/src/apex)
@@ -18,14 +25,14 @@ endif (OPENMP_FOUND)

message(INFO "Using CUDA libraries: ${CUDA_LIBRARIES}")

set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS} ${APEX_CUDA_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} ${APEX_CUDA_CXX_FLAGS}")

foreach(example_program ${example_programs})
set(sources ${example_program}.cu)
source_group("Source Files" FILES ${sources})
add_executable("${example_program}_cu" ${sources})
target_link_libraries ("${example_program}_cu" apex ${LIBS} OpenMP::OpenMP_CXX CUDA::cuda_driver CUDA::curand)
target_link_libraries ("${example_program}_cu" apex ${LIBS} OpenMP::OpenMP_CXX CUDA::cuda_driver CUDA::curand ${APEX_CUDA_CXX_LINK_FLAGS})
if (BUILD_STATIC_EXECUTABLES)
set_target_properties("${example_program}_cu" PROPERTIES LINK_SEARCH_START_STATIC 1 LINK_SEARCH_END_STATIC 1)
endif()
14 changes: 14 additions & 0 deletions src/unit_tests/CUDA/apex_cuda.cu
@@ -1,6 +1,9 @@
#include <string.h>
#include <stdio.h>
#include "apex_api.hpp"
#if defined(APEX_HAVE_MPI)
#include "mpi.h"
#endif

#define ITERATIONS 4

@@ -36,9 +39,17 @@ void launch(DataElement *elem) {

int main(int argc, char * argv[])
{
#if defined(APEX_HAVE_MPI)
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
apex::init("apex::cuda unit test", rank, size);
#else
APEX_UNUSED(argc);
APEX_UNUSED(argv);
apex::init("apex::cuda unit test", 0, 1);
#endif
apex::apex_options::use_screen_output(true);
DataElement *e;
RUNTIME_API_CALL(cudaMallocManaged((void**)&e, sizeof(DataElement)));
@@ -56,6 +67,9 @@ int main(int argc, char * argv[])

RUNTIME_API_CALL(cudaFree(e->name));
RUNTIME_API_CALL(cudaFree(e));
#if defined(APEX_HAVE_MPI)
MPI_Finalize();
#endif
apex::finalize();
apex::cleanup();
}
