diff --git a/CMakeLists.txt b/CMakeLists.txt index fee7e0e7..c2d3262e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -230,8 +230,11 @@ enable_cxx_compiler_flag_if_supported("-Wall") enable_cxx_compiler_flag_if_supported("-Wextra") enable_cxx_compiler_flag_if_supported("-Wpedantic") enable_cxx_compiler_flag_if_supported("-pedantic") +# PGI and Intel don't like the concurrentqueue code. if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") - enable_cxx_compiler_flag_if_supported("-Werror") + if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") + enable_cxx_compiler_flag_if_supported("-Werror") + endif (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") endif (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") # --------------------------------------------------- @@ -279,6 +282,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") set(CMAKE_CXX_SUPPORT_FLAG "-std=c++14" CACHE STRING "CXX Support Flag" FORCE) endif() endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_SUPPORT_FLAG}") # Check if architecture is x86 or not diff --git a/cmake/Modules/FindOMPT.cmake b/cmake/Modules/FindOMPT.cmake index 84f1a7c0..0fd448e6 100644 --- a/cmake/Modules/FindOMPT.cmake +++ b/cmake/Modules/FindOMPT.cmake @@ -32,7 +32,7 @@ endif() pkg_check_modules(PC_OMPT QUIET OMPT) set(OMPT_DEFINITIONS ${PC_OMPT_CFLAGS_OTHER}) -find_path(OMPT_INCLUDE_DIR ompt.h +find_path(OMPT_INCLUDE_DIR omp-tools.h HINTS ${PC_OMPT_INCLUDEDIR} ${PC_OMPT_INCLUDE_DIRS} ${OMPT_ROOT}/include) find_library(OMPT_LIBRARY NAMES omp iomp5 gomp diff --git a/cmake/tests/ompt_test/ompt_test.cpp b/cmake/tests/ompt_test/ompt_test.cpp index 424da4d2..35f57d65 100644 --- a/cmake/tests/ompt_test/ompt_test.cpp +++ b/cmake/tests/ompt_test/ompt_test.cpp @@ -2,7 +2,7 @@ #include "stdio.h" #include -#include +#include #include #define cb_t(name) (ompt_callback_t)&name @@ -49,11 +49,11 @@ on_ompt_callback_parallel_end( inline static void register_callback(ompt_callbacks_t name, ompt_callback_t cb) { int ret = ompt_set_callback(name, cb); - switch(ret) { + switch(ret) { case ompt_set_never: - fprintf(stderr, "TAU: WARNING: Callback for event %d could not be registered\n", name); - break; - case ompt_set_sometimes: + fprintf(stderr, "TAU: WARNING: Callback for event %d could not be registered\n", name); + break; + case ompt_set_sometimes: printf("TAU: Callback for event %d registered with return value %s\n", name, "ompt_set_sometimes"); break; case ompt_set_sometimes_paired: @@ -95,7 +95,7 @@ extern "C" int ompt_initialize( register_callback(ompt_callback_parallel_end, cb_t(on_ompt_callback_parallel_end)); //register_callback(ompt_callback_task_create, cb_t(on_ompt_callback_task_create)); //register_callback(ompt_callback_task_schedule, cb_t(on_ompt_callback_task_schedule)); - //register_callback(ompt_callback_implicit_task, cb_t(on_ompt_callback_implicit_task)); //Sometimes high-overhead, but unfortunately we cannot avoid this as it is a required event + //register_callback(ompt_callback_implicit_task, cb_t(on_ompt_callback_implicit_task)); //Sometimes high-overhead, but unfortunately we cannot avoid this as it is a required event //register_callback(ompt_callback_thread_begin, cb_t(on_ompt_callback_thread_begin)); //register_callback(ompt_callback_thread_end, cb_t(on_ompt_callback_thread_end)); diff --git a/src/apex/activity_trace_async.cpp b/src/apex/activity_trace_async.cpp index 83f71a4b..292c9fb4 100644 --- a/src/apex/activity_trace_async.cpp +++ b/src/apex/activity_trace_async.cpp @@ -18,7 +18,9 @@ #include "thread_instance.hpp" #include "apex_options.hpp" #include "trace_event_listener.hpp" +#ifdef APEX_HAVE_OTF2 #include "otf2_listener.hpp" +#endif static void __attribute__((constructor)) initTrace(void); //static void __attribute__((destructor)) flushTrace(void); @@ -109,12 +111,13 @@ void store_profiler_data(const std::string &name, uint32_t correlationId, (apex::trace_event_listener*)instance->the_trace_event_listener; tel->on_async_event(device, context, stream, prof); } +#ifdef APEX_HAVE_OTF2 if (apex::apex_options::use_otf2()) { apex::otf2_listener * tol = (apex::otf2_listener*)instance->the_otf2_listener; tol->on_async_event(device, context, stream, prof); } - +#endif // have the listeners handle the end of this task instance->complete_task(tt); } diff --git a/src/apex/apex_ompt.cpp b/src/apex/apex_ompt.cpp index 4dfca81e..e81195ea 100644 --- a/src/apex/apex_ompt.cpp +++ b/src/apex/apex_ompt.cpp @@ -3,7 +3,7 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#include +#include #include #include "string.h" #include "stdio.h" @@ -25,8 +25,9 @@ fprintf( stderr, __VA_ARGS__ ); fflush(stderr); \ #endif std::mutex apex_apex_threadid_mutex; -std::atomic apex_numthreads(0); -APEX_NATIVE_TLS uint64_t apex_threadid(-1); +std::atomic apex_numthreads{0}; +APEX_NATIVE_TLS int64_t apex_threadid{-1}; +static std::atomic enabled{false}; class linked_timer { public: @@ -90,7 +91,10 @@ namespace apex { */ void ompt_force_shutdown(void) { DEBUG_PRINT("Forcing shutdown of OpenMP Tools API\n"); + /* The Intel generated code has some odd destructor race conditions, so + * don't force the runtime to shut down. */ /* OpenMP might not have been used... */ + enabled = false; if (ompt_finalize_tool) { ompt_finalize_tool(); } @@ -102,6 +106,7 @@ void ompt_force_shutdown(void) { void apex_ompt_start(const char * state, ompt_data_t * ompt_data, ompt_data_t * region_data, bool auto_start) { + if (!enabled) { return; } static std::shared_ptr nothing(nullptr); linked_timer* tmp; /* First check if there's a parent "region" - could be a task */ @@ -135,6 +140,7 @@ void apex_ompt_start(const char * state, ompt_data_t * ompt_data, } void apex_ompt_stop(ompt_data_t * ompt_data) { + if (!enabled) { return; } APEX_ASSERT(ompt_data->ptr); void* tmp = ((linked_timer*)(ompt_data->ptr))->prev; delete((linked_timer*)(ompt_data->ptr)); @@ -152,6 +158,7 @@ extern "C" void apex_thread_begin( ompt_thread_t thread_type, /* type of thread */ ompt_data_t *thread_data /* data of thread */) { + if (!enabled) { return; } APEX_UNUSED(thread_data); { std::unique_lock l(apex_apex_threadid_mutex); @@ -182,6 +189,7 @@ extern "C" void apex_thread_begin( extern "C" void apex_thread_end( ompt_data_t *thread_data /* data of thread */ ) { + if (!enabled) { return; } APEX_UNUSED(thread_data); apex::exit_thread(); } @@ -195,6 +203,7 @@ static void apex_parallel_region_begin ( int flags, /* flags */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } APEX_UNUSED(encountering_task_data); APEX_UNUSED(encountering_task_frame); APEX_UNUSED(requested_team_size); @@ -212,6 +221,7 @@ static void apex_parallel_region_end ( int flags, /* flags */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } APEX_UNUSED(encountering_task_data); APEX_UNUSED(flags); APEX_UNUSED(codeptr_ra); @@ -228,6 +238,7 @@ extern "C" void apex_task_create ( int has_dependences, /* created task has dependences */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } APEX_UNUSED(encountering_task_frame); APEX_UNUSED(has_dependences); APEX_UNUSED(codeptr_ra); @@ -289,6 +300,7 @@ extern "C" void apex_task_schedule( ompt_task_status_t prior_task_status, /* status of prior task */ ompt_data_t *next_task_data /* data of next task */ ) { + if (!enabled) { return; } DEBUG_PRINT("%lu: Task Schedule prior: %p, status: %d, next: %p\n", apex_threadid, (void*)prior_task_data, prior_task_status, (void*)next_task_data); if (prior_task_data != nullptr) { linked_timer* prior = (linked_timer*)(prior_task_data->ptr); @@ -327,6 +339,7 @@ extern "C" void apex_implicit_task( unsigned int thread_num, /* thread number of calling thread */ int flags ) { + if (!enabled) { return; } APEX_UNUSED(team_size); APEX_UNUSED(thread_num); APEX_UNUSED(flags); @@ -358,6 +371,7 @@ extern "C" void apex_target ( ompt_id_t target_id, const void *codeptr_ra ) { + if (!enabled) { return; } } /* Event #9, target data */ @@ -369,6 +383,7 @@ extern "C" void apex_target_data_op ( void *device_addr, size_t bytes ) { + if (!enabled) { return; } } /* Event #10, target submit */ @@ -376,6 +391,7 @@ extern "C" void apex_target_submit ( ompt_id_t target_id, ompt_id_t host_op_id ) { + if (!enabled) { return; } } /* Event #11, tool control */ @@ -385,6 +401,7 @@ extern "C" void apex_control( void *arg, /* argument of control call */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } } /* Event #12, device initialize */ @@ -395,12 +412,14 @@ extern "C" void apex_device_initialize ( ompt_function_lookup_t lookup, const char *documentation ) { + if (!enabled) { return; } } /* Event #13, device finalize */ extern "C" void apex_device_finalize ( uint64_t device_num ) { + if (!enabled) { return; } } /* Event #14, device load */ @@ -414,6 +433,7 @@ extern "C" void apex_device_load_t ( void * device_addr, uint64_t module_id ) { + if (!enabled) { return; } } /* Event #15, device load */ @@ -421,6 +441,7 @@ extern "C" void apex_device_unload ( uint64_t device_num, uint64_t module_id ) { + if (!enabled) { return; } } #endif // placeholder functions @@ -441,6 +462,7 @@ extern "C" void apex_sync_region_wait ( ompt_data_t *task_data, /* data of task */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } char * tmp_str; static const char * barrier_str = "Barrier Wait"; static const char * barrier_i_str = "Implicit Barrier Wait"; @@ -504,6 +526,7 @@ extern "C" void apex_ompt_work ( uint64_t count, /* quantity of work */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } APEX_UNUSED(count); // unused on end char * tmp_str; @@ -584,6 +607,7 @@ extern "C" void apex_ompt_master ( ompt_data_t *task_data, /* data of task */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } if (endpoint == ompt_scope_begin) { if (codeptr_ra != nullptr) { char regionIDstr[128] = {0}; @@ -660,6 +684,7 @@ extern "C" void apex_ompt_flush ( ompt_data_t *thread_data, /* data of thread */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } APEX_UNUSED(thread_data); if (codeptr_ra != nullptr) { char regionIDstr[128] = {0}; @@ -676,6 +701,7 @@ extern "C" void apex_ompt_cancel ( int flags, /* cancel flags */ const void *codeptr_ra /* return address of runtime call */ ) { + if (!enabled) { return; } char regionIDstr[128] = {0}; if (flags & ompt_cancel_parallel) { if (codeptr_ra != nullptr) { @@ -747,6 +773,7 @@ extern "C" void apex_ompt_cancel ( extern "C" void apex_ompt_idle ( ompt_scope_endpoint_t endpoint /* endpoint of idle time */ ) { + if (!enabled) { return; } static APEX_NATIVE_TLS apex::profiler* p = nullptr; if (endpoint == ompt_scope_begin) { p = apex::start("OpenMP Idle"); @@ -935,6 +962,7 @@ int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, } } + enabled = true; DEBUG_PRINT("done.\n"); fflush(stderr); return 1; @@ -944,6 +972,7 @@ void ompt_finalize(ompt_data_t* tool_data) { APEX_UNUSED(tool_data); DEBUG_PRINT("OpenMP runtime is shutting down...\n"); + enabled = false; apex::finalize(); } diff --git a/src/openmp/ompt_master.c b/src/openmp/ompt_master.c index aba78a00..a778cdc2 100644 --- a/src/openmp/ompt_master.c +++ b/src/openmp/ompt_master.c @@ -4,7 +4,6 @@ int main (void) { int a, i; - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); #pragma omp parallel shared(a) private(i) { @@ -16,5 +15,4 @@ int main (void) { #pragma omp master printf ("Sum is %d\n", a); } - apex_finalize(); } \ No newline at end of file diff --git a/src/openmp/ompt_parallel_region.c b/src/openmp/ompt_parallel_region.c index de46437e..9f8d313e 100644 --- a/src/openmp/ompt_parallel_region.c +++ b/src/openmp/ompt_parallel_region.c @@ -4,7 +4,6 @@ #include "apex.h" int main (int argc, char** argv) { - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); #pragma omp parallel { diff --git a/src/openmp/ompt_reduction.c b/src/openmp/ompt_reduction.c index bc20303b..31ef6ceb 100644 --- a/src/openmp/ompt_reduction.c +++ b/src/openmp/ompt_reduction.c @@ -11,7 +11,6 @@ int main (int argc, char** argv) { int i, n, chunk; double a[100], b[100], result; - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); /* Some initializations */ @@ -30,7 +29,6 @@ int main (int argc, char** argv) { printf("Final result= %f\n",result); - apex_finalize(); return 0; } diff --git a/src/openmp/ompt_sections.c b/src/openmp/ompt_sections.c index c6e5afeb..0f56bb47 100644 --- a/src/openmp/ompt_sections.c +++ b/src/openmp/ompt_sections.c @@ -13,7 +13,6 @@ void c() { } int main (void) { - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); #pragma omp parallel sections { @@ -24,5 +23,4 @@ int main (void) { #pragma omp section c(); } - apex_finalize(); } \ No newline at end of file diff --git a/src/openmp/ompt_single.c b/src/openmp/ompt_single.c index 6d8eef61..eb46a578 100644 --- a/src/openmp/ompt_single.c +++ b/src/openmp/ompt_single.c @@ -4,7 +4,6 @@ int main (void) { int a, i; - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); #pragma omp parallel shared(a) private(i) { @@ -16,5 +15,4 @@ int main (void) { #pragma omp single printf ("Sum is %d\n", a); } - apex_finalize(); } \ No newline at end of file diff --git a/src/openmp/ompt_sync_region_wait.c b/src/openmp/ompt_sync_region_wait.c index 97e3d1cb..bb438ffd 100644 --- a/src/openmp/ompt_sync_region_wait.c +++ b/src/openmp/ompt_sync_region_wait.c @@ -10,7 +10,6 @@ int main (int argc, char** argv) { int i, n, chunk; double a[100], b[100], result; - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); /* Some initializations */ @@ -33,7 +32,6 @@ int main (int argc, char** argv) { printf("Final result= %f\n",result); - apex_finalize(); return 0; } diff --git a/src/openmp/ompt_target.c b/src/openmp/ompt_target.c index 44a9804a..69254122 100644 --- a/src/openmp/ompt_target.c +++ b/src/openmp/ompt_target.c @@ -8,7 +8,6 @@ double start; double end; int main (void) { int sum = 0; - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); start = omp_get_wtime(); printf("Num devices available: %d\n",omp_get_num_devices() ); @@ -21,6 +20,5 @@ int main (void) { end = omp_get_wtime(); printf ("time %f\n",(end-start)); printf("sum = %d\n",sum); - apex_finalize(); return 0; } diff --git a/src/openmp/ompt_thread.c b/src/openmp/ompt_thread.c index 7b02580b..b3c4990b 100644 --- a/src/openmp/ompt_thread.c +++ b/src/openmp/ompt_thread.c @@ -4,7 +4,6 @@ #include "apex.h" int main (int argc, char** argv) { - apex_init(__func__, 0, 1); apex_set_use_screen_output(1); #pragma omp parallel { @@ -13,7 +12,6 @@ int main (int argc, char** argv) { omp_get_num_threads()); fflush(stdout); } - apex_finalize(); return 0; } diff --git a/src/unit_tests/C++/CMakeLists.txt b/src/unit_tests/C++/CMakeLists.txt index 19b099de..b12bc81d 100644 --- a/src/unit_tests/C++/CMakeLists.txt +++ b/src/unit_tests/C++/CMakeLists.txt @@ -57,6 +57,8 @@ endif() # std::threads crash when linked statically. :( if (NOT BUILD_STATIC_EXECUTABLES) +# Intel can't do std::futures (std::__once_callable) +if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") if ((NOT OTF2_FOUND)) set(example_programs "${example_programs};apex_fibonacci_std_async;apex_fibonacci_std_async2") endif() @@ -66,6 +68,7 @@ if (NOT BUILD_STATIC_EXECUTABLES) set(example_programs "${example_programs};apex_throttle_event") endif (APEX_THROTTLE) endif ((NOT DEFINED TAU_ROOT) AND (NOT USE_TAU) AND (NOT TAU_FOUND)) +endif (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") endif() foreach(example_program ${example_programs})