From f7153f6103f7e1f2f176326905522c548c0b7447 Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Mon, 22 Mar 2021 11:02:57 -0700 Subject: [PATCH] Working memory wrapper for malloc/free, removing pointers from name demangling due to instability --- src/apex/CMakeLists.standalone | 2 + src/apex/activity_trace_async.cpp | 39 +-- src/apex/apex.cpp | 79 ++++- src/apex/apex_types.h | 1 + src/apex/memory_wrapper.cpp | 70 +++++ src/apex/proc_read.cpp | 1 + src/apex/profiler_listener.cpp | 1 + src/apex/task_identifier.cpp | 21 +- src/apex/tcmalloc_hooks.cpp | 79 +++++ src/apex/tcmalloc_hooks.hpp | 27 ++ src/apex/utils.cpp | 19 +- src/apex/utils.hpp | 16 +- src/scripts/apex_exec | 15 +- src/wrappers/CMakeLists.txt | 8 +- src/wrappers/dl_auditor.c | 50 ++++ src/wrappers/memory_wrapper.c | 197 ------------- src/wrappers/memory_wrapper.cpp | 357 +++++++++++++++++++++++ src/wrappers/memory_wrapper.h | 31 +- src/wrappers/memory_wrapper_internal.cpp | 144 +++++++-- 19 files changed, 873 insertions(+), 284 deletions(-) create mode 100644 src/apex/memory_wrapper.cpp create mode 100644 src/apex/tcmalloc_hooks.cpp create mode 100644 src/apex/tcmalloc_hooks.hpp create mode 100644 src/wrappers/dl_auditor.c delete mode 100644 src/wrappers/memory_wrapper.c create mode 100644 src/wrappers/memory_wrapper.cpp diff --git a/src/apex/CMakeLists.standalone b/src/apex/CMakeLists.standalone index 5acfa34d..2cf37828 100644 --- a/src/apex/CMakeLists.standalone +++ b/src/apex/CMakeLists.standalone @@ -66,6 +66,7 @@ concurrency_handler.cpp dependency_tree.cpp event_listener.cpp handler.cpp +memory_wrapper.cpp ${OTF2_SOURCE} perftool_implementation.cpp policy_handler.cpp @@ -73,6 +74,7 @@ ${PROC_SOURCE} profiler_listener.cpp ${SENSOR_SOURCE} task_identifier.cpp +tcmalloc_hooks.cpp ${tau_SOURCE} thread_instance.cpp trace_event_listener.cpp diff --git a/src/apex/activity_trace_async.cpp b/src/apex/activity_trace_async.cpp index cb5d4aa8..10596fed 100644 --- a/src/apex/activity_trace_async.cpp +++ 
b/src/apex/activity_trace_async.cpp @@ -293,6 +293,7 @@ bool register_myself(bool isWorker = true) { void store_profiler_data(const std::string &name, uint32_t correlationId, uint64_t start, uint64_t end, apex::cuda_thread_node &node, bool otf2_trace = true) { + apex::in_apex prevent_deadlocks; // Get the singleton APEX instance static apex::apex* instance = apex::apex::instance(); // get the parent GUID, then erase the correlation from the map @@ -353,6 +354,7 @@ void store_sync_counter_data(const char * name, const std::string& context, /* Handle counters from asynchronous activity */ void store_counter_data(const char * name, const std::string& ctx, uint64_t end, double value, apex::cuda_thread_node &node, bool force = false) { + apex::in_apex prevent_deadlocks; std::stringstream ss; if (name == nullptr) { ss << ctx; @@ -670,45 +672,44 @@ static void kernelActivity(CUpti_Activity *record) { store_profiler_data(tmp, kernel->correlationId, kernel->start, kernel->end, node); if (apex::apex_options::use_cuda_counters()) { - std::string * demangled = apex::demangle(kernel->name); + std::string demangled = apex::demangle(kernel->name); store_counter_data("GPU: Dynamic Shared Memory (B)", - *demangled, kernel->end, kernel->dynamicSharedMemory, node); + demangled, kernel->end, kernel->dynamicSharedMemory, node); store_counter_data("GPU: Local Memory Per Thread (B)", - *demangled, kernel->end, kernel->localMemoryPerThread, node); + demangled, kernel->end, kernel->localMemoryPerThread, node); store_counter_data("GPU: Local Memory Total (B)", - *demangled, kernel->end, kernel->localMemoryTotal, node); + demangled, kernel->end, kernel->localMemoryTotal, node); store_counter_data("GPU: Registers Per Thread", - *demangled, kernel->end, kernel->registersPerThread, node); + demangled, kernel->end, kernel->registersPerThread, node); store_counter_data("GPU: Shared Memory Size (B)", - *demangled, kernel->end, kernel->sharedMemoryExecuted, node); + demangled, kernel->end, 
kernel->sharedMemoryExecuted, node); store_counter_data("GPU: Static Shared Memory (B)", - *demangled, kernel->end, kernel->staticSharedMemory, node); + demangled, kernel->end, kernel->staticSharedMemory, node); /* Get grid and block values */ if (apex::apex_options::use_cuda_kernel_details()) { store_counter_data("GPU: blockX", - *demangled, kernel->end, kernel->blockX, node); + demangled, kernel->end, kernel->blockX, node); store_counter_data("GPU: blockY", - *demangled, kernel->end, kernel->blockY, node); + demangled, kernel->end, kernel->blockY, node); store_counter_data("GPU: blockZ", - *demangled, kernel->end, kernel->blockZ, node); + demangled, kernel->end, kernel->blockZ, node); store_counter_data("GPU: gridX", - *demangled, kernel->end, kernel->gridX, node); + demangled, kernel->end, kernel->gridX, node); store_counter_data("GPU: gridY", - *demangled, kernel->end, kernel->gridY, node); + demangled, kernel->end, kernel->gridY, node); store_counter_data("GPU: gridZ", - *demangled, kernel->end, kernel->gridZ, node); + demangled, kernel->end, kernel->gridZ, node); if (kernel->queued != CUPTI_TIMESTAMP_UNKNOWN) { store_counter_data("GPU: queue delay (us)", - *demangled, kernel->end, + demangled, kernel->end, (kernel->start - kernel->queued)*1.0e-3, node); } if (kernel->submitted != CUPTI_TIMESTAMP_UNKNOWN) { store_counter_data("GPU: submit delay (us)", - *demangled, kernel->end, + demangled, kernel->end, (kernel->start - kernel->submitted)*1.0e-3, node); } } - delete(demangled); } } @@ -1002,10 +1003,10 @@ bool getBytesIfMalloc(CUpti_CallbackId id, const void* params, std::string conte bool onHost = false; bool managed = false; void* ptr = nullptr; - static std::atomic totalAllocated = 0.0; + static std::atomic totalAllocated{0}; static std::unordered_map memoryMap; std::mutex mapMutex; - static std::atomic hostTotalAllocated = 0.0; + static std::atomic hostTotalAllocated{0}; static std::unordered_map hostMemoryMap; std::mutex hostMapMutex; bool free = false; 
@@ -1087,11 +1088,13 @@ bool getBytesIfMalloc(CUpti_CallbackId id, const void* params, std::string conte onHost = true; break; } +#ifdef CUPTI_DRIVER_TRACE_CBID_cuMemAddressFree case CUPTI_DRIVER_TRACE_CBID_cuMemAddressFree: { ptr = (void*)((cuMemAddressFree_params_st*)(params))->ptr; free = true; break; } +#endif case CUPTI_DRIVER_TRACE_CBID_cuMemFree: { size_t tmp = (size_t)((cuMemFree_params_st*)(params))->dptr; ptr = (void*)(tmp); diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index 05f81fce..0fce5d68 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -79,6 +79,9 @@ DEFINE_DESTRUCTOR(apex_finalize_static_void) #endif #endif // APEX_HAVE_HPX +#ifdef APEX_HAVE_TCMALLOC +#include "tcmalloc_hooks.hpp" +#endif #if APEX_DEBUG #define FUNCTION_ENTER printf("enter %lu *** %s:%d!\n", \ @@ -374,6 +377,7 @@ hpx::runtime * apex::get_hpx_runtime(void) { uint64_t init(const char * thread_name, uint64_t comm_rank, uint64_t comm_size) { FUNCTION_ENTER + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { FUNCTION_EXIT; return APEX_ERROR; } // FIRST! make sure APEX thinks this is a worker thread (the main thread @@ -458,6 +462,12 @@ uint64_t init(const char * thread_name, uint64_t comm_rank, instance->listeners[i]->on_new_node(node_data); } } +#ifdef APEX_HAVE_TCMALLOC + //tcmalloc::init_hook(); + enable_memory_wrapper(); +#else + enable_memory_wrapper(); +#endif FUNCTION_EXIT return APEX_NOERROR; } @@ -515,6 +525,7 @@ inline std::shared_ptr _new_task( profiler* start(const std::string &timer_name) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { APEX_UTIL_REF_COUNT_DISABLED_START @@ -581,6 +592,7 @@ profiler* start(const std::string &timer_name) } profiler* start(const apex_function_address function_address) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. 
if (apex_options::disable() == true) { APEX_UTIL_REF_COUNT_DISABLED_START @@ -646,6 +658,7 @@ void debug_print(const char * event, std::shared_ptr tt_ptr) { } void start(std::shared_ptr tt_ptr) { + in_apex prevent_deadlocks; #if defined(APEX_DEBUG)//_disabled) debug_print("Start", tt_ptr); #endif @@ -679,7 +692,7 @@ void start(std::shared_ptr tt_ptr) { } if (_notify_listeners) { bool success = true; - //cout << thread_instance::get_id() << " Start : " << id->get_name() << + //cout << thread_instance::get_id() << " Start : " <task_id->get_name() << //endl; fflush(stdout); //read_lock_type l(instance->listener_mutex); for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { @@ -704,6 +717,7 @@ void start(std::shared_ptr tt_ptr) { } profiler* resume(const std::string &timer_name) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { APEX_UTIL_REF_COUNT_DISABLED_RESUME @@ -754,6 +768,7 @@ profiler* resume(const std::string &timer_name) { } profiler* resume(const apex_function_address function_address) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { APEX_UTIL_REF_COUNT_DISABLED_RESUME @@ -790,6 +805,7 @@ profiler* resume(const apex_function_address function_address) { } profiler* resume(profiler * p) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { APEX_UTIL_REF_COUNT_DISABLED_RESUME @@ -834,6 +850,7 @@ profiler* resume(profiler * p) { } void reset(const std::string &timer_name) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return; } apex* instance = apex::instance(); // get the Apex static instance @@ -848,6 +865,7 @@ void reset(const std::string &timer_name) { } void reset(apex_function_address function_address) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. 
if (apex_options::disable() == true) { return; } apex* instance = apex::instance(); // get the Apex static instance @@ -882,6 +900,7 @@ void apex::complete_task(std::shared_ptr task_wrapper_ptr) { } void stop(profiler* the_profiler, bool cleanup) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { APEX_UTIL_REF_COUNT_DISABLED_STOP @@ -931,6 +950,7 @@ void stop(profiler* the_profiler, bool cleanup) { } void stop(std::shared_ptr tt_ptr) { + in_apex prevent_deadlocks; #if defined(APEX_DEBUG)//_disabled) debug_print("Stop", tt_ptr); #endif @@ -968,7 +988,7 @@ void stop(std::shared_ptr tt_ptr) { } } //cout << thread_instance::get_id() << " Stop : " << - //tt_ptr->tt_ptr->get_task_id()->get_name() << endl; fflush(stdout); + //tt_ptr->get_task_id()->get_name() << endl; fflush(stdout); static std::string apex_process_profile_str("apex::process_profiles"); if (p->tt_ptr->get_task_id()->get_name(false).compare(apex_process_profile_str) == 0) { @@ -981,6 +1001,7 @@ void stop(std::shared_ptr tt_ptr) { void yield(profiler* the_profiler) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { APEX_UTIL_REF_COUNT_DISABLED_YIELD @@ -1026,6 +1047,7 @@ void yield(profiler* the_profiler) void yield(std::shared_ptr tt_ptr) { + in_apex prevent_deadlocks; #if defined(APEX_DEBUG)//_disabled) debug_print("Yield", tt_ptr); #endif @@ -1075,6 +1097,7 @@ void yield(std::shared_ptr tt_ptr) void sample_value(const std::string &name, double value, bool threaded) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return; } // if APEX is suspended, do nothing. @@ -1120,6 +1143,7 @@ std::shared_ptr new_task( const uint64_t task_id, const std::shared_ptr parent_task) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } // if APEX is suspended, do nothing. 
@@ -1146,6 +1170,7 @@ std::shared_ptr new_task( const apex_function_address function_address, const uint64_t task_id, const std::shared_ptr parent_task) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } // if APEX is suspended, do nothing. @@ -1163,6 +1188,7 @@ std::shared_ptr new_task( std::shared_ptr update_task( std::shared_ptr wrapper, const std::string &timer_name) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } // if APEX is suspended, do nothing. @@ -1202,6 +1228,7 @@ std::shared_ptr update_task( std::shared_ptr update_task( std::shared_ptr wrapper, const apex_function_address function_address) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } // if APEX is suspended, do nothing. @@ -1236,6 +1263,7 @@ std::shared_ptr update_task( std::atomic custom_event_count(APEX_CUSTOM_EVENT_1); apex_event_type register_custom_event(const std::string &name) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return APEX_CUSTOM_EVENT_1; } apex* instance = apex::instance(); // get the Apex static instance @@ -1253,6 +1281,7 @@ apex_event_type register_custom_event(const std::string &name) { } void custom_event(apex_event_type event_type, void * custom_data) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return; } // get the Apex static instance @@ -1368,6 +1397,7 @@ void finalize_plugins(void) { } std::string dump(bool reset) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. 
if (apex_options::disable() == true) { return(std::string("")); } bool old_screen_output = apex_options::use_screen_output(); @@ -1408,26 +1438,27 @@ void ompt_force_shutdown(void); void finalize() { - if (!_initialized) { FUNCTION_EXIT return; } // protect against finalization without initialization - apex* instance = apex::instance(); // get the Apex static instance - if (!instance) { FUNCTION_EXIT return; } // protect against calls after finalization - if (apex_options::use_jupyter_support()) { - // reset all counters, and return. - reset(APEX_NULL_FUNCTION_ADDRESS); - return; - } - FUNCTION_ENTER + in_apex prevent_deadlocks; + if (!_initialized) { return; } // protect against finalization without initialization // prevent re-entry, be extra strict about race conditions - it is // possible. mutex shutdown_mutex; static bool finalized = false; { unique_lock l(shutdown_mutex); - if (finalized) { FUNCTION_EXIT return; }; + if (finalized) { return; }; finalized = true; } + apex* instance = apex::instance(); // get the Apex static instance + if (!instance) { return; } // protect against calls after finalization + if (apex_options::use_jupyter_support()) { + // reset all counters, and return. + reset(APEX_NULL_FUNCTION_ADDRESS); + return; + } // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return; } + FUNCTION_ENTER // FIRST, stop the top level timer, while the infrastructure is still // functioning. if (top_level_timer() != nullptr) { stop(top_level_timer()); } @@ -1447,6 +1478,12 @@ void finalize() /* This could take a while */ #ifdef APEX_WITH_CUDA flushTrace(); +#endif +#ifdef APEX_HAVE_TCMALLOC + //tcmalloc::destroy_hook(); + disable_memory_wrapper(); +#else + disable_memory_wrapper(); #endif // stop processing new timers/counters/messages/tasks/etc. 
apex_options::suspend(true); @@ -1480,6 +1517,7 @@ void finalize() } void cleanup(void) { + in_apex prevent_deadlocks; FUNCTION_ENTER _program_over = true; #ifdef APEX_HAVE_HPX @@ -1522,6 +1560,7 @@ void cleanup(void) { void register_thread(const std::string &name) { + in_apex prevent_deadlocks; FUNCTION_ENTER // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return; } @@ -1570,6 +1609,7 @@ void register_thread(const std::string &name) void exit_thread(void) { + in_apex prevent_deadlocks; // get the Apex static instance apex* instance = apex::instance(); // protect against calls after finalization @@ -1610,6 +1650,7 @@ void apex::stop_all_policy_handles(void) { apex_policy_handle* register_policy(const apex_event_type when, std::function f) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } int id = -1; @@ -1629,6 +1670,7 @@ apex_policy_handle* register_policy(const apex_event_type when, std::set register_policy(std::set when, std::function f) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return std::set(); @@ -1653,6 +1695,7 @@ int register_policy(std::chrono::duration const& period, apex_policy_handle* register_periodic_policy(unsigned long period_microseconds, std::function f) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } int id = -1; @@ -1761,6 +1804,7 @@ apex_policy_handle * sample_runtime_counter(unsigned long period, const } void deregister_policy(apex_policy_handle * handle) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return; } // disable processing of policy for now @@ -1799,6 +1843,7 @@ void stop_all_async_threads(void) { } apex_profile* get_profile(apex_function_address action_address) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. 
if (apex_options::disable() == true) { return nullptr; } task_identifier id(action_address); @@ -1809,6 +1854,7 @@ apex_profile* get_profile(apex_function_address action_address) { } apex_profile* get_profile(const std::string &timer_name) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } task_identifier id(timer_name); @@ -1819,6 +1865,7 @@ apex_profile* get_profile(const std::string &timer_name) { } apex_profile* get_profile(const task_identifier &task_id) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return nullptr; } profile * tmp = apex::__instance()->the_profiler_listener->get_profile(task_id); @@ -1858,6 +1905,7 @@ void print_options() { } void send (uint64_t tag, uint64_t size, uint64_t target) { + in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { return ; } // if APEX is suspended, do nothing. @@ -1885,6 +1933,7 @@ void send (uint64_t tag, uint64_t size, uint64_t target) { void recv (uint64_t tag, uint64_t size, uint64_t source_rank, uint64_t source_thread) { + in_apex prevent_deadlocks; APEX_UNUSED(source_thread); // if APEX is disabled, do nothing. 
if (apex_options::disable() == true) { return ; } @@ -2223,6 +2272,10 @@ void yield_adapter(std::shared_ptr tt_p static_pointer_cast(tt_ptr)); } +void sample_value_adapter(const std::string &name, double value) { + APEX_TOP_LEVEL_PACKAGE::sample_value(name, value, false); +} + static void apex_register_with_hpx(void) { hpx::util::external_timer::registration reg; reg.type = hpx::util::external_timer::init_flag; @@ -2241,7 +2294,7 @@ static void apex_register_with_hpx(void) { reg.record.new_task_address = &new_task_adapter; hpx::util::external_timer::register_external_timer(reg); reg.type = hpx::util::external_timer::sample_value_flag; - reg.record.sample_value = &APEX_TOP_LEVEL_PACKAGE::sample_value; + reg.record.sample_value = &sample_value_adapter; hpx::util::external_timer::register_external_timer(reg); reg.type = hpx::util::external_timer::send_flag; reg.record.send = &APEX_TOP_LEVEL_PACKAGE::send; diff --git a/src/apex/apex_types.h b/src/apex/apex_types.h index ccbe6fe4..55072887 100644 --- a/src/apex/apex_types.h +++ b/src/apex/apex_types.h @@ -303,6 +303,7 @@ inline unsigned int sc_nprocessors_onln() macro (APEX_OMPT_HIGH_OVERHEAD_EVENTS, ompt_high_overhead_events, \ bool, false) \ macro (APEX_PIN_APEX_THREADS, pin_apex_threads, bool, true) \ + macro (APEX_TRACK_MEMORY, track_memory, bool, false) \ macro (APEX_TASK_SCATTERPLOT, task_scatterplot, bool, false) \ macro (APEX_TIME_TOP_LEVEL_OS_THREADS, top_level_os_threads, bool, false) \ macro (APEX_POLICY_DRAIN_TIMEOUT, policy_drain_timeout, int, 1000) \ diff --git a/src/apex/memory_wrapper.cpp b/src/apex/memory_wrapper.cpp new file mode 100644 index 00000000..2cd5c101 --- /dev/null +++ b/src/apex/memory_wrapper.cpp @@ -0,0 +1,70 @@ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include "apex_api.hpp" + +namespace apex { + +void enable_memory_wrapper() { + if (!apex_options::track_memory()) { return; } + typedef void (*apex_memory_initialized_t)(); + static 
apex_memory_initialized_t apex_memory_initialized = NULL; + void * memory_so; + + memory_so = dlopen("libapex_memory_wrapper.so", RTLD_NOW); + + if (memory_so) { + char const * err; + + dlerror(); // reset error flag + apex_memory_initialized = + (apex_memory_initialized_t)dlsym(memory_so, + "apex_memory_initialized"); + // Check for errors + if ((err = dlerror())) { + printf("APEX: ERROR obtaining symbol info in auditor: %s\n", err); + } else { + apex_memory_initialized(); + printf("APEX: Starting memory tracking\n"); + } + dlclose(memory_so); + } else { + printf("APEX: ERROR in opening APEX library in auditor.\n"); + } + dlerror(); // reset error flag +} + +void disable_memory_wrapper() { + if (!apex_options::track_memory()) { return; } + typedef void (*apex_memory_finalized_t)(); + static apex_memory_finalized_t apex_memory_finalized = NULL; + void * memory_so; + + memory_so = dlopen("libapex_memory_wrapper.so", RTLD_NOW); + + if (memory_so) { + char const * err; + + dlerror(); // reset error flag + apex_memory_finalized = + (apex_memory_finalized_t)dlsym(memory_so, + "apex_memory_finalized"); + // Check for errors + if ((err = dlerror())) { + printf("APEX: ERROR obtaining symbol info in auditor: %s\n", err); + } else { + apex_memory_finalized(); + //printf("APEX: Stopping memory tracking\n"); + } + dlclose(memory_so); + } else { + printf("APEX: ERROR in opening APEX library in auditor.\n"); + } + dlerror(); // reset error flag +} + +} diff --git a/src/apex/proc_read.cpp b/src/apex/proc_read.cpp index c321c814..87ea27af 100644 --- a/src/apex/proc_read.cpp +++ b/src/apex/proc_read.cpp @@ -1100,6 +1100,7 @@ namespace apex { /* This is the main function for the reader thread. 
*/ void* proc_data_reader::read_proc(void * _ptw) { + in_apex prevent_deadlocks; pthread_wrapper* ptw = (pthread_wrapper*)_ptw; // make sure APEX knows this is not a worker thread thread_instance::instance(false); diff --git a/src/apex/profiler_listener.cpp b/src/apex/profiler_listener.cpp index 40775f23..642c362b 100644 --- a/src/apex/profiler_listener.cpp +++ b/src/apex/profiler_listener.cpp @@ -1767,6 +1767,7 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl } void profiler_listener::push_profiler_public(std::shared_ptr &p) { + in_apex prevent_deadlocks; push_profiler(0, p); } diff --git a/src/apex/task_identifier.cpp b/src/apex/task_identifier.cpp index 29a6a632..0aaaf755 100644 --- a/src/apex/task_identifier.cpp +++ b/src/apex/task_identifier.cpp @@ -64,7 +64,7 @@ std::mutex bfd_mutex; if (_resolved_name == "") { //_resolved_name = lookup_address((uintptr_t)address, false); _resolved_name = thread_instance::instance().map_addr_to_name(address); - _resolved_name.assign((*demangle(_resolved_name))); + _resolved_name.assign((demangle(_resolved_name))); } } std::string retval(_resolved_name); @@ -85,7 +85,7 @@ std::mutex bfd_mutex; std::string * tmp = lookup_address((uintptr_t)addr_addr, true); REGEX_NAMESPACE::regex old_address("UNRESOLVED ADDR " + addr_str); retval = REGEX_NAMESPACE::regex_replace(retval, old_address, - (*demangle(*tmp))); + (demangle(*tmp))); } #endif static std::string cudastr("GPU: "); @@ -95,28 +95,25 @@ std::mutex bfd_mutex; std::stringstream ss; std::string tmp = retval.substr(cudastr.size(), retval.size() - cudastr.size()); - std::string * demangled = demangle(tmp); - ss << cudastr << *demangled; - free(demangled); + std::string demangled = demangle(tmp); + ss << cudastr << demangled; retval.assign(ss.str()); } else if (retval.find(kernel) != std::string::npos) { std::stringstream ss; std::string tmp = retval.substr(kernel.size(), retval.size() - kernel.size()); - std::string * demangled = demangle(tmp); 
- ss << kernel << *demangled; - free(demangled); + std::string demangled = demangle(tmp); + ss << kernel << demangled; retval.assign(ss.str()); } else if (retval.find(kernel2) != std::string::npos) { std::stringstream ss; std::string tmp = retval.substr(kernel2.size(), retval.size() - kernel2.size()); - std::string * demangled = demangle(tmp); - ss << kernel2 << *demangled; - free(demangled); + std::string demangled = demangle(tmp); + ss << kernel2 << demangled; retval.assign(ss.str()); } else { - retval.assign((*demangle(retval))); + retval.assign((demangle(retval))); } } return retval; diff --git a/src/apex/tcmalloc_hooks.cpp b/src/apex/tcmalloc_hooks.cpp new file mode 100644 index 00000000..766351c4 --- /dev/null +++ b/src/apex/tcmalloc_hooks.cpp @@ -0,0 +1,79 @@ +#ifdef APEX_HAVE_TCMALLOC + +#include "tcmalloc_hooks.hpp" +#include "gperftools/malloc_hook.h" +#include "apex_api.hpp" +#include "apex_assert.h" + +namespace apex { +namespace tcmalloc { + +tracker& getTracker() { + static tracker t; + return t; +} + +bool& inWrapper() { + thread_local static bool _inWrapper = false; + return _inWrapper; +} + +void NewHook(const void* ptr, size_t size) { + // prevent infinite recursion... + if (inWrapper() || apex::in_apex::get() > 0) { return; } + inWrapper() = true; + tracker& t = getTracker(); + double value = (double)(size); + apex::sample_value("Memory: Bytes Allocated", value, true); + t.hostMapMutex.lock(); + //std::cout << "Address " << ptr << " has " << size << " bytes." << std::endl; + t.hostMemoryMap[ptr] = value; + t.hostMapMutex.unlock(); + t.hostTotalAllocated.fetch_add(size, std::memory_order_relaxed); + value = (double)(t.hostTotalAllocated); + apex::sample_value("Memory: Total Bytes Occupied", value); + inWrapper() = false; +} + +void DeleteHook(const void* ptr) { + // prevent infinite recursion... 
+ if (inWrapper() || apex::in_apex::get() > 0) { return; } + inWrapper() = true; + tracker& t = getTracker(); + size_t size = 0; + t.hostMapMutex.lock(); + if (t.hostMemoryMap.count(ptr) > 0) { + size = t.hostMemoryMap[ptr]; + t.hostMemoryMap.erase(ptr); + } else { + //std::cerr << "Address " << ptr << " not found!" << std::endl; + t.hostMapMutex.unlock(); + inWrapper() = false; return; // untracked pointer: release the re-entrancy guard before bailing out + } + t.hostMapMutex.unlock(); + double value = (double)(size); + apex::sample_value("Memory: Bytes Freed", value, true); + t.hostTotalAllocated.fetch_sub(size, std::memory_order_relaxed); + value = (double)(t.hostTotalAllocated); + apex::sample_value("Memory: Total Bytes Occupied", value); + inWrapper() = false; // release the guard; leaving it set made both hooks return early on this thread from then on +} + +void init_hook() { + if (apex_options::track_memory()) { + getTracker(); + APEX_ASSERT(MallocHook::AddNewHook(&NewHook)); + APEX_ASSERT(MallocHook::AddDeleteHook(&DeleteHook)); + } +} + +void destroy_hook() { + if (apex_options::track_memory()) { + APEX_ASSERT(MallocHook::RemoveNewHook(&NewHook)); + APEX_ASSERT(MallocHook::RemoveDeleteHook(&DeleteHook)); + } +} + +} +} +#endif // APEX_HAVE_TCMALLOC diff --git a/src/apex/tcmalloc_hooks.hpp b/src/apex/tcmalloc_hooks.hpp new file mode 100644 index 00000000..27261612 --- /dev/null +++ b/src/apex/tcmalloc_hooks.hpp @@ -0,0 +1,27 @@ +#ifdef APEX_HAVE_TCMALLOC + +#include +#include +#include +#include + +namespace apex { +namespace tcmalloc { + +class tracker { +public: + std::atomic hostTotalAllocated; + std::unordered_map hostMemoryMap; + std::mutex hostMapMutex; + tracker() : hostTotalAllocated(0) { } +}; + +bool& inWrapper(void); +void NewHook(const void* ptr, size_t size); +void DeleteHook(const void* ptr); +void init_hook(); +void destroy_hook(); + +} +} +#endif // APEX_HAVE_TCMALLOC diff --git a/src/apex/utils.cpp b/src/apex/utils.cpp index ed8584c8..d1b1f469 100644 --- a/src/apex/utils.cpp +++ b/src/apex/utils.cpp @@ -63,8 +63,8 @@ std::vector &split(const std::string &s, char delim, return elems; } -std::string* demangle(const 
std::string& timer_name) { - std::string* demangled = new std::string(timer_name); +std::string demangle(const std::string& timer_name) { + std::string demangled = std::string(timer_name); #if defined(__GNUC__) int status; char *realname = abi::__cxa_demangle(timer_name.c_str(), 0, 0, &status); @@ -75,7 +75,7 @@ std::string* demangle(const std::string& timer_name) { *index = 0; // terminate before templates for brevity } */ - demangled = new std::string(realname); + demangled = std::string(realname); free(realname); } else { #if defined(APEX_DEBUG) @@ -555,6 +555,19 @@ node_color * get_node_color(double v,double vmin,double vmax) return(c); } +size_t& in_apex::get() { + thread_local static size_t _in = 0; + return _in; +} +in_apex::in_apex() { + get()++; + //printf("IN %lu, %lu\n", syscall(SYS_gettid), get()); +} +in_apex::~in_apex() { + get()--; + //printf("OUT %lu, %lu\n", syscall(SYS_gettid), get()); +} + } // namespace apex extern "C" void __cyg_profile_func_enter(void *this_fn, void *call_site) __attribute__((no_instrument_function)); diff --git a/src/apex/utils.hpp b/src/apex/utils.hpp index ba6f4898..146cba98 100644 --- a/src/apex/utils.hpp +++ b/src/apex/utils.hpp @@ -237,7 +237,7 @@ inline unsigned int hardware_concurrency() return(hwc); } -std::string* demangle(const std::string& timer_name); +std::string demangle(const std::string& timer_name); void set_thread_affinity(void); void set_thread_affinity(int core); @@ -271,5 +271,19 @@ class node_color { node_color * get_node_color_visible(double v, double vmin, double vmax); node_color * get_node_color(double v,double vmin,double vmax); +/* Defined in memory_wrapper.cpp */ +void enable_memory_wrapper(void); +void disable_memory_wrapper(void); + +#include + +class in_apex { + public: + static size_t& get(); + in_apex(); + ~in_apex(); +}; + } + diff --git a/src/scripts/apex_exec b/src/scripts/apex_exec index c82994cf..e58cc0db 100755 --- a/src/scripts/apex_exec +++ b/src/scripts/apex_exec @@ -116,6 
+116,7 @@ while (( "$#" )); do ;; --apex:memory) memory=yes + export APEX_TRACK_MEMORY=1 shift ;; --apex:otf2) @@ -263,16 +264,17 @@ if [ "x${LD_LIBRARY_PATH}" = "x" ] ; then else APEX_LD_LIBRARY_PATH=${BASEDIR}/lib:${LD_LIBRARY_PATH} fi -if [ $pthread = 1 ]; then +if [ $pthread = yes ]; then PTHREAD_LIB=${BASEDIR}/lib/libapex_pthread_wrapper${SHLIBX}: fi -if [ $memory = 1 ]; then - PTHREAD_LIB=${BASEDIR}/lib/libapex_memory_wrapper${SHLIBX}: +if [ $memory = yes ]; then + MEMORY_LIB=${BASEDIR}/lib/libapex_memory_wrapper${SHLIBX}: + APEX_LD_AUDIT=${BASEDIR}/lib/libapex_dl_auditor${SHLIBX} fi if [ ${apple} = 1 ]; then - APEX_LD_PRELOAD=${PTHREAD_LIB}${BASEDIR}/lib/libapex${SHLIBX}:@OMPT_LIBRARY@ + APEX_LD_PRELOAD=${PTHREAD_LIB}${MEMORY_LIB}${BASEDIR}/lib/libapex${SHLIBX}:@OMPT_LIBRARY@ else - APEX_LD_PRELOAD=${PTHREAD_LIB}${BASEDIR}/lib/libapex${SHLIBX}:@OMPT_LIBRARY@:${LD_PRELOAD} + APEX_LD_PRELOAD=${PTHREAD_LIB}${MEMORY_LIB}${BASEDIR}/lib/libapex${SHLIBX}:@OMPT_LIBRARY@:${LD_PRELOAD} fi # remove double colons @@ -309,9 +311,12 @@ if [ $apple = 1 ]; then else export LD_LIBRARY_PATH=${APEX_LD_LIBRARY_PATH} export LD_PRELOAD=${APEX_LD_PRELOAD} + # Not sure if the DL auditor is necessary + #export LD_AUDIT=${APEX_LD_AUDIT} if [ $debug = yes ] ; then echo "set env LD_LIBRARY_PATH=${APEX_LD_LIBRARY_PATH}" >> ./.gdbcmds echo "set env LD_PRELOAD=${APEX_LD_PRELOAD}" >> ./.gdbcmds + #echo "set env LD_AUDIT=${APEX_LD_AUDIT}" >> ./.gdbcmds debugger="gdb -x ./.gdbcmds --args" fi fi diff --git a/src/wrappers/CMakeLists.txt b/src/wrappers/CMakeLists.txt index 45a993db..9f920fc2 100644 --- a/src/wrappers/CMakeLists.txt +++ b/src/wrappers/CMakeLists.txt @@ -11,11 +11,15 @@ add_dependencies (apex_pthread_wrapper apex) target_link_libraries (apex_pthread_wrapper apex) # Add library called "apex_memory_wrapper" that is built from the source file -add_library (apex_memory_wrapper memory_wrapper.c memory_wrapper_internal.cpp) +add_library (apex_memory_wrapper memory_wrapper.cpp 
memory_wrapper_internal.cpp) add_dependencies (apex_memory_wrapper apex) target_link_libraries (apex_memory_wrapper apex) -INSTALL(TARGETS apex_pthread_wrapper apex_memory_wrapper +# Add library called "apex_dl_auditor" that is built from the source file +add_library (apex_dl_auditor dl_auditor.c) +target_link_libraries (apex_dl_auditor dl) + +INSTALL(TARGETS apex_pthread_wrapper apex_memory_wrapper apex_dl_auditor RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib diff --git a/src/wrappers/dl_auditor.c b/src/wrappers/dl_auditor.c new file mode 100644 index 00000000..95f441ab --- /dev/null +++ b/src/wrappers/dl_auditor.c @@ -0,0 +1,50 @@ +#define _GNU_SOURCE +#include +#include +#include + +int * objopen_counter() +{ + static int count = 0; + return &count; +} + +// This auditor supports all API versions. +unsigned int la_version(unsigned int version) +{ + return version; +} + +unsigned int la_objopen(struct link_map *map, Lmid_t lmid, uintptr_t *cookie) +{ + (*objopen_counter())++; + return 0; +} + +void la_preinit(uintptr_t *cookie) +{ + typedef void (*apex_memory_dl_initialized_t)(); + static apex_memory_dl_initialized_t apex_memory_dl_initialized = NULL; + void * memory_so; + + memory_so = dlmopen(LM_ID_BASE, "libapex_memory_wrapper.so", RTLD_NOW); + + if (memory_so) { + char const * err; + + dlerror(); // reset error flag + apex_memory_dl_initialized = + (apex_memory_dl_initialized_t)dlsym(memory_so, + "apex_memory_dl_initialized"); + // Check for errors + if ((err = dlerror())) { + printf("APEX: ERROR obtaining symbol info in auditor: %s\n", err); + } else { + apex_memory_dl_initialized(); + } + dlclose(memory_so); + } else { + printf("APEX: ERROR in opening APEX library in auditor.\n"); + } +} + diff --git a/src/wrappers/memory_wrapper.c b/src/wrappers/memory_wrapper.c deleted file mode 100644 index 5211d2cc..00000000 --- a/src/wrappers/memory_wrapper.c +++ /dev/null @@ -1,197 +0,0 @@ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - 
-#include -#include -#include -#include -#include -#include - -#ifdef APEX_PRELOAD_LIB -/********************************/ -/* LD_PRELOAD wrapper functions */ -/********************************/ - -#define RESET_DLERROR() dlerror() -#define CHECK_DLERROR() { \ - char const * err = dlerror(); \ - if (err) { \ - printf("Error getting %s handle: %s\n", name, err); \ - fflush(stdout); \ - exit(1); \ - } \ -} - -static -void * get_system_function_handle(char const * name, void * caller) -{ - void * handle; - - // Reset error pointer - RESET_DLERROR(); - - // Attempt to get the function handle - handle = dlsym(RTLD_NEXT, name); - - // Detect errors - CHECK_DLERROR(); - - // Prevent recursion if more than one wrapping approach has been loaded. - // This happens because we support wrapping pthreads three ways at once: - // #defines in Profiler.h, -Wl,-wrap on the link line, and LD_PRELOAD. - if (handle == caller) { - RESET_DLERROR(); - void * syms = dlopen(NULL, RTLD_NOW); - CHECK_DLERROR(); - do { - RESET_DLERROR(); - handle = dlsym(syms, name); - CHECK_DLERROR(); - } while (handle == caller); - } - - return handle; -} - -void* malloc (size_t size) { - static malloc_p _malloc = NULL; - if (!_malloc) { - _malloc = (malloc_p)get_system_function_handle("malloc", (void*)malloc); - } - return apex_malloc_wrapper(_malloc, size); -} - -void* calloc (size_t nmemb, size_t size) { - static calloc_p _calloc = NULL; - if (!_calloc) { - _calloc = (calloc_p)get_system_function_handle("calloc", (void*)calloc); - } - return apex_calloc_wrapper(_calloc, nmemb, size); -} - -#if defined(memalign) -void* memalign (size_t alignment, size_t size) { - static memalign_p _memalign = NULL; - if (!_memalign) { - _memalign = (memalign_p)get_system_function_handle("memalign", (void*)memalign); - } - return apex_memalign_wrapper(_memalign, alignment, size); -} -#endif - -void* realloc (void* ptr, size_t size) { - static realloc_p _realloc = NULL; - if (!_realloc) { - _realloc = 
(realloc_p)get_system_function_handle("realloc", (void*)realloc); - } - return apex_realloc_wrapper(_realloc, ptr, size); -} - -#if defined(reallocarray) -void* reallocarray (void* ptr, size_t nmemb, size_t size) { - static reallocarray_p _reallocarray = NULL; - if (!_reallocarray) { - _reallocarray = (reallocarray_p)get_system_function_handle("reallocarray", (void*)reallocarray); - } - return apex_reallocarray_wrapper(_reallocarray, ptr, nmemb, size); -} -#endif - -#if defined(reallocf) -void* reallocf (void* ptr, size_t size) { - static reallocf_p _reallocf = NULL; - if (!_reallocf) { - _reallocf = (reallocf_p)get_system_function_handle("reallocf", (void*)reallocf); - } - return apex_reallocf_wrapper(_reallocf, ptr, size); -} -#endif - -#if defined(valloc) -void* valloc (size_t size) { - static valloc_p _valloc = NULL; - if (!_valloc) { - _valloc = (valloc_p)get_system_function_handle("valloc", (void*)valloc); - } - return apex_valloc_wrapper(_valloc, size); -} -#endif - -#if defined(malloc_usable_size) -size_t malloc_usable_size (void* ptr) { - static malloc_usable_size_p _malloc_usable_size = NULL; - if (!_malloc_usable_size) { - _malloc_usable_size = (malloc_usable_size_p)get_system_function_handle("malloc_usable_size", (void*)malloc_usable_size); - } - return apex_malloc_usable_size_wrapper(_malloc_usable_size, ptr); -} -#endif - -void free (void* ptr) { - static free_p _free = NULL; - if (!_free) { - _free = (free_p)get_system_function_handle("free", (void*)free); - } - return apex_free_wrapper(_free, ptr); -} - -#else // Wrap via the the link line. 
- -void* __real_malloc(size_t); -void* __wrap_malloc(size_t size) { - return apex_malloc_wrapper(__real_malloc, size); -} - -void* __real_calloc(size_t, size_t); -void* __wrap_calloc(size_t nmemb, size_t size) { - return apex_calloc_wrapper(__real_calloc, nmemb, size); -} - -#if defined(memalign) -void* __real_memalign(size_t, size_t); -void* __wrap_memalign(size_t alignment, size_t size) { - return apex_memalign_wrapper(__real_memalign, alignment, size); -} -#endif - -void* __real_realloc(void*, size_t); -void* __wrap_realloc(void* ptr, size_t size) { - return apex_realloc_wrapper(__real_realloc, ptr, size); -} - -#if defined(reallocarray) -void* __real_reallocarray(void*, size_t, size_t); -void* __wrap_reallocarray(void* ptr, size_t nmemb, size_t size) { - return apex_reallocarray_wrapper(__real_reallocarray, ptr, nmemb, size); -} -#endif - -#if defined(reallocf) -void* __real_reallocf(void*, size_t); -void* __wrap_reallocf(void* ptr, size_t size) { - return apex_reallocf_wrapper(__real_reallocf, ptr, size); -} -#endif - -#if defined(valloc) -void* __real_valloc(size_t); -void* __wrap_valloc(size_t size) { - return apex_valloc_wrapper(__vallocllocf, size); -} -#endif - -#if defined(malloc_usable_size) -size_t __real_malloc_usable_size(void*); -size_t __wrap_malloc_usable_size(void* ptr) { - return apex_malloc_usable_size_wrapper(__malloc_usable_size, ptr); -} -#endif - -void __real_free(void*); -void __wrap_free(void* ptr) { - return apex_free_wrapper(__real_free, ptr); -} - -#endif //APEX_PRELOAD_LIB diff --git a/src/wrappers/memory_wrapper.cpp b/src/wrappers/memory_wrapper.cpp new file mode 100644 index 00000000..40908e57 --- /dev/null +++ b/src/wrappers/memory_wrapper.cpp @@ -0,0 +1,357 @@ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include "apex_api.hpp" + +#ifdef _MSC_VER +/* define these functions as non-intrinsic */ +#pragma function( memcpy, strcpy, strcat ) +#endif + +/* At 
initialization, we don't want to start tracking malloc/free + * until both APEX is initialized AND the dynamic library loader + * has finished initialization and is about to launch main. + * So we use 2 flags to accomplish this. */ + +bool& apex_ready() { + static bool _ready = false; + return _ready; +} + +bool& dl_ready() { + static bool _ready = true; + return _ready; +} + +bool& enabled() { + static bool _enabled = true; + return _enabled; +} + +bool all_clear() { + return apex_ready() && dl_ready() && enabled(); +} + +extern "C" +void apex_memory_initialized() { + apex_memory_wrapper_init(); + apex_ready() = true; +} + +extern "C" +void apex_memory_finalized() { + apex_ready() = false; + apex_report_leaks(); +} + +extern "C" +void apex_memory_dl_initialized() { + dl_ready() = true; +} + +/* During startup, we need to do some memory management in case + * malloc/free is called during the startup process. */ + +// Memory for bootstrapping. must not be static! +char bootstrap_heap[BOOTSTRAP_HEAP_SIZE]; +char * bootstrap_base = bootstrap_heap; + +static inline int is_bootstrap(void * ptr) { + char const * const p = (char*)ptr; + return (p < bootstrap_heap + BOOTSTRAP_HEAP_SIZE) && (bootstrap_heap < p); +} + +static void * bootstrap_alloc(size_t align, size_t size) { + char * ptr; + + // Check alignment. Default alignment is sizeof(long) + if(!align) { + align = sizeof(long); + + if (size < align) { + // Align to the next lower power of two + align = size; + while (align & (align-1)) { + align &= align-1; + } + } + } + + // Calculate address + ptr = (char*)(((size_t)bootstrap_base + (align-1)) & ~(align-1)); + bootstrap_base = ptr + size; + + // Check for overflow + if (bootstrap_base >= (bootstrap_heap + BOOTSTRAP_HEAP_SIZE)) { + // These calls are unsafe, but we're about to die anyway. + printf("APEX bootstreap heap exceeded.
Increase BOOTSTRAP_HEAP_SIZE in " __FILE__ " and try again.\n"); + fflush(stdout); + exit(1); + } + + return (void*)ptr; +} + +static inline void bootstrap_free(void * ptr) { + // Do nothing: bootstrap memory is deallocated on program exit + APEX_UNUSED(ptr); +} + + +#ifdef APEX_PRELOAD_LIB +/********************************/ +/* LD_PRELOAD wrapper functions */ +/********************************/ + +#define RESET_DLERROR() dlerror() +#define CHECK_DLERROR() { \ + char const * err = dlerror(); \ + if (err) { \ + printf("Error getting %s handle: %s\n", name, err); \ + fflush(stdout); \ + exit(1); \ + } \ +} + +static +void * get_system_function_handle(char const * name, void * caller) +{ + void * handle; + + // Reset error pointer + RESET_DLERROR(); + + // Attempt to get the function handle + handle = dlsym(RTLD_NEXT, name); + + // Detect errors + CHECK_DLERROR(); + + // Prevent recursion if more than one wrapping approach has been loaded. + // This happens because we support wrapping pthreads three ways at once: + // #defines in Profiler.h, -Wl,-wrap on the link line, and LD_PRELOAD. 
+ if (handle == caller) { + RESET_DLERROR(); + void * syms = dlopen(NULL, RTLD_NOW); + CHECK_DLERROR(); + do { + RESET_DLERROR(); + handle = dlsym(syms, name); + CHECK_DLERROR(); + } while (handle == caller); + } + + return handle; +} + +extern "C" +void* malloc (size_t size) { + static malloc_p _malloc = NULL; + static bool initializing = false; + static bool bootstrapped = false; + if (!bootstrapped) { + if (!initializing) { + initializing = true; + _malloc = (malloc_p)get_system_function_handle("malloc", (void*)malloc); + } + if (!_malloc) { + return bootstrap_alloc(0, size); + } + if (!all_clear()) { + return _malloc(size); + } + bootstrapped = true; + } + if (all_clear()) { + return apex_malloc_wrapper(_malloc, size); + } + return _malloc(size); +} + +extern "C" +void free (void* ptr) { + static free_p _free = NULL; + static bool initializing = false; + static bool bootstrapped = false; + if (is_bootstrap(ptr)) { + // do nothing, effectively + return bootstrap_free(ptr); + } + if (!bootstrapped) { + if (!initializing) { + initializing = true; + _free = (free_p)get_system_function_handle("free", (void*)free); + } + if (!_free) { + // do nothing, effectively + return bootstrap_free(ptr); + } + if (!all_clear()) { + return _free(ptr); + } + bootstrapped = true; + } + if (all_clear()) { + return apex_free_wrapper(_free, ptr); + } + return _free(ptr); +} + +extern "C" +int puts (const char* s) { + static puts_p _puts = NULL; + static bool initializing = false; + static bool bootstrapped = false; + if (!bootstrapped) { + if (!initializing) { + initializing = true; + _puts = (puts_p)get_system_function_handle("puts", (void*)puts); + } + if (!_puts) { + // do nothing, effectively + return 0; + } + bootstrapped = true; + } + enabled() = false; + auto r = _puts(s); + enabled() = true; + return r; +} + +#if 0 +void* calloc (size_t nmemb, size_t size) { + static calloc_p _calloc = NULL; + if (!_calloc) { + _calloc = (calloc_p)get_system_function_handle("calloc", 
(void*)calloc); + } + return apex_calloc_wrapper(_calloc, nmemb, size); +} + +#if defined(memalign) +void* memalign (size_t alignment, size_t size) { + static memalign_p _memalign = NULL; + if (!_memalign) { + _memalign = (memalign_p)get_system_function_handle("memalign", (void*)memalign); + } + return apex_memalign_wrapper(_memalign, alignment, size); +} +#endif + +void* realloc (void* ptr, size_t size) { + static realloc_p _realloc = NULL; + if (!_realloc) { + _realloc = (realloc_p)get_system_function_handle("realloc", (void*)realloc); + } + return apex_realloc_wrapper(_realloc, ptr, size); +} + +#if defined(reallocarray) +void* reallocarray (void* ptr, size_t nmemb, size_t size) { + static reallocarray_p _reallocarray = NULL; + if (!_reallocarray) { + _reallocarray = (reallocarray_p)get_system_function_handle("reallocarray", (void*)reallocarray); + } + return apex_reallocarray_wrapper(_reallocarray, ptr, nmemb, size); +} +#endif + +#if defined(reallocf) +void* reallocf (void* ptr, size_t size) { + static reallocf_p _reallocf = NULL; + if (!_reallocf) { + _reallocf = (reallocf_p)get_system_function_handle("reallocf", (void*)reallocf); + } + return apex_reallocf_wrapper(_reallocf, ptr, size); +} +#endif + +#if defined(valloc) +void* valloc (size_t size) { + static valloc_p _valloc = NULL; + if (!_valloc) { + _valloc = (valloc_p)get_system_function_handle("valloc", (void*)valloc); + } + return apex_valloc_wrapper(_valloc, size); +} +#endif + +#if defined(malloc_usable_size) +size_t malloc_usable_size (void* ptr) { + static malloc_usable_size_p _malloc_usable_size = NULL; + if (!_malloc_usable_size) { + _malloc_usable_size = (malloc_usable_size_p)get_system_function_handle("malloc_usable_size", (void*)malloc_usable_size); + } + return apex_malloc_usable_size_wrapper(_malloc_usable_size, ptr); +} +#endif + +#endif + +#else // Wrap via the link line.
+ +void* __real_malloc(size_t); +void* __wrap_malloc(size_t size) { + return apex_malloc_wrapper(__real_malloc, size); +} + +void __real_free(void*); +void __wrap_free(void* ptr) { + return apex_free_wrapper(__real_free, ptr); +} + +#if 0 +void* __real_calloc(size_t, size_t); +void* __wrap_calloc(size_t nmemb, size_t size) { + return apex_calloc_wrapper(__real_calloc, nmemb, size); +} + +#if defined(memalign) +void* __real_memalign(size_t, size_t); +void* __wrap_memalign(size_t alignment, size_t size) { + return apex_memalign_wrapper(__real_memalign, alignment, size); +} +#endif + +void* __real_realloc(void*, size_t); +void* __wrap_realloc(void* ptr, size_t size) { + return apex_realloc_wrapper(__real_realloc, ptr, size); +} + +#if defined(reallocarray) +void* __real_reallocarray(void*, size_t, size_t); +void* __wrap_reallocarray(void* ptr, size_t nmemb, size_t size) { + return apex_reallocarray_wrapper(__real_reallocarray, ptr, nmemb, size); +} +#endif + +#if defined(reallocf) +void* __real_reallocf(void*, size_t); +void* __wrap_reallocf(void* ptr, size_t size) { + return apex_reallocf_wrapper(__real_reallocf, ptr, size); +} +#endif + +#if defined(valloc) +void* __real_valloc(size_t); +void* __wrap_valloc(size_t size) { + return apex_valloc_wrapper(__vallocllocf, size); +} +#endif + +#if defined(malloc_usable_size) +size_t __real_malloc_usable_size(void*); +size_t __wrap_malloc_usable_size(void* ptr) { + return apex_malloc_usable_size_wrapper(__malloc_usable_size, ptr); +} +#endif +#endif + +#endif //APEX_PRELOAD_LIB diff --git a/src/wrappers/memory_wrapper.h b/src/wrappers/memory_wrapper.h index 294ce8dd..6faf7e3e 100644 --- a/src/wrappers/memory_wrapper.h +++ b/src/wrappers/memory_wrapper.h @@ -4,12 +4,32 @@ #include +// Assume 4K pages unless we know otherwise. +// We cannot determine this at runtime because it must be known during +// the bootstrap process and it would be unsafe to make any system calls there. 
+#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +// Size of heap memory for library wrapper bootstrapping +#ifdef __APPLE__ +// Starting on macOS 11, PAGE_SIZE is not constant on macOS +// Apple recommends using PAGE_MAX_SIZE instead. +// see https://developer.apple.com/videos/play/wwdc2020/10214/?time=549 +#define BOOTSTRAP_HEAP_SIZE (3*PAGE_MAX_SIZE) +#else +#define BOOTSTRAP_HEAP_SIZE (3*PAGE_SIZE) +#endif + typedef void* (*malloc_p)(size_t); +typedef void (*free_p)(void*); +typedef int (*puts_p)(const char*); +#if 0 typedef void* (*calloc_p)(size_t, size_t); +typedef void* (*realloc_p)(void*, size_t); #if defined(memalign) typedef void* (*memalign_p)(void*, size_t, size_t); #endif -typedef void* (*realloc_p)(void*, size_t); #if defined(reallocarray) typedef void* (*reallocarray_p)(void*, size_t, size_t); #endif @@ -22,13 +42,18 @@ typedef void* (*valloc_p)(size_t); #if defined(malloc_usable_size) typedef size_t (*valloc_p)(void*); #endif -typedef void (*free_p)(void*); +#endif #ifdef __cplusplus extern "C" { #endif void* apex_malloc_wrapper(malloc_p malloc_call, size_t size); +void apex_free_wrapper(free_p free_call, void* ptr); +int apex_puts_wrapper(const char* s); +void apex_memory_wrapper_init(void); +void apex_report_leaks(void); +#if 0 void* apex_calloc_wrapper(calloc_p calloc_call, size_t nmemb, size_t size); #if defined(memalign) void* apex_memalign_wrapper(memalign_p calloc_call, size_t align, size_t size); @@ -46,7 +71,7 @@ void* apex_valloc_wrapper(valloc_p valloc_call, size_t size); #if defined(malloc_usable_size) void* apex_malloc_usable_size_wrapper(malloc_usable_size_p malloc_usable_size_call, void* ptr); #endif -void apex_free_wrapper(free_p free_call, void* ptr); +#endif #ifdef __cplusplus } diff --git a/src/wrappers/memory_wrapper_internal.cpp b/src/wrappers/memory_wrapper_internal.cpp index 70badbd9..d72fb55c 100644 --- a/src/wrappers/memory_wrapper_internal.cpp +++ b/src/wrappers/memory_wrapper_internal.cpp @@ -1,35 +1,130 @@ -#include 
"apex_api.hpp" #include "memory_wrapper.h" #include +#include +#include +#include +#include +#include "apex_api.hpp" +#include "utils.hpp" +//#include /////////////////////////////////////////////////////////////////////////////// // Below is the malloc wrapper /////////////////////////////////////////////////////////////////////////////// +typedef struct book_s { + std::atomic totalAllocated = 0.0; + std::unordered_map memoryMap; + std::mutex mapMutex; +} book_t; + +book_t& getBook() { + static book_t book; + return book; +} + +void record_alloc(size_t bytes, void* ptr) { + static book_t& book = getBook(); + double value = (double)(bytes); + apex::sample_value("Memory: Bytes Allocated", value, true); + book.mapMutex.lock(); + book.memoryMap[ptr] = value; + book.mapMutex.unlock(); + book.totalAllocated.fetch_add(bytes, std::memory_order_relaxed); + value = (double)(book.totalAllocated); + apex::sample_value("Memory: Total Bytes Occupied", value); +} + +void record_free(void* ptr) { + static book_t& book = getBook(); + size_t bytes; + book.mapMutex.lock(); + if (book.memoryMap.count(ptr) > 0) { + bytes = book.memoryMap[ptr]; + book.memoryMap.erase(ptr); + } else { + book.mapMutex.unlock(); + return; + } + book.mapMutex.unlock(); + double value = (double)(bytes); + apex::sample_value("Memory: Bytes Freed", value, true); + book.totalAllocated.fetch_sub(bytes, std::memory_order_relaxed); + value = (double)(book.totalAllocated); + apex::sample_value("Memory: Total Bytes Occupied", value); +} + +/* We need to access this global before the memory wrapper is enabled. + * Otherwise, when it is constructed during the first allocation, we + * could end up with a deadlock. 
*/ +void apex_memory_wrapper_init() { + static book_t& book = getBook(); + APEX_UNUSED(book); +} + bool& inWrapper() { thread_local static bool _inWrapper = false; return _inWrapper; } -extern "C" void* apex_malloc_wrapper(malloc_p malloc_call, size_t size) { - if(inWrapper()) { + if(inWrapper() || apex::in_apex::get() > 0) { // Another wrapper has already intercepted the call so just pass through return malloc_call(size); - printf("Here!\n"); - } else { - inWrapper() = true; - printf("Here!\n"); + } + inWrapper() = true; + // do the allocation + auto retval = malloc_call(size); + // record the state + record_alloc(size, retval); + inWrapper() = false; + return retval; +} - // do the allocation - auto retval = malloc_call(size); - apex::sample_value("malloc bytes", size, true); +void apex_free_wrapper(free_p free_call, void* ptr) { + if(inWrapper() || apex::in_apex::get() > 0) { + // Another wrapper has already intercepted the call so just pass through + return free_call(ptr); + } + inWrapper() = true; + // record the state + record_free(ptr); + // do the deallocation + free_call(ptr); + inWrapper() = false; + return; +} - inWrapper() = false; - return retval; +// Comparator function to sort pairs descending, according to second value +bool cmp(std::pair& a, + std::pair& b) +{ + return a.second > b.second; +} + +void apex_report_leaks() { + static book_t& book = getBook(); + // Declare vector of pairs + std::vector > sorted; + + // Copy key-value pair from Map + // to vector of pairs + book.mapMutex.lock(); + for (auto& it : book.memoryMap) { + sorted.push_back(it); + } + book.mapMutex.unlock(); + + // Sort using comparator function + sort(sorted.begin(), sorted.end(), cmp); + + // Print the sorted value + for (auto& it : sorted) { + std::cout << it.first << " leaked " << it.second << " bytes."
<< std::endl; } } +#if 0 extern "C" void* apex_calloc_wrapper(calloc_p calloc_call, size_t nmemb, size_t size) { if(inWrapper()) { @@ -152,26 +247,17 @@ size_t apex_malloc_usable_size_wrapper(malloc_usable_size_p malloc_usable_size_c } #endif -extern "C" -void apex_free_wrapper(free_p free_call, void* ptr) { - if(inWrapper()) { - // Another wrapper has already intercepted the call so just pass through - return free_call(ptr); - } else { - inWrapper() = true; - - // do the allocation - free_call(ptr); - - inWrapper() = false; - return; - } -} +#endif extern "C" void* apex_malloc(size_t size) { return apex_malloc_wrapper(malloc, size); } +extern "C" void apex_free(void* ptr) { + return apex_free_wrapper(free, ptr); +} + +#if 0 extern "C" void* apex_calloc(size_t nmemb, size_t size) { return apex_calloc_wrapper(calloc, nmemb, size); } @@ -210,8 +296,6 @@ extern "C" void* apex_malloc_usable_size(void* ptr) { } #endif -extern "C" void apex_free(void* ptr) { - return apex_free_wrapper(free, ptr); -} +#endif