Skip to content

Commit

Permalink
Splitting GPU and CPU memory allocation leak tracking.
Browse files Browse the repository at this point in the history
  • Loading branch information
khuck committed Jul 25, 2022
1 parent 3f5a1c7 commit 4aeade6
Show file tree
Hide file tree
Showing 17 changed files with 351 additions and 319 deletions.
1 change: 1 addition & 0 deletions src/apex/CMakeLists.hpx
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ set(apex_headers
exhaustive.hpp
gzstream.hpp
handler.hpp
memory_wrapper.hpp
policy_handler.hpp
profile.hpp
profiler.hpp
Expand Down
1 change: 1 addition & 0 deletions src/apex/CMakeLists.standalone
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ INSTALL(FILES apex.h
exhaustive.hpp
dependency_tree.hpp
handler.hpp
memory_wrapper.hpp
profile.hpp
apex_export.h
utils.hpp
Expand Down
3 changes: 3 additions & 0 deletions src/apex/apex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@
#include "proc_read.h"
#endif

#include "memory_wrapper.hpp"

#ifdef APEX_HAVE_HPX
#include <boost/assign.hpp>
#include <cstdint>
Expand Down Expand Up @@ -1650,6 +1652,7 @@ void finalize()
//tcmalloc::destroy_hook();
#endif
disable_memory_wrapper();
apex_report_leaks();
#if APEX_HAVE_BFD
address_resolution::delete_instance();
#endif
Expand Down
3 changes: 2 additions & 1 deletion src/apex/apex_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ inline unsigned int sc_nprocessors_onln()
macro (APEX_OMPT_HIGH_OVERHEAD_EVENTS, ompt_high_overhead_events, \
bool, false) \
macro (APEX_PIN_APEX_THREADS, pin_apex_threads, bool, true) \
macro (APEX_TRACK_MEMORY, track_memory, bool, false) \
macro (APEX_TRACK_CPU_MEMORY, track_cpu_memory, bool, false) \
macro (APEX_TRACK_GPU_MEMORY, track_gpu_memory, bool, false) \
macro (APEX_TASK_SCATTERPLOT, task_scatterplot, bool, false) \
macro (APEX_TIME_TOP_LEVEL_OS_THREADS, top_level_os_threads, bool, false) \
macro (APEX_POLICY_DRAIN_TIMEOUT, policy_drain_timeout, int, 1000) \
Expand Down
2 changes: 1 addition & 1 deletion src/apex/cupti_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1846,7 +1846,7 @@ void apex_cupti_callback_dispatch(void *ud, CUpti_CallbackDomain domain,
CUpti_CallbackData * cbdata = (CUpti_CallbackData*)(params);

// sadly, CUPTI leaks a lot of memory from the first runtime API call.
if (apex::apex_options::track_memory()) {
if (apex::apex_options::track_gpu_memory()) {
ignoreMalloc(domain, cbdata->callbackSite, id);
}

Expand Down
6 changes: 6 additions & 0 deletions src/apex/hip_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ using namespace std;
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <inttypes.h>

#include "memory_wrapper.hpp"

// Macro to check ROC-tracer calls status
#define ROCTRACER_CALL(call) \
do { \
Expand Down Expand Up @@ -513,6 +515,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data,
hostTotalAllocated.fetch_sub(bytes, std::memory_order_relaxed);
value = (double)(hostTotalAllocated);
store_sync_counter_data(nullptr, "Total Bytes Occupied on Host", value, false);
apex::recordFree(ptr);
} else {
mapMutex.lock();
if (memoryMap.count(ptr) > 0) {
Expand All @@ -528,6 +531,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data,
totalAllocated.fetch_sub(value, std::memory_order_relaxed);
value = (double)(totalAllocated);
store_sync_counter_data(nullptr, "Total Bytes Occupied on Device", value, false);
apex::recordFree(ptr, false);
}
// If we are in the exit of a function, and we are allocating memory,
// then update and record the bytes allocated
Expand All @@ -543,6 +547,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data,
hostTotalAllocated.fetch_add(bytes, std::memory_order_relaxed);
value = (double)(hostTotalAllocated);
store_sync_counter_data(nullptr, "Total Bytes Occupied on Host", value, false);
apex::recordAlloc(bytes, ptr, apex::GPU_HOST_MALLOC);
return true;
} else {
if (managed) {
Expand All @@ -556,6 +561,7 @@ bool getBytesIfMalloc(uint32_t cid, const hip_api_data_t* data,
totalAllocated.fetch_add(bytes, std::memory_order_relaxed);
value = (double)(totalAllocated);
store_sync_counter_data(nullptr, "Total Bytes Occupied on Device", value, false);
apex::recordAlloc(bytes, ptr, apex::GPU_DEVICE_MALLOC, false);
}
}
return true;
Expand Down
224 changes: 222 additions & 2 deletions src/apex/memory_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,25 @@
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include "memory_wrapper.hpp"
#include "apex_api.hpp"
#include "apex.hpp"
#include <execinfo.h>
#include "address_resolution.hpp"

namespace apex {

// Human-readable names for allocation sources, indexed by allocator_t
// (see the allocator_strings[it.second.alloc] use in apex_report_leaks).
// NOTE(review): order must match the allocator_t enum declared in
// memory_wrapper.hpp — confirm when editing either one.
static const char * allocator_strings[] = {
"malloc", "calloc", "realloc", "gpu_host_malloc", "gpu_device_malloc"
};

/* Accessor for the process-wide allocation bookkeeping structure.
 * The instance is a function-local static, so it is constructed lazily
 * on first use and shared by all callers. */
book_t& getBook() {
    static book_t the_book;
    return the_book;
}

void enable_memory_wrapper() {
if (!apex_options::track_memory()) { return; }
if (!apex_options::track_cpu_memory()) { return; }
typedef void (*apex_memory_initialized_t)();
static apex_memory_initialized_t apex_memory_initialized = NULL;
void * memory_so;
Expand Down Expand Up @@ -46,7 +59,7 @@ void enable_memory_wrapper() {
}

void disable_memory_wrapper() {
if (!apex_options::track_memory()) { return; }
if (!apex_options::track_cpu_memory()) { return; }
typedef void (*apex_memory_finalized_t)();
static apex_memory_finalized_t apex_memory_finalized = NULL;
void * memory_so;
Expand Down Expand Up @@ -74,8 +87,215 @@ void disable_memory_wrapper() {
dlerror(); // reset error flag
}

/* Print the current call stack (up to 32 frames) to stderr, demangling
 * each symbol. The first frame — this function itself — is skipped.
 * Debug aid for the leak tracker; has no return value. */
void printBacktrace() {
    void *trace[32];
    size_t size, i;
    char **strings;
    size = backtrace( trace, 32 );
    strings = backtrace_symbols( trace, size );
    // backtrace_symbols() returns nullptr if it cannot malloc the array.
    if (strings == nullptr) { return; }
    std::cerr << std::endl;
    // skip the first frame, it is this handler
    for( i = 1; i < size; i++ ){
        std::cerr << demangle(strings[i]) << std::endl;
    }
    // fix: the symbol array is malloc()ed by backtrace_symbols and was leaked
    free(strings);
}

/* Record an allocation of 'bytes' at address 'ptr' made by allocator 'alloc'.
 * Captures a backtrace and the current task so the leak report can attribute
 * the allocation. When 'cpu' is true, also emits the CPU memory counters
 * (GPU callers pass false and emit their own device counters). */
void recordAlloc(size_t bytes, void* ptr, allocator_t alloc, bool cpu) {
    static book_t& book = getBook();
    double value = (double)(bytes);
    if (cpu) sample_value("Memory: Bytes Allocated", value, true);
    profiler * p = thread_instance::instance().get_current_profiler();
    record_t tmp(value, thread_instance::instance().get_id(), alloc);
    if (p != nullptr) { tmp.id = p->get_task_id(); }
    // Capture the allocation site so leaks can be attributed in the report.
    tmp.size = backtrace(tmp.backtrace.data(), tmp.backtrace.size());
    book.mapMutex.lock();
    book.memoryMap.insert(std::pair<void*,record_t>(ptr, tmp));
    book.mapMutex.unlock();
    book.totalAllocated.fetch_add(bytes, std::memory_order_relaxed);
    // 'value' now holds the outstanding total — used only for the occupancy
    // counter below, NOT for the per-allocation statistics.
    value = (double)(book.totalAllocated);
    if (cpu) sample_value("Memory: Total Bytes Occupied", value);
    if (p == nullptr) {
        auto i = apex::instance();
        // might be after finalization, so double-check!
        if (i != nullptr) {
            // fix: was passing the running total instead of this allocation's size
            i->the_profiler_listener->increment_main_timer_allocations((double)(bytes));
        }
    } else {
        p->allocations++;
        // fix: accumulate this allocation's size, not the outstanding total
        p->bytes_allocated += (double)(bytes);
    }
}

/* Record a free of a pointer previously seen by recordAlloc(). Looks the
 * pointer up in the bookkeeping map and, if found, removes it and updates
 * the counters; untracked pointers (allocated before tracking started, or
 * freed twice) are silently ignored. When 'cpu' is true, also emits the
 * CPU memory counters. */
void recordFree(void* ptr, bool cpu) {
    static book_t& book = getBook();
    size_t bytes;
    book.mapMutex.lock();
    // Single lookup instead of the previous count() + operator[] + erase(key).
    auto entry = book.memoryMap.find(ptr);
    if (entry != book.memoryMap.end()) {
        bytes = entry->second.bytes;
        book.memoryMap.erase(entry);
    } else {
        // Untracked pointer — nothing to record.
        book.mapMutex.unlock();
        return;
    }
    book.mapMutex.unlock();
    double value = (double)(bytes);
    if (cpu) sample_value("Memory: Bytes Freed", value, true);
    book.totalAllocated.fetch_sub(bytes, std::memory_order_relaxed);
    // 'value' now holds the outstanding total — used only for the occupancy
    // counter below, NOT for the per-free statistics.
    value = (double)(book.totalAllocated);
    if (cpu) sample_value("Memory: Total Bytes Occupied", value);
    profiler * p = thread_instance::instance().get_current_profiler();
    if (p == nullptr) {
        auto i = apex::instance();
        // might be after finalization, so double-check!
        if (i != nullptr) {
            // fix: was passing the running total instead of the freed size
            i->the_profiler_listener->increment_main_timer_frees((double)(bytes));
        }
    } else {
        p->frees++;
        // fix: accumulate the size of this free, not the outstanding total
        p->bytes_freed += (double)(bytes);
    }
}

// Comparator function to sort pairs descending, according to second value
bool cmp(std::pair<void*, record_t>& a,
std::pair<void*, record_t>& b)
{
return a.second.bytes > b.second.bytes;
}

// Comparator for std::sort: order (name, bytes) pairs descending by byte
// count. Takes const references — a sort comparator must not mutate its
// arguments (the originals were non-const refs).
bool cmp2(const std::pair<std::string, size_t>& a,
          const std::pair<std::string, size_t>& b)
{
    return a.second > b.second;
}

/* Write a per-node leak report ("memory_report.<node>.txt") listing every
 * allocation recorded by recordAlloc() that was never matched by a
 * recordFree(), sorted descending by size. Known benign allocation sites
 * (CUDA/CUPTI internals, pthread_once, atexit, ...) are filtered out.
 * Runs at most once per process; expects false positives for memory freed
 * after exit(). */
void apex_report_leaks() {
    if (!apex_options::track_gpu_memory() && !apex_options::track_cpu_memory()) {
        return;
    }
    // Guard against being invoked more than once during shutdown.
    static bool once{false};
    if (once) return;
    once = true;
    static book_t& book = getBook();
    std::stringstream ss;
    ss << "memory_report." << book.saved_node_id << ".txt";
    std::string tmp{ss.str()};
    std::ofstream report (tmp);
    // Copy the surviving records into a vector so they can be sorted.
    std::vector<std::pair<void*, record_t> > sorted;

    if (book.saved_node_id == 0) {
        std::cout << "APEX Memory Report: (see " << tmp << ")" << std::endl;
        std::cout << "sorting " << book.memoryMap.size() << " leaks by size..." << std::endl;
    }

    for (auto& it : book.memoryMap) {
        sorted.push_back(it);
    }

    // Sort descending by leaked bytes (see cmp above).
    sort(sorted.begin(), sorted.end(), cmp);

    if (book.saved_node_id == 0) {
        std::cout << "Aggregating leaks by task and writing report..." << std::endl;
#ifdef APEX_WITH_CUDA
        std::cout << "Ignoring known leaks in CUDA/CUPTI..." << std::endl;
#endif
    }
    size_t actual_leaks{0};
    for (auto& it : sorted) {
        // Renamed from 'ss' — the original shadowed the filename stream above.
        std::stringstream entry;
        entry << it.second.bytes << " bytes leaked at " << std::hex << it.first << std::dec << " from task ";
        std::string name{"(no timer)"};
        bool nameless{true};
        if (it.second.id != nullptr) {
            name = it.second.id->get_name();
            nameless = false;
        }
        entry << name << " on tid " << it.second.tid << " with backtrace: " << std::endl;
        entry << "\t" << allocator_strings[it.second.alloc] << std::endl;
        // backtrace_symbols() can fail and return nullptr; skip the record
        // rather than dereference it.
        char** strings = backtrace_symbols( it.second.backtrace.data(), it.second.size );
        if (strings == nullptr) { continue; }
        bool skip{false};
        // Frames 0-2 are the tracking machinery itself; start at the caller.
        for(size_t i = 3; i < it.second.size; i++ ){
            std::string frame{strings[i]};
            // Filter allocations from known leaky / benign infrastructure.
            if (frame.find("cuInit", 0) != std::string::npos) { skip = true; break; }
            if (frame.find("libcudart", 0) != std::string::npos) { skip = true; break; }
            if (frame.find("libcupti", 0) != std::string::npos) { skip = true; break; }
            if (frame.find("pthread_once", 0) != std::string::npos) { skip = true; break; }
            if (frame.find("atexit", 0) != std::string::npos) { skip = true; break; }
            if (frame.find("apex_pthread_function", 0) != std::string::npos) { skip = true; break; }
            if (nameless) {
                if (frame.find("libcuda", 0) != std::string::npos) { skip = true; break; }
                if (frame.find("GOMP_parallel", 0) != std::string::npos) { skip = true; break; }
            }
            // NOTE(review): lookup_address appears to return a cached/owned
            // string (it is not freed here, matching prior usage) — confirm.
            std::string* resolved{lookup_address(((uintptr_t)it.second.backtrace[i]), true)};
            entry << "\t" << *resolved << std::endl;
        }
        // fix: the symbol array is malloc()ed by backtrace_symbols and was
        // leaked on every iteration (including the 'skip' path).
        free(strings);
        if (skip) { continue; }
        entry << std::endl;
        report << entry.str();
        actual_leaks++;
    }
    report.close();
    std::cout << "Reported " << actual_leaks << " 'actual' leaks.\nExpect false positives if memory was freed after exit." << std::endl;
}

} // end namespace

/* C-linkage wrapper around apex::enable_memory_wrapper(), exposing it
 * with an unmangled symbol name so C code can call it — presumably so the
 * preloaded memory wrapper library can find it via dlsym(); confirm. */
extern "C" void enable_memory_wrapper(void) {
apex::enable_memory_wrapper();
}
Expand Down
Loading

0 comments on commit 4aeade6

Please sign in to comment.