From 0efbb5fcc5fc30514435cce59de131a9021a7543 Mon Sep 17 00:00:00 2001 From: zhiqiu Date: Fri, 11 Mar 2022 11:59:42 +0800 Subject: [PATCH 1/3] fix gpu context callback --- paddle/phi/backends/gpu/gpu_context.cc | 13 ++++++++----- .../phi/kernels/funcs/concat_and_split_functor.cu | 2 ++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index 09deb575f2414..d32cbfed36b37 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -654,13 +654,16 @@ struct GPUContext::Impl { } void AddStreamCallback(const std::function& callback) const { - // TODO(wilber): Do we need ThreadPool? - auto* func = new std::function([this, callback] { - std::lock_guard lock(stream_call_back_mtx_); - last_future_ = std::async(std::launch::deferred, [&]() { callback(); }); - }); + // NOTE(zhiqiu): better use threadpool here, otherwise it may launch too + // many threads. + auto* func = + new std::function([ this, callback = std::move(callback) ] { + std::lock_guard lock(stream_call_back_mtx_); + last_future_ = std::async(std::launch::async, callback); + }); #ifdef PADDLE_WITH_HIP + VLOG(4) << "hipStreamAddCallback: " << stream_; PADDLE_ENFORCE_GPU_SUCCESS( hipStreamAddCallback(stream_, internal::StreamCallbackFunc, func, 0)); #endif diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cu b/paddle/phi/kernels/funcs/concat_and_split_functor.cu index 840c8872f50f8..06be592dd9375 100644 --- a/paddle/phi/kernels/funcs/concat_and_split_functor.cu +++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cu @@ -395,6 +395,8 @@ struct ConcatFunctor { auto* data_alloc_released = data_alloc.release(); auto* col_alloc_released = col_alloc.release(); context.AddStreamCallback([data_alloc_released, col_alloc_released] { + VLOG(4) << "Delete cuda pinned at " << data_alloc_released; + VLOG(4) << "Delete cuda pinned at " << col_alloc_released; 
paddle::memory::allocation::Allocator::AllocationDeleter( data_alloc_released); paddle::memory::allocation::Allocator::AllocationDeleter( From e808e31af5e7c62c072391ee6a1d5bd1f6d4234b Mon Sep 17 00:00:00 2001 From: zhiqiu Date: Fri, 11 Mar 2022 13:11:17 +0800 Subject: [PATCH 2/3] fix gpu callback --- paddle/phi/backends/gpu/gpu_context.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index d32cbfed36b37..88d4230dadfce 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -654,8 +654,9 @@ struct GPUContext::Impl { } void AddStreamCallback(const std::function& callback) const { - // NOTE(zhiqiu): better use threadpool here, otherwise it may launch too - // many threads. + // NOTE(zhiqiu): better use threadpool here, otherwise "std::async" may + // launch too + // many threads and result in thread oversubscription. auto* func = new std::function([ this, callback = std::move(callback) ] { std::lock_guard lock(stream_call_back_mtx_); @@ -663,7 +664,6 @@ struct GPUContext::Impl { }); #ifdef PADDLE_WITH_HIP - VLOG(4) << "hipStreamAddCallback: " << stream_; PADDLE_ENFORCE_GPU_SUCCESS( hipStreamAddCallback(stream_, internal::StreamCallbackFunc, func, 0)); #endif From 45c89ac1a2b073a0d1fad3cd7e1138e3aaa8dba9 Mon Sep 17 00:00:00 2001 From: zhiqiu Date: Mon, 14 Mar 2022 13:54:46 +0800 Subject: [PATCH 3/3] fix callback early destruct problem --- paddle/phi/backends/gpu/gpu_context.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index 88d4230dadfce..a3b252598582b 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -657,11 +657,15 @@ struct GPUContext::Impl { // NOTE(zhiqiu): better use threadpool here, otherwise "std::async" may // launch too // many threads and result 
in thread oversubscription. - auto* func = - new std::function([ this, callback = std::move(callback) ] { - std::lock_guard lock(stream_call_back_mtx_); - last_future_ = std::async(std::launch::async, callback); - }); + auto* callback_func = new std::function(std::move(callback)); + auto* func = new std::function([this, callback_func] { + std::lock_guard lock(stream_call_back_mtx_); + VLOG(4) << "Stream callback"; + last_future_ = std::async(std::launch::async, [callback_func]() { + std::unique_ptr<std::function<void()>> releaser(callback_func); + (*callback_func)(); + }); + }); #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS(