Skip to content

Commit

Permalink
fix error during multiple process
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiqiu committed Oct 11, 2021
1 parent 8efe79d commit e562d20
Showing 1 changed file with 25 additions and 6 deletions.
31 changes: 25 additions & 6 deletions paddle/fluid/memory/allocation/allocator_facade.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ class AllocatorFacadePrivate {

inline const std::shared_ptr<Allocator>& GetAllocator(
const platform::Place& place, size_t size) {
VLOG(4) << "GetAllocator"
<< " " << place << " " << size;
const auto& allocators =
(size > 0 ? (UNLIKELY(FLAGS_use_system_allocator) ? system_allocators_
: GetAllocatorMap())
Expand Down Expand Up @@ -258,17 +260,34 @@ class AllocatorFacadePrivate {
bool allow_free_idle_chunk) {
auto cuda_allocator = std::make_shared<CUDAAllocator>(p);
auto alignment = platform::GpuMinChunkSize();
const auto& prop = platform::GetDeviceProperties(p.GetDeviceId());
bool need_addr_align = prop.textureAlignment < alignment;
bool need_addr_align = true;
// NOTE: sometimes, since cuda runtime can not be forked, calling any cuda
// API in that case may got cuda error(3), i.e.,
// cudaErrorInitializationError. And, the CUDAAllocator is only initialized
// but not really used.
// Here, the try-catch block is added to handle the case that
// GetDeviceProperties() may failed in the multiple process(for example, in
// dataloader with num_worker > 0)
try {
const auto& prop = platform::GetDeviceProperties(p.GetDeviceId());
need_addr_align = prop.textureAlignment < alignment;
VLOG(4) << "GetDeviceProperties ok, textureAlignment: "
<< prop.textureAlignment
<< ", set need_addr_align=" << need_addr_align;
} catch (...) {
need_addr_align = true;
VLOG(4) << "GetDeviceProperties failed, set need_addr_align=true";
}
// The address returned is aligned already,
// ref:
// https://stackoverflow.com/questions/14082964/cuda-alignment-256bytes-seriously/14083295#14083295
std::shared_ptr<Allocator> underlying_allocator{nullptr};
if (need_addr_align) {
VLOG(10) << "use AlignedAllocator with alignment: " << alignment
<< ", textureAlignment: " << prop.textureAlignment;
VLOG(10) << "use AlignedAllocator with alignment: " << alignment;
underlying_allocator =
std::make_shared<AlignedAllocator>(underlying_allocator, alignment);
} else {
VLOG(10) << "not use AlignedAllocator with alignment: " << alignment
<< ", textureAlignment: " << prop.textureAlignment;
VLOG(10) << "not use AlignedAllocator with alignment: " << alignment;
underlying_allocator = cuda_allocator;
}
allocators_[p] = std::make_shared<AutoGrowthBestFitAllocator>(
Expand Down

0 comments on commit e562d20

Please sign in to comment.