From e562d20c871745932433076e0b4ab4c83eb1c4ae Mon Sep 17 00:00:00 2001
From: zhiqiu
Date: Mon, 11 Oct 2021 07:18:49 +0000
Subject: [PATCH] fix error during multiple processes

---
 .../memory/allocation/allocator_facade.cc     | 31 +++++++++++++++----
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 78d0941410f5e..281902f3a2b12 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -202,6 +202,8 @@ class AllocatorFacadePrivate {
 
   inline const std::shared_ptr<Allocator>& GetAllocator(
       const platform::Place& place, size_t size) {
+    VLOG(4) << "GetAllocator"
+            << " " << place << " " << size;
     const auto& allocators =
         (size > 0 ? (UNLIKELY(FLAGS_use_system_allocator) ? system_allocators_
                                                           : GetAllocatorMap())
@@ -258,17 +260,34 @@ class AllocatorFacadePrivate {
       bool allow_free_idle_chunk) {
     auto cuda_allocator = std::make_shared<CUDAAllocator>(p);
     auto alignment = platform::GpuMinChunkSize();
-    const auto& prop = platform::GetDeviceProperties(p.GetDeviceId());
-    bool need_addr_align = prop.textureAlignment < alignment;
+    bool need_addr_align = true;
+    // NOTE: sometimes, since the cuda runtime cannot be forked, calling any
+    // cuda API in that case may get cuda error(3), i.e.,
+    // cudaErrorInitializationError. And, the CUDAAllocator is only initialized
+    // but not really used.
+    // Here, the try-catch block is added to handle the case that
+    // GetDeviceProperties() may fail in multiple processes (for example, in a
+    // dataloader with num_worker > 0).
+    try {
+      const auto& prop = platform::GetDeviceProperties(p.GetDeviceId());
+      need_addr_align = prop.textureAlignment < alignment;
+      VLOG(4) << "GetDeviceProperties ok, textureAlignment: "
+              << prop.textureAlignment
+              << ", set need_addr_align=" << need_addr_align;
+    } catch (...) {
+      need_addr_align = true;
+      VLOG(4) << "GetDeviceProperties failed, set need_addr_align=true";
+    }
+    // The address returned is aligned already,
+    // ref:
+    // https://stackoverflow.com/questions/14082964/cuda-alignment-256bytes-seriously/14083295#14083295
     std::shared_ptr<Allocator> underlying_allocator{nullptr};
     if (need_addr_align) {
-      VLOG(10) << "use AlignedAllocator with alignment: " << alignment
-               << ", textureAlignment: " << prop.textureAlignment;
+      VLOG(10) << "use AlignedAllocator with alignment: " << alignment;
       underlying_allocator =
           std::make_shared<AlignedAllocator>(underlying_allocator, alignment);
     } else {
-      VLOG(10) << "not use AlignedAllocator with alignment: " << alignment
-               << ", textureAlignment: " << prop.textureAlignment;
+      VLOG(10) << "not use AlignedAllocator with alignment: " << alignment;
       underlying_allocator = cuda_allocator;
     }
     allocators_[p] = std::make_shared<AutoGrowthBestFitAllocator>(