Skip to content

Commit

Permalink
fix error during multiple process
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiqiu committed Oct 11, 2021
1 parent 8efe79d commit e562d20
Showing 1 changed file with 25 additions and 6 deletions.
31 changes: 25 additions & 6 deletions paddle/fluid/memory/allocation/allocator_facade.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ class AllocatorFacadePrivate {

inline const std::shared_ptr<Allocator>& GetAllocator(
const platform::Place& place, size_t size) {
VLOG(4) << "GetAllocator"
<< " " << place << " " << size;
const auto& allocators =
(size > 0 ? (UNLIKELY(FLAGS_use_system_allocator) ? system_allocators_
: GetAllocatorMap())
Expand Down Expand Up @@ -258,17 +260,34 @@ class AllocatorFacadePrivate {
bool allow_free_idle_chunk) {
auto cuda_allocator = std::make_shared<CUDAAllocator>(p);
auto alignment = platform::GpuMinChunkSize();
const auto& prop = platform::GetDeviceProperties(p.GetDeviceId());
bool need_addr_align = prop.textureAlignment < alignment;
bool need_addr_align = true;
// NOTE: the CUDA runtime cannot be used in a forked child process, so
// calling any CUDA API there may fail with CUDA error 3, i.e.,
// cudaErrorInitializationError. In that case, the CUDAAllocator is only
// initialized but never actually used.
// The try-catch block below handles the case where GetDeviceProperties()
// fails in a multi-process setting (for example, in a dataloader with
// num_worker > 0).
try {
const auto& prop = platform::GetDeviceProperties(p.GetDeviceId());
need_addr_align = prop.textureAlignment < alignment;
VLOG(4) << "GetDeviceProperties ok, textureAlignment: "
<< prop.textureAlignment
<< ", set need_addr_align=" << need_addr_align;
} catch (...) {
need_addr_align = true;
VLOG(4) << "GetDeviceProperties failed, set need_addr_align=true";
}
// The address returned is aligned already,
// ref:
// https://stackoverflow.com/questions/14082964/cuda-alignment-256bytes-seriously/14083295#14083295
std::shared_ptr<Allocator> underlying_allocator{nullptr};
if (need_addr_align) {
VLOG(10) << "use AlignedAllocator with alignment: " << alignment
<< ", textureAlignment: " << prop.textureAlignment;
VLOG(10) << "use AlignedAllocator with alignment: " << alignment;
underlying_allocator =
std::make_shared<AlignedAllocator>(underlying_allocator, alignment);
} else {
VLOG(10) << "not use AlignedAllocator with alignment: " << alignment
<< ", textureAlignment: " << prop.textureAlignment;
VLOG(10) << "not use AlignedAllocator with alignment: " << alignment;
underlying_allocator = cuda_allocator;
}
allocators_[p] = std::make_shared<AutoGrowthBestFitAllocator>(
Expand Down

1 comment on commit e562d20

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulation! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉

Please sign in to comment.