From d54ae85a3ec06aa7ecca5487e11d8592b40e06c5 Mon Sep 17 00:00:00 2001 From: LI Yi Date: Thu, 12 Nov 2015 21:56:16 +0800 Subject: [PATCH] Do not enable peer access when the GPUs are located across QPI --- src/caffe/parallel.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index 9abc92b612d..9bcab270dc7 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -232,7 +233,19 @@ P2PSync::P2PSync(shared_ptr > root_solver, int access; CUDA_CHECK(cudaDeviceCanAccessPeer(&access, self, peer)); if (access) { - CUDA_CHECK(cudaDeviceEnablePeerAccess(peer, 0)); + cudaDeviceProp a, b; + CUDA_CHECK(cudaGetDeviceProperties(&a, self)); + CUDA_CHECK(cudaGetDeviceProperties(&b, peer)); + const int pci_bus_id_offset = 0x80; + if (std::max(a.pciBusID, b.pciBusID) < pci_bus_id_offset || + std::min(a.pciBusID, b.pciBusID) >= pci_bus_id_offset) { + CUDA_CHECK(cudaDeviceEnablePeerAccess(peer, 0)); + } else { + LOG(INFO) << "This will result in poor memcpy performance over QPI, " + << "if enables peer to peer access from GPU " + << self << " (pciBusID " << a.pciBusID << ") to GPU " + << peer << " (pciBusID " << b.pciBusID << ")"; + } } else { LOG(INFO)<< "GPU " << self << " does not have p2p access to GPU " << peer; } @@ -262,7 +275,14 @@ P2PSync::~P2PSync() { int access; CUDA_CHECK(cudaDeviceCanAccessPeer(&access, self, peer)); if (access) { - CUDA_CHECK(cudaDeviceDisablePeerAccess(peer)); + cudaDeviceProp a, b; + CUDA_CHECK(cudaGetDeviceProperties(&a, self)); + CUDA_CHECK(cudaGetDeviceProperties(&b, peer)); + const int pci_bus_id_offset = 0x80; + if (std::max(a.pciBusID, b.pciBusID) < pci_bus_id_offset || + std::min(a.pciBusID, b.pciBusID) >= pci_bus_id_offset) { + CUDA_CHECK(cudaDeviceDisablePeerAccess(peer)); + } } }