From a99405b983d2a73930cfac423c983e130fa9bd97 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Mon, 7 Jul 2014 15:26:24 +0800 Subject: [PATCH 1/5] Avoid using cudaMemcpy for memcpy when there is no GPU and CUDA driver --- src/caffe/test/test_math_functions.cpp | 2 ++ src/caffe/util/math_functions.cpp | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index 941d8b9479a..ddb9f060972 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -209,6 +209,7 @@ TYPED_TEST(MathFunctionsTest, TestCopyCPU) { const int n = this->blob_bottom_->count(); const TypeParam* bottom_data = this->blob_bottom_->cpu_data(); TypeParam* top_data = this->blob_top_->mutable_cpu_data(); + Caffe::set_mode(Caffe::CPU); caffe_copy(n, bottom_data, top_data); for (int i = 0; i < n; ++i) { EXPECT_EQ(bottom_data[i], top_data[i]); @@ -219,6 +220,7 @@ TYPED_TEST(MathFunctionsTest, TestCopyGPU) { const int n = this->blob_bottom_->count(); const TypeParam* bottom_data = this->blob_bottom_->gpu_data(); TypeParam* top_data = this->blob_top_->mutable_gpu_data(); + Caffe::set_mode(Caffe::GPU); caffe_copy(n, bottom_data, top_data); bottom_data = this->blob_bottom_->cpu_data(); top_data = this->blob_top_->mutable_cpu_data(); diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 918bb3c361c..df32093238f 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -152,7 +152,11 @@ void caffe_add_scalar(const int N, const double alpha, double* Y) { template void caffe_copy(const int N, const Dtype* X, Dtype* Y) { if (X != Y) { - CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyDefault)); + if (Caffe::mode() == Caffe::CPU) { + memcpy(Y, X, sizeof(Dtype) * N); + } else { + CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyDefault)); + } } } @@ -164,7 +168,11 @@ template void caffe_copy(const int N, const double* X, double* Y); void caffe_memcpy(const size_t N, const void* X, void* Y) { if (X != Y) { - CUDA_CHECK(cudaMemcpy(Y, X, N, cudaMemcpyDefault)); + if (Caffe::mode() == Caffe::CPU) { + memcpy(Y, X, N); + } else { + CUDA_CHECK(cudaMemcpy(Y, X, N, cudaMemcpyDefault)); + } } } From fd3b656ed3b57f8856898ad17dbaa6f090b9f133 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Wed, 9 Jul 2014 09:47:28 +0800 Subject: [PATCH 2/5] Check the GPU mode to decide which memcpy to use --- src/caffe/util/math_functions.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index df32093238f..9311a398721 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -152,10 +152,10 @@ void caffe_add_scalar(const int N, const double alpha, double* Y) { template void caffe_copy(const int N, const Dtype* X, Dtype* Y) { if (X != Y) { - if (Caffe::mode() == Caffe::CPU) { - memcpy(Y, X, sizeof(Dtype) * N); - } else { + if (Caffe::mode() == Caffe::GPU) { CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyDefault)); + } else { + memcpy(Y, X, sizeof(Dtype) * N); } } } @@ -168,10 +168,10 @@ template void caffe_copy(const int N, const double* X, double* Y); void caffe_memcpy(const size_t N, const void* X, void* Y) { if (X != Y) { - if (Caffe::mode() == Caffe::CPU) { - memcpy(Y, X, N); - } else { + if (Caffe::mode() == Caffe::GPU) { CUDA_CHECK(cudaMemcpy(Y, X, N, cudaMemcpyDefault)); + } else { + memcpy(Y, X, N); } } } From c096f23cbcfb130d18b3966f28c6376c53156ddf Mon Sep 17 00:00:00 2001 From: Kai Li Date: Wed, 9 Jul 2014 10:12:07 +0800 Subject: [PATCH 3/5] Switch to GPU mode when pointer is move to or from GPU in SyncedMemory --- src/caffe/syncedmem.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 9fe55280de9..5d6d0585222 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -33,6 +33,7 @@ inline void SyncedMemory::to_cpu() { CaffeMallocHost(&cpu_ptr_, size_); own_cpu_data_ = true; } + Caffe::set_mode(Caffe::GPU); caffe_memcpy(size_, gpu_ptr_, cpu_ptr_); head_ = SYNCED; break; @@ -53,6 +54,7 @@ inline void SyncedMemory::to_gpu() { if (gpu_ptr_ == NULL) { CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); } + Caffe::set_mode(Caffe::GPU); caffe_memcpy(size_, cpu_ptr_, gpu_ptr_); head_ = SYNCED; break; From 5db4df4e9756b9b9b52b354c03a671e2c75e094b Mon Sep 17 00:00:00 2001 From: Kai Li Date: Thu, 10 Jul 2014 01:57:16 +0800 Subject: [PATCH 4/5] Implement @Yangqing's solution to copy memory in the SyncedMemory --- src/caffe/syncedmem.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 5d6d0585222..3f9a3be9ebf 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -33,8 +33,7 @@ inline void SyncedMemory::to_cpu() { CaffeMallocHost(&cpu_ptr_, size_); own_cpu_data_ = true; } - Caffe::set_mode(Caffe::GPU); - caffe_memcpy(size_, gpu_ptr_, cpu_ptr_); + CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDefault)); head_ = SYNCED; break; case HEAD_AT_CPU: @@ -54,8 +53,7 @@ inline void SyncedMemory::to_gpu() { if (gpu_ptr_ == NULL) { CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); } - Caffe::set_mode(Caffe::GPU); - caffe_memcpy(size_, cpu_ptr_, gpu_ptr_); + CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyDefault)); head_ = SYNCED; break; case HEAD_AT_GPU: From 4a398ebba314528ad2086d2ef6629f91cdebe4fe Mon Sep 17 00:00:00 2001 From: Kai Li Date: Thu, 10 Jul 2014 07:50:31 +0800 Subject: [PATCH 5/5] Replace cudaMemcpy with caffe_gpu_memcpy in SyncedMemory per @longjon --- include/caffe/util/math_functions.hpp | 2 +- src/caffe/syncedmem.cpp | 4 ++-- src/caffe/test/test_syncedmem.cpp | 4 ++-- src/caffe/util/math_functions.cpp | 8 ++------ 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 97a057103db..2df0fc983f9 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -59,7 +59,7 @@ void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X, template void caffe_copy(const int N, const Dtype *X, Dtype *Y); -void caffe_memcpy(const size_t N, const void *X, void *Y); +void caffe_gpu_memcpy(const size_t N, const void *X, void *Y); template void caffe_set(const int N, const Dtype alpha, Dtype *X); diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 3f9a3be9ebf..77dfe7a4636 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -33,7 +33,7 @@ inline void SyncedMemory::to_cpu() { CaffeMallocHost(&cpu_ptr_, size_); own_cpu_data_ = true; } - CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDefault)); + caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_); head_ = SYNCED; break; case HEAD_AT_CPU: @@ -53,7 +53,7 @@ inline void SyncedMemory::to_gpu() { if (gpu_ptr_ == NULL) { CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); } - CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyDefault)); + caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_); head_ = SYNCED; break; case HEAD_AT_GPU: diff --git a/src/caffe/test/test_syncedmem.cpp b/src/caffe/test/test_syncedmem.cpp index 3aaeafc353e..3a757088e89 100644 --- a/src/caffe/test/test_syncedmem.cpp +++ b/src/caffe/test/test_syncedmem.cpp @@ -58,7 +58,7 @@ TEST_F(SyncedMemoryTest, TestGPURead) { EXPECT_EQ(mem.head(), SyncedMemory::SYNCED); // check if values are the same char* recovered_value = new char[10]; - caffe_memcpy(10, gpu_data, recovered_value); + caffe_gpu_memcpy(10, gpu_data, recovered_value); for (int i = 0; i < mem.size(); ++i) { EXPECT_EQ((reinterpret_cast(recovered_value))[i], 1); } @@ -72,7 +72,7 @@ TEST_F(SyncedMemoryTest, TestGPURead) { gpu_data = mem.gpu_data(); EXPECT_EQ(mem.head(), SyncedMemory::SYNCED); // check if values are the same - caffe_memcpy(10, gpu_data, recovered_value); + caffe_gpu_memcpy(10, gpu_data, recovered_value); for (int i = 0; i < mem.size(); ++i) { EXPECT_EQ((reinterpret_cast(recovered_value))[i], 2); } diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 9311a398721..b989ca2ab69 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -166,13 +166,9 @@ template void caffe_copy(const int N, const unsigned int* X, template void caffe_copy(const int N, const float* X, float* Y); template void caffe_copy(const int N, const double* X, double* Y); -void caffe_memcpy(const size_t N, const void* X, void* Y) { +void caffe_gpu_memcpy(const size_t N, const void* X, void* Y) { if (X != Y) { - if (Caffe::mode() == Caffe::GPU) { - CUDA_CHECK(cudaMemcpy(Y, X, N, cudaMemcpyDefault)); - } else { - memcpy(Y, X, N); - } + CUDA_CHECK(cudaMemcpy(Y, X, N, cudaMemcpyDefault)); } }