Make backward pass work when global stats is active for BatchNormLayer
including minor code cleaning
kkhoot committed Nov 10, 2015
1 parent de015c5 commit b5fa3c7
Showing 2 changed files with 12 additions and 20 deletions.
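
Note on the fix (a sketch of the reasoning, using the notation of the source comment visible in the diff below): when use_global_stats_ is set, the layer normalizes with stored mean/variance estimates, which are constants with respect to the input; the old code instead aborted in the backward pass via CHECK(!use_global_stats_). With fixed statistics, Y = (X-mean)/sqrt(var+eps) has a constant per-element scale, so the chain rule collapses to

    dE/dX = dE/dY ./ sqrt(var+eps)

where ./ denotes element-wise division. Since the forward pass leaves temp_ holding sqrt(var+eps) broadcast to the input shape (it is the divisor used to normalize the data), a single caffe_div (caffe_gpu_div on the GPU) against temp_ computes the whole backward step, after which the new branch returns early. A standalone sketch of this computation follows the first file's diff.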
src/caffe/layers/batch_norm_layer.cpp: 6 additions & 10 deletions
@@ -84,8 +84,7 @@ void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   }
 
   if (use_global_stats_) {
-    // use the stored mean/variance estimates. TODO(cdoersch): allow an option
-    // to use an unbiased variance estimate, like the paper does.
+    // use the stored mean/variance estimates.
     caffe_copy(mean_.count(), this->blobs_[0]->cpu_data(),
         mean_.mutable_cpu_data());
     int m = bottom[0]->count()/channels_;
@@ -158,16 +157,13 @@ template <typename Dtype>
 void BatchNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     const vector<Blob<Dtype>*>& bottom) {
-  CHECK(!use_global_stats_);
-  const Dtype* top_diff;
-  if (bottom[0] != top[0]) {
-    top_diff = top[0]->cpu_diff();
-  } else {
-    caffe_copy(x_norm_.count(), top[0]->cpu_diff(), x_norm_.mutable_cpu_diff());
-    top_diff = x_norm_.cpu_diff();
+  const Dtype* top_diff = top[0]->cpu_diff();
+  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+  if (use_global_stats_) {
+    caffe_div(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
+    return;
   }
   const Dtype* top_data = x_norm_.cpu_data();
-  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
   int num = bottom[0]->shape()[0];
   int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_);
   // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
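
A minimal standalone sketch of what the new use_global_stats_ branch computes; global_stats_backward is a hypothetical helper, not Caffe code, and scale stands in for temp_ (sqrt(var+eps) already broadcast per element):

#include <cstddef>

// Backward pass under fixed global statistics: mean and variance are
// constants, so dE/dx[i] = dE/dy[i] / sqrt(var + eps) element-wise.
// The loop body is what the caffe_div call above performs in bulk.
void global_stats_backward(const float* top_diff, const float* scale,
                           float* bottom_diff, std::size_t count) {
  for (std::size_t i = 0; i < count; ++i) {
    bottom_diff[i] = top_diff[i] / scale[i];
  }
}

The early return then skips the minibatch-statistics correction terms, which only apply when mean and variance are computed from the current batch.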
src/caffe/layers/batch_norm_layer.cu: 6 additions & 10 deletions
@@ -20,8 +20,7 @@ void BatchNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 
 
   if (use_global_stats_) {
-    // use the stored mean/variance estimates. TODO(cdoersch): allow an option
-    // to use an unbiased variance estimate, like the paper does.
+    // use the stored mean/variance estimates.
     caffe_copy(mean_.count(), this->blobs_[0]->gpu_data(),
         mean_.mutable_gpu_data());
     int m = bottom[0]->count()/channels_;
@@ -94,16 +93,13 @@ template <typename Dtype>
 void BatchNormLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     const vector<Blob<Dtype>*>& bottom) {
-  CHECK(!use_global_stats_);
-  const Dtype* top_diff;
-  if (bottom[0] != top[0]) {
-    top_diff = top[0]->gpu_diff();
-  } else {
-    caffe_copy(x_norm_.count(), top[0]->gpu_diff(), x_norm_.mutable_gpu_diff());
-    top_diff = x_norm_.gpu_diff();
+  const Dtype* top_diff = top[0]->gpu_diff();
+  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+  if (use_global_stats_) {
+    caffe_gpu_div(temp_.count(), top_diff, temp_.gpu_data(), bottom_diff);
+    return;
   }
   const Dtype* top_data = x_norm_.gpu_data();
-  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
   int num = bottom[0]->shape()[0];
   int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0));
   // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
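
For reference, the source comment truncated at the end of both backward hunks continues with the full training-mode gradient (paraphrased here as a sketch, not a verbatim quote):

    dE(Y)/dX = (dE/dY - mean(dE/dY) - mean(dE/dY · Y) · Y) ./ sqrt(var(X)+eps)

where · is the Hadamard (element-wise) product and ./ is element-wise division. When use_global_stats_ is active, mean(X) and var(X) are loaded constants rather than functions of X, so the two mean(...) correction terms vanish and only the division survives, which is exactly what the new early-return branches compute.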
