-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3591 from jeffdonahue/scale-bias-layer
Scale and Bias Layers
- Loading branch information
Showing
9 changed files
with
1,717 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#ifndef CAFFE_BIAS_LAYER_HPP_ | ||
#define CAFFE_BIAS_LAYER_HPP_ | ||
|
||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/proto/caffe.pb.h" | ||
|
||
namespace caffe { | ||
|
||
/** | ||
* @brief Computes a sum of two input Blobs, with the shape of the | ||
* latter Blob "broadcast" to match the shape of the former. | ||
* Equivalent to tiling the latter Blob, then computing the elementwise | ||
* sum. | ||
* | ||
* The second input may be omitted, in which case it's learned as a parameter | ||
* of the layer. | ||
*/ | ||
template <typename Dtype> | ||
class BiasLayer : public Layer<Dtype> { | ||
public: | ||
explicit BiasLayer(const LayerParameter& param) | ||
: Layer<Dtype>(param) {} | ||
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual inline const char* type() const { return "Bias"; } | ||
virtual inline int MinBottomBlobs() const { return 1; } | ||
virtual inline int MaxBottomBlobs() const { return 2; } | ||
virtual inline int ExactNumTopBlobs() const { return 1; } | ||
|
||
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
|
||
private: | ||
Blob<Dtype> bias_multiplier_; | ||
int outer_dim_, bias_dim_, inner_dim_, dim_; | ||
}; | ||
|
||
|
||
|
||
} // namespace caffe | ||
|
||
#endif // CAFFE_BIAS_LAYER_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#ifndef CAFFE_SCALE_LAYER_HPP_ | ||
#define CAFFE_SCALE_LAYER_HPP_ | ||
|
||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/proto/caffe.pb.h" | ||
|
||
#include "caffe/layers/bias_layer.hpp" | ||
|
||
namespace caffe { | ||
|
||
/** | ||
* @brief Computes a product of two input Blobs, with the shape of the | ||
* latter Blob "broadcast" to match the shape of the former. | ||
* Equivalent to tiling the latter Blob, then computing the elementwise | ||
* product. | ||
* | ||
* The second input may be omitted, in which case it's learned as a parameter | ||
* of the layer. | ||
*/ | ||
template <typename Dtype> | ||
class ScaleLayer: public Layer<Dtype> { | ||
public: | ||
explicit ScaleLayer(const LayerParameter& param) | ||
: Layer<Dtype>(param) {} | ||
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
|
||
virtual inline const char* type() const { return "Scale"; } | ||
// Scale | ||
virtual inline int MinBottomBlobs() const { return 1; } | ||
virtual inline int MaxBottomBlobs() const { return 2; } | ||
virtual inline int ExactNumTopBlobs() const { return 1; } | ||
|
||
protected: | ||
/** | ||
* In the below shape specifications, @f$ i @f$ denotes the value of the | ||
* `axis` field given by `this->layer_param_.scale_param().axis()`, after | ||
* canonicalization (i.e., conversion from negative to positive index, | ||
* if applicable). | ||
* | ||
* @param bottom input Blob vector (length 2) | ||
* -# @f$ (d_0 \times ... \times | ||
* d_i \times ... \times d_j \times ... \times d_n) @f$ | ||
* the first factor @f$ x @f$ | ||
* -# @f$ (d_i \times ... \times d_j) @f$ | ||
* the second factor @f$ y @f$ | ||
* @param top output Blob vector (length 1) | ||
* -# @f$ (d_0 \times ... \times | ||
* d_i \times ... \times d_j \times ... \times d_n) @f$ | ||
* the product @f$ z = x y @f$ computed after "broadcasting" y. | ||
* Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$, | ||
* then computing the elementwise product. | ||
*/ | ||
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top); | ||
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); | ||
|
||
shared_ptr<Layer<Dtype> > bias_layer_; | ||
vector<Blob<Dtype>*> bias_bottom_vec_; | ||
vector<bool> bias_propagate_down_; | ||
int bias_param_id_; | ||
|
||
Blob<Dtype> sum_multiplier_; | ||
Blob<Dtype> sum_result_; | ||
Blob<Dtype> temp_; | ||
int axis_; | ||
int outer_dim_, scale_dim_, inner_dim_; | ||
}; | ||
|
||
|
||
} // namespace caffe | ||
|
||
#endif // CAFFE_SCALE_LAYER_HPP_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
#include <vector> | ||
|
||
#include "caffe/filler.hpp" | ||
#include "caffe/layers/bias_layer.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
|
||
namespace caffe { | ||
|
||
template <typename Dtype> | ||
void BiasLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
if (bottom.size() == 1 && this->blobs_.size() > 0) { | ||
LOG(INFO) << "Skipping parameter initialization"; | ||
} else if (bottom.size() == 1) { | ||
// bias is a learned parameter; initialize it | ||
const BiasParameter& param = this->layer_param_.bias_param(); | ||
const int axis = bottom[0]->CanonicalAxisIndex(param.axis()); | ||
const int num_axes = param.num_axes(); | ||
CHECK_GE(num_axes, -1) << "num_axes must be non-negative, " | ||
<< "or -1 to extend to the end of bottom[0]"; | ||
if (num_axes >= 0) { | ||
CHECK_GE(bottom[0]->num_axes(), axis + num_axes) | ||
<< "bias blob's shape extends past bottom[0]'s shape when applied " | ||
<< "starting with bottom[0] axis = " << axis; | ||
} | ||
this->blobs_.resize(1); | ||
const vector<int>::const_iterator& shape_start = | ||
bottom[0]->shape().begin() + axis; | ||
const vector<int>::const_iterator& shape_end = | ||
(num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes); | ||
vector<int> bias_shape(shape_start, shape_end); | ||
this->blobs_[0].reset(new Blob<Dtype>(bias_shape)); | ||
shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(param.filler())); | ||
filler->Fill(this->blobs_[0].get()); | ||
} | ||
this->param_propagate_down_.resize(this->blobs_.size(), true); | ||
} | ||
|
||
template <typename Dtype> | ||
void BiasLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
const BiasParameter& param = this->layer_param_.bias_param(); | ||
Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get(); | ||
// Always set axis == 0 in special case where bias is a scalar | ||
// (num_axes == 0). Mathematically equivalent for any choice of axis, so the | ||
// actual setting can be safely ignored; and computation is most efficient | ||
// with axis == 0 and (therefore) outer_dim_ == 1. | ||
const int axis = (bias->num_axes() == 0) ? | ||
0 : bottom[0]->CanonicalAxisIndex(param.axis()); | ||
CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes()) | ||
<< "bias blob's shape extends past bottom[0]'s shape when applied " | ||
<< "starting with bottom[0] axis = " << axis; | ||
for (int i = 0; i < bias->num_axes(); ++i) { | ||
CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i)) | ||
<< "dimension mismatch between bottom[0]->shape(" << axis + i | ||
<< ") and bias->shape(" << i << ")"; | ||
} | ||
outer_dim_ = bottom[0]->count(0, axis); | ||
bias_dim_ = bias->count(); | ||
inner_dim_ = bottom[0]->count(axis + bias->num_axes()); | ||
dim_ = bias_dim_ * inner_dim_; | ||
if (bottom[0] != top[0]) { | ||
top[0]->ReshapeLike(*bottom[0]); | ||
} | ||
bias_multiplier_.Reshape(vector<int>(1, inner_dim_)); | ||
if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) { | ||
caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data()); | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
const Dtype* bias_data = | ||
((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data(); | ||
Dtype* top_data = top[0]->mutable_cpu_data(); | ||
if (bottom[0] != top[0]) { | ||
const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
caffe_copy(bottom[0]->count(), bottom_data, top_data); | ||
} | ||
for (int n = 0; n < outer_dim_; ++n) { | ||
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_, | ||
inner_dim_, Dtype(1), Dtype(1), bias_data, | ||
bias_multiplier_.cpu_data(), Dtype(1), top_data); | ||
top_data += dim_; | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void BiasLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | ||
if (propagate_down[0] && bottom[0] != top[0]) { | ||
const Dtype* top_diff = top[0]->cpu_diff(); | ||
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); | ||
caffe_copy(bottom[0]->count(), top_diff, bottom_diff); | ||
} | ||
// in-place, we don't need to do anything with the data diff | ||
const bool bias_param = (bottom.size() == 1); | ||
if ((!bias_param && propagate_down[1]) || | ||
(bias_param && this->param_propagate_down_[0])) { | ||
const Dtype* top_diff = top[0]->cpu_diff(); | ||
Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1]) | ||
->mutable_cpu_diff(); | ||
bool accum = bias_param; | ||
for (int n = 0; n < outer_dim_; ++n) { | ||
caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1), | ||
top_diff, bias_multiplier_.cpu_data(), Dtype(accum), bias_diff); | ||
top_diff += dim_; | ||
accum = true; | ||
} | ||
} | ||
} | ||
|
||
// CPU-only builds replace the GPU Forward/Backward implementations with
// stub definitions.
#ifdef CPU_ONLY | ||
STUB_GPU(BiasLayer); | ||
#endif | ||
 |
||
// Explicitly instantiate the float/double templates and register the layer
// under the type string "Bias".
INSTANTIATE_CLASS(BiasLayer); | ||
REGISTER_LAYER_CLASS(Bias); | ||
|
||
} // namespace caffe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#include <vector> | ||
|
||
#include "caffe/filler.hpp" | ||
#include "caffe/layers/bias_layer.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
|
||
namespace caffe { | ||
|
||
// Element-wise bias addition kernel: each output element adds the bias value
// whose index is recovered from the flattened element position -- the index
// within [0, bias_dim) after stripping inner_dim repeats and the outer slice.
template <typename Dtype>
__global__ void BiasForward(const int n, const Dtype* in,
    const Dtype* bias, const int bias_dim, const int inner_dim,
    Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    out[index] = in[index] + bias[(index / inner_dim) % bias_dim];
  }
}
|
||
template <typename Dtype>
void BiasLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  // The bias comes either from bottom[1] or from this layer's own parameter.
  Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const Dtype* bias_data = bias->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  // One thread per output element; each output depends only on its matching
  // input element, so the launch also works in-place.
  BiasForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
      <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, bias_data, bias_dim_, inner_dim_, top_data);
}
|
||
template <typename Dtype>
void BiasLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // dE/dx equals dE/dy, so propagating to the data input is a plain copy;
  // in-place, the diff is already where it needs to be.
  if (propagate_down[0] && bottom[0] != top[0]) {
    caffe_copy(bottom[0]->count(),
        top[0]->gpu_diff(), bottom[0]->mutable_gpu_diff());
  }
  const bool bias_param = (bottom.size() == 1);
  const bool bias_backprop = bias_param ?
      this->param_propagate_down_[0] : propagate_down[1];
  if (bias_backprop) {
    const Dtype* top_diff = top[0]->gpu_diff();
    Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
        ->mutable_gpu_diff();
    // Reduce each outer slice over inner_dim_ with a GEMV against the ones
    // vector. beta = accum: overwrite on the first slice when the bias is a
    // bottom, accumulate thereafter -- and from the start when the bias is a
    // learned parameter (diffs accumulate).
    bool accum = bias_param;
    for (int i = 0; i < outer_dim_; ++i) {
      caffe_gpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
          top_diff, bias_multiplier_.gpu_data(), Dtype(accum), bias_diff);
      top_diff += dim_;
      accum = true;
    }
  }
}
|
||
// Explicitly instantiate the GPU Forward/Backward for float and double.
INSTANTIATE_LAYER_GPU_FUNCS(BiasLayer); | ||
|
||
} // namespace caffe |
Oops, something went wrong.