forked from BVLC/caffe
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/master'
Tile Layer BVLC#2083
- Loading branch information
Showing
15 changed files
with
877 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#ifndef CAFFE_UTIL_GPU_UTIL_H_
#define CAFFE_UTIL_GPU_UTIL_H_

namespace caffe {

// Type-dispatched device-side atomic add: atomically performs
// *address += val and returns the value previously stored at address.
// Only the float and double specializations below are defined; using any
// other Dtype is a link-time error (the generic template has no body).
template <typename Dtype>
inline __device__ Dtype caffe_gpu_atomic_add(const Dtype val, Dtype* address);

// float: hardware atomicAdd is available on all supported architectures.
template <>
inline __device__
float caffe_gpu_atomic_add(const float val, float* address) {
  return atomicAdd(address, val);
}

// double atomicAdd implementation taken from:
// http://docs.nvidia.com/cuda/cuda-c-programming-guide/#axzz3PVCpVsEG
template <>
inline __device__
double caffe_gpu_atomic_add(const double val, double* address) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
  // Pascal (SM60) and newer provide a native double-precision atomicAdd;
  // prefer it over the much slower CAS-loop emulation below.
  return atomicAdd(address, val);
#else
  // Emulate the atomic double add with a compare-and-swap loop on the
  // 64-bit integer bit pattern of the value. The loop compares the raw
  // bits (not the doubles), so it terminates even if the stored value
  // is NaN (NaN != NaN would otherwise spin forever).
  unsigned long long int* address_as_ull =  // NOLINT(runtime/int)
      // NOLINT_NEXT_LINE(runtime/int)
      reinterpret_cast<unsigned long long int*>(address);
  unsigned long long int old = *address_as_ull;  // NOLINT(runtime/int)
  unsigned long long int assumed;  // NOLINT(runtime/int)
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed,
        __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
#endif
}

}  // namespace caffe

#endif  // CAFFE_UTIL_GPU_UTIL_H_
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#include <vector> | ||
|
||
#include "caffe/blob.hpp" | ||
#include "caffe/common.hpp" | ||
#include "caffe/common_layers.hpp" | ||
#include "caffe/filler.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
|
||
namespace caffe { | ||
|
||
template <typename Dtype> | ||
void EmbedLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
N_ = this->layer_param_.embed_param().num_output(); | ||
CHECK_GT(N_, 0) << "EmbedLayer num_output must be positive."; | ||
K_ = this->layer_param_.embed_param().input_dim(); | ||
CHECK_GT(K_, 0) << "EmbedLayer input_dim must be positive."; | ||
bias_term_ = this->layer_param_.embed_param().bias_term(); | ||
// Check if we need to set up the weights | ||
if (this->blobs_.size() > 0) { | ||
LOG(INFO) << "Skipping parameter initialization"; | ||
} else { | ||
if (bias_term_) { | ||
this->blobs_.resize(2); | ||
} else { | ||
this->blobs_.resize(1); | ||
} | ||
// Initialize the weights -- | ||
// transposed from InnerProductLayer for spatial locality. | ||
vector<int> weight_shape(2); | ||
weight_shape[0] = K_; | ||
weight_shape[1] = N_; | ||
this->blobs_[0].reset(new Blob<Dtype>(weight_shape)); | ||
// fill the weights | ||
shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>( | ||
this->layer_param_.embed_param().weight_filler())); | ||
weight_filler->Fill(this->blobs_[0].get()); | ||
// If necessary, initialize and fill the bias term | ||
if (bias_term_) { | ||
vector<int> bias_shape(1, N_); | ||
this->blobs_[1].reset(new Blob<Dtype>(bias_shape)); | ||
shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>( | ||
this->layer_param_.embed_param().bias_filler())); | ||
bias_filler->Fill(this->blobs_[1].get()); | ||
} | ||
} // parameter initialization | ||
this->param_propagate_down_.resize(this->blobs_.size(), true); | ||
} | ||
|
||
template <typename Dtype> | ||
void EmbedLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
// Figure out the dimensions | ||
M_ = bottom[0]->count(); | ||
vector<int> top_shape = bottom[0]->shape(); | ||
top_shape.push_back(N_); | ||
top[0]->Reshape(top_shape); | ||
// Set up the bias multiplier | ||
if (bias_term_) { | ||
vector<int> bias_shape(1, M_); | ||
bias_multiplier_.Reshape(bias_shape); | ||
caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void EmbedLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
const Dtype* weight = this->blobs_[0]->cpu_data(); | ||
Dtype* top_data = top[0]->mutable_cpu_data(); | ||
int index; | ||
for (int n = 0; n < M_; ++n) { | ||
index = static_cast<int>(bottom_data[n]); | ||
DCHECK_GE(index, 0); | ||
DCHECK_LT(index, K_); | ||
DCHECK_EQ(static_cast<Dtype>(index), bottom_data[n]) << "non-integer input"; | ||
caffe_copy(N_, weight + index * N_, top_data + n * N_); | ||
} | ||
if (bias_term_) { | ||
const Dtype* bias = this->blobs_[1]->cpu_data(); | ||
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, Dtype(1), | ||
bias_multiplier_.cpu_data(), bias, Dtype(1), top_data); | ||
} | ||
} | ||
|
||
template <typename Dtype> | ||
void EmbedLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | ||
CHECK(!propagate_down[0]) << "Can't backpropagate to EmbedLayer input."; | ||
if (this->param_propagate_down_[0]) { | ||
const Dtype* top_diff = top[0]->cpu_diff(); | ||
const Dtype* bottom_data = bottom[0]->cpu_data(); | ||
// Gradient with respect to weight | ||
Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); | ||
int index; | ||
for (int n = 0; n < M_; ++n) { | ||
index = static_cast<int>(bottom_data[n]); | ||
DCHECK_GE(index, 0); | ||
DCHECK_LT(index, K_); | ||
DCHECK_EQ(static_cast<Dtype>(index), bottom_data[n]) | ||
<< "non-integer input"; | ||
caffe_axpy(N_, Dtype(1), top_diff + n * N_, weight_diff + index * N_); | ||
} | ||
} | ||
if (bias_term_ && this->param_propagate_down_[1]) { | ||
const Dtype* top_diff = top[0]->cpu_diff(); | ||
Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); | ||
caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, Dtype(1), top_diff, | ||
bias_multiplier_.cpu_data(), Dtype(1), bias_diff); | ||
} | ||
} | ||
|
||
#ifdef CPU_ONLY
// CPU-only builds: substitute stub GPU methods for EmbedLayer
// (STUB_GPU is a project macro; presumably the stubs abort if called).
STUB_GPU(EmbedLayer);
#endif

// Explicitly instantiate the class template and register the layer with
// the layer factory under the type name "Embed".
INSTANTIATE_CLASS(EmbedLayer);
REGISTER_LAYER_CLASS(Embed);

}  // namespace caffe
Oops, something went wrong.