Merge pull request #3487 from longjon/dilation
An early implementation of this functionality for Caffe was written by @gpapan; it was extended and improved by @tamakoji in an earlier version of this branch.
longjon committed Dec 28, 2015
2 parents 03a84bf + bbc4e57 commit 08c5dfd
Showing 16 changed files with 479 additions and 155 deletions.
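
For orientation before the per-file diffs: with stride $s$, pad $p$, and dilation $d$, the correlation the convolution layer computes along one spatial axis is

$$y[i] = \sum_{j=0}^{k-1} w[j]\, x\!\left[\,i \cdot s - p + j \cdot d\,\right],$$

so a kernel of size $k$ covers an extent of $d(k-1)+1$ input samples, and $d = 1$ recovers ordinary convolution. This extent is the `kernel_extent` that the shape computations below introduce.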
22 changes: 14 additions & 8 deletions include/caffe/layers/base_conv_layer.hpp
@@ -68,6 +68,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
   Blob<int> stride_;
   /// @brief The spatial dimensions of the padding.
   Blob<int> pad_;
+  /// @brief The spatial dimensions of the dilation.
+  Blob<int> dilation_;
   /// @brief The spatial dimensions of the convolution input.
   Blob<int> conv_input_shape_;
   /// @brief The spatial dimensions of the col_buffer.
@@ -99,11 +101,12 @@ class BaseConvolutionLayer : public Layer<Dtype> {
           conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
-          stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
+          stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
     } else {
       im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(),
           col_buffer_shape_.data(), kernel_shape_.cpu_data(),
-          pad_.cpu_data(), stride_.cpu_data(), col_buff);
+          pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), col_buff);
     }
   }
   inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
@@ -112,11 +115,12 @@ class BaseConvolutionLayer : public Layer<Dtype> {
           conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
-          stride_.cpu_data()[0], stride_.cpu_data()[1], data);
+          stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
     } else {
       col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
           col_buffer_shape_.data(), kernel_shape_.cpu_data(),
-          pad_.cpu_data(), stride_.cpu_data(), data);
+          pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), data);
     }
   }
 #ifndef CPU_ONLY
@@ -126,12 +130,13 @@ class BaseConvolutionLayer : public Layer<Dtype> {
           conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
-          stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
+          stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
     } else {
       im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_,
           conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
           kernel_shape_.gpu_data(), pad_.gpu_data(),
-          stride_.gpu_data(), col_buff);
+          stride_.gpu_data(), dilation_.gpu_data(), col_buff);
     }
   }
   inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
@@ -140,12 +145,13 @@ class BaseConvolutionLayer : public Layer<Dtype> {
           conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
-          stride_.cpu_data()[0], stride_.cpu_data()[1], data);
+          stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
     } else {
       col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
           conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
           kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
-          data);
+          dilation_.gpu_data(), data);
     }
   }
 #endif
3 changes: 3 additions & 0 deletions include/caffe/layers/conv_layer.hpp
@@ -44,6 +44,9 @@ class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
  *  convolution, given by pad for equal dimensions or pad_h and pad_w for
  *  different padding. Input padding is computed implicitly instead of
  *  actually padding.
+ *  - dilation (\b optional, default 1). The filter dilation, given by
+ *  dilation for equal dimensions, or once per spatial axis for different
+ *  dilations.
  *  - group (\b optional, default 1). The number of filter groups. Group
  *  convolution is a method for reducing parameterization by selectively
  *  connecting input and output channels. The input and output channel dimensions must be divisible
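
As a concrete reading of the parameter semantics above, the sketch below (not part of the commit; `DilationForAxis` is a hypothetical helper) resolves the repeated `dilation` field the same way the setup code in `base_conv_layer.cpp` does: absent means 1, a single value broadcasts to every spatial axis, and otherwise one value is given per axis.

```cpp
#include <iostream>
#include "caffe/proto/caffe.pb.h"  // generated from src/caffe/proto/caffe.proto

// Hypothetical helper mirroring the dilation parsing in LayerSetUp.
int DilationForAxis(const caffe::ConvolutionParameter& conv, int axis) {
  const int n = conv.dilation_size();
  if (n == 0) return 1;                       // default: no dilation
  return conv.dilation((n == 1) ? 0 : axis);  // broadcast one value, or per axis
}

int main() {
  caffe::ConvolutionParameter conv;
  conv.add_dilation(2);  // specified once: applies to both h and w
  std::cout << DilationForAxis(conv, 0) << " "
            << DilationForAxis(conv, 1) << std::endl;  // prints "2 2"
  return 0;
}
```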
2 changes: 2 additions & 0 deletions include/caffe/layers/im2col_layer.hpp
@@ -46,6 +46,8 @@ class Im2colLayer : public Layer<Dtype> {
   Blob<int> stride_;
   /// @brief The spatial dimensions of the padding.
   Blob<int> pad_;
+  /// @brief The spatial dimensions of the dilation.
+  Blob<int> dilation_;
 
   int num_spatial_axes_;
   int bottom_dim_;
20 changes: 12 additions & 8 deletions include/caffe/util/im2col.hpp
@@ -7,49 +7,53 @@ template <typename Dtype>
 void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes,
     const int* im_shape, const int* col_shape,
     const int* kernel_shape, const int* pad, const int* stride,
-    Dtype* data_col);
+    const int* dilation, Dtype* data_col);
 
 template <typename Dtype>
 void im2col_cpu(const Dtype* data_im, const int channels,
     const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
-    const int stride_w, Dtype* data_col);
+    const int stride_w, const int dilation_h, const int dilation_w,
+    Dtype* data_col);
 
 template <typename Dtype>
 void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,
     const int* im_shape, const int* col_shape,
     const int* kernel_shape, const int* pad, const int* stride,
-    Dtype* data_im);
+    const int* dilation, Dtype* data_im);
 
 template <typename Dtype>
 void col2im_cpu(const Dtype* data_col, const int channels,
     const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
-    const int stride_w, Dtype* data_im);
+    const int stride_w, const int dilation_h, const int dilation_w,
+    Dtype* data_im);
 
 template <typename Dtype>
 void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes,
     const int col_size, const int* im_shape, const int* col_shape,
     const int* kernel_shape, const int* pad, const int* stride,
-    Dtype* data_col);
+    const int* dilation, Dtype* data_col);
 
 template <typename Dtype>
 void im2col_gpu(const Dtype* data_im, const int channels,
     const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
-    const int stride_w, Dtype* data_col);
+    const int stride_w, const int dilation_h, const int dilation_w,
+    Dtype* data_col);
 
 template <typename Dtype>
 void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes,
     const int im_size, const int* im_shape, const int* col_shape,
     const int* kernel_shape, const int* pad, const int* stride,
-    Dtype* data_im);
+    const int* dilation, Dtype* data_im);
 
 template <typename Dtype>
 void col2im_gpu(const Dtype* data_col, const int channels,
     const int height, const int width, const int kernel_h, const int kernel_w,
     const int pad_h, const int pad_w, const int stride_h,
-    const int stride_w, Dtype* data_im);
+    const int stride_w, const int dilation_h, const int dilation_w,
+    Dtype* data_im);
 
 }  // namespace caffe
 
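
The declarations above only add `dilation` parameters; where they act is in the index arithmetic of the im2col kernels. A simplified single-channel 2-D reference (an illustrative sketch, not the optimized `im2col_cpu` in this commit) looks like this:

```cpp
#include <vector>

// Simplified single-channel 2-D im2col with dilation. The real im2col_cpu is
// multi-channel and optimized; this only shows where dilation enters.
void im2col_ref(const std::vector<float>& im, int height, int width,
                int kernel_h, int kernel_w, int pad_h, int pad_w,
                int stride_h, int stride_w, int dilation_h, int dilation_w,
                std::vector<float>* col) {
  const int out_h =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int out_w =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
  col->assign(kernel_h * kernel_w * out_h * out_w, 0.f);
  for (int kh = 0; kh < kernel_h; ++kh) {
    for (int kw = 0; kw < kernel_w; ++kw) {
      for (int oh = 0; oh < out_h; ++oh) {
        for (int ow = 0; ow < out_w; ++ow) {
          // Dilation scales the kernel offset; stride scales the output index.
          const int ih = oh * stride_h - pad_h + kh * dilation_h;
          const int iw = ow * stride_w - pad_w + kw * dilation_w;
          if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
            (*col)[((kh * kernel_w + kw) * out_h + oh) * out_w + ow] =
                im[ih * width + iw];
          }
        }
      }
    }
  }
}
```

The only change relative to undilated im2col is that the kernel offset is scaled by the dilation factor, so each patch samples the image with gaps of `dilation - 1` pixels.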
19 changes: 17 additions & 2 deletions src/caffe/layer_factory.cpp
@@ -37,17 +37,32 @@ namespace caffe {
 template <typename Dtype>
 shared_ptr<Layer<Dtype> > GetConvolutionLayer(
     const LayerParameter& param) {
-  ConvolutionParameter_Engine engine = param.convolution_param().engine();
+  ConvolutionParameter conv_param = param.convolution_param();
+  ConvolutionParameter_Engine engine = conv_param.engine();
+#ifdef USE_CUDNN
+  bool use_dilation = false;
+  for (int i = 0; i < conv_param.dilation_size(); ++i) {
+    if (conv_param.dilation(i) > 1) {
+      use_dilation = true;
+    }
+  }
+#endif
   if (engine == ConvolutionParameter_Engine_DEFAULT) {
     engine = ConvolutionParameter_Engine_CAFFE;
 #ifdef USE_CUDNN
-    engine = ConvolutionParameter_Engine_CUDNN;
+    if (!use_dilation) {
+      engine = ConvolutionParameter_Engine_CUDNN;
+    }
 #endif
   }
   if (engine == ConvolutionParameter_Engine_CAFFE) {
     return shared_ptr<Layer<Dtype> >(new ConvolutionLayer<Dtype>(param));
 #ifdef USE_CUDNN
   } else if (engine == ConvolutionParameter_Engine_CUDNN) {
+    if (use_dilation) {
+      LOG(FATAL) << "CuDNN doesn't support dilated convolution at layer "
+                 << param.name();
+    }
     return shared_ptr<Layer<Dtype> >(new CuDNNConvolutionLayer<Dtype>(param));
 #endif
   } else {
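
The net effect of the factory changes: with the engine left at `DEFAULT`, any `dilation > 1` keeps the layer on the `CAFFE` engine even in a cuDNN build, while explicitly requesting `CUDNN` together with dilation aborts via `LOG(FATAL)`. A sketch of the first case (assumes a Caffe build; not part of the commit):

```cpp
#include <boost/shared_ptr.hpp>
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"

// With engine left at DEFAULT, dilation > 1 makes GetConvolutionLayer fall
// back to the plain ConvolutionLayer even when cuDNN is compiled in.
boost::shared_ptr<caffe::Layer<float> > MakeDilatedConv() {
  caffe::LayerParameter param;
  param.set_name("conv1_dilated");
  param.set_type("Convolution");
  caffe::ConvolutionParameter* conv = param.mutable_convolution_param();
  conv->set_num_output(16);
  conv->add_kernel_size(3);
  conv->add_dilation(2);  // > 1, so DEFAULT resolves to the CAFFE engine
  return caffe::LayerRegistry<float>::CreateLayer(param);
}
```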
20 changes: 17 additions & 3 deletions src/caffe/layers/base_conv_layer.cpp
@@ -36,7 +36,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
   CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_)
       << "kernel_size must be specified once, or once per spatial dimension "
       << "(kernel_size specified " << num_kernel_dims << " times; "
-      << num_spatial_axes_ << " spatial dims);";
+      << num_spatial_axes_ << " spatial dims).";
   for (int i = 0; i < num_spatial_axes_; ++i) {
     kernel_shape_data[i] =
         conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i);
@@ -61,7 +61,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       num_stride_dims == num_spatial_axes_)
       << "stride must be specified once, or once per spatial dimension "
       << "(stride specified " << num_stride_dims << " times; "
-      << num_spatial_axes_ << " spatial dims);";
+      << num_spatial_axes_ << " spatial dims).";
   const int kDefaultStride = 1;
   for (int i = 0; i < num_spatial_axes_; ++i) {
     stride_data[i] = (num_stride_dims == 0) ? kDefaultStride :
@@ -85,13 +85,27 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
         num_pad_dims == num_spatial_axes_)
         << "pad must be specified once, or once per spatial dimension "
         << "(pad specified " << num_pad_dims << " times; "
-        << num_spatial_axes_ << " spatial dims);";
+        << num_spatial_axes_ << " spatial dims).";
     const int kDefaultPad = 0;
     for (int i = 0; i < num_spatial_axes_; ++i) {
       pad_data[i] = (num_pad_dims == 0) ? kDefaultPad :
           conv_param.pad((num_pad_dims == 1) ? 0 : i);
     }
   }
+  // Setup dilation dimensions (dilation_).
+  dilation_.Reshape(spatial_dim_blob_shape);
+  int* dilation_data = dilation_.mutable_cpu_data();
+  const int num_dilation_dims = conv_param.dilation_size();
+  CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
+      num_dilation_dims == num_spatial_axes_)
+      << "dilation must be specified once, or once per spatial dimension "
+      << "(dilation specified " << num_dilation_dims << " times; "
+      << num_spatial_axes_ << " spatial dims).";
+  const int kDefaultDilation = 1;
+  for (int i = 0; i < num_spatial_axes_; ++i) {
+    dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
+        conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
+  }
   // Special case: im2col is the identity for 1x1 convolution with stride 1
   // and no padding, so flag for skipping the buffer and transformation.
   is_1x1_ = true;
4 changes: 3 additions & 1 deletion src/caffe/layers/conv_layer.cpp
@@ -9,11 +9,13 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
   const int* kernel_shape_data = this->kernel_shape_.cpu_data();
   const int* stride_data = this->stride_.cpu_data();
   const int* pad_data = this->pad_.cpu_data();
+  const int* dilation_data = this->dilation_.cpu_data();
   this->output_shape_.clear();
   for (int i = 0; i < this->num_spatial_axes_; ++i) {
     // i + 1 to skip channel axis
     const int input_dim = this->input_shape(i + 1);
-    const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
+    const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
+    const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
         / stride_data[i] + 1;
     this->output_shape_.push_back(output_dim);
   }
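
Worked numbers for the output-shape rule above: dilation only changes the formula by replacing the kernel size k with the extent d*(k-1)+1. A standalone check (illustrative, not part of the commit):

```cpp
#include <cstdio>

// Output spatial dimension of a dilated convolution along one axis.
int conv_out_dim(int input, int kernel, int pad, int stride, int dilation) {
  const int kernel_extent = dilation * (kernel - 1) + 1;
  return (input + 2 * pad - kernel_extent) / stride + 1;
}

int main() {
  std::printf("%d\n", conv_out_dim(7, 3, 0, 1, 1));  // 5: ordinary 3x3
  std::printf("%d\n", conv_out_dim(7, 3, 0, 1, 2));  // 3: extent grows to 5
  std::printf("%d\n", conv_out_dim(7, 3, 2, 1, 2));  // 7: pad == dilation
  return 0;
}
```

A handy consequence visible in the last line: at stride 1, a 3x3 kernel preserves spatial size whenever pad equals dilation.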
4 changes: 3 additions & 1 deletion src/caffe/layers/deconv_layer.cpp
@@ -9,12 +9,14 @@ void DeconvolutionLayer<Dtype>::compute_output_shape() {
   const int* kernel_shape_data = this->kernel_shape_.cpu_data();
   const int* stride_data = this->stride_.cpu_data();
   const int* pad_data = this->pad_.cpu_data();
+  const int* dilation_data = this->dilation_.cpu_data();
   this->output_shape_.clear();
   for (int i = 0; i < this->num_spatial_axes_; ++i) {
     // i + 1 to skip channel axis
     const int input_dim = this->input_shape(i + 1);
+    const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
     const int output_dim = stride_data[i] * (input_dim - 1)
-        + kernel_shape_data[i] - 2 * pad_data[i];
+        + kernel_extent - 2 * pad_data[i];
     this->output_shape_.push_back(output_dim);
   }
 }
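
The deconvolution rule above is the convolution rule solved for the input dimension, so a deconvolution with matching parameters restores the size a convolution consumed (exactly, when the strided division is even). A quick round-trip check (illustrative sketch):

```cpp
#include <cassert>

int conv_out_dim(int input, int kernel, int pad, int stride, int dilation) {
  const int kernel_extent = dilation * (kernel - 1) + 1;
  return (input + 2 * pad - kernel_extent) / stride + 1;
}

int deconv_out_dim(int input, int kernel, int pad, int stride, int dilation) {
  const int kernel_extent = dilation * (kernel - 1) + 1;
  return stride * (input - 1) + kernel_extent - 2 * pad;
}

int main() {
  const int in = 17, k = 3, p = 1, s = 2, d = 2;
  const int down = conv_out_dim(in, k, p, s, d);   // 8
  assert(deconv_out_dim(down, k, p, s, d) == in);  // round-trips to 17
  return 0;
}
```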
25 changes: 22 additions & 3 deletions src/caffe/layers/im2col_layer.cpp
@@ -87,6 +87,20 @@ void Im2colLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
           conv_param.pad((num_pad_dims == 1) ? 0 : i);
     }
   }
+  // Setup dilation dimensions (dilation_).
+  dilation_.Reshape(dim_blob_shape);
+  int* dilation_data = dilation_.mutable_cpu_data();
+  const int num_dilation_dims = conv_param.dilation_size();
+  CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
+      num_dilation_dims == num_spatial_axes_)
+      << "dilation must be specified once, or once per spatial dimension "
+      << "(dilation specified " << num_dilation_dims << " times; "
+      << num_spatial_axes_ << " spatial dims).";
+  const int kDefaultDilation = 1;
+  for (int i = 0; i < num_spatial_axes_; ++i) {
+    dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
+        conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
+  }
 }
 
 template <typename Dtype>
@@ -96,10 +110,12 @@ void Im2colLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
   const int* kernel_shape_data = kernel_shape_.cpu_data();
   const int* stride_data = stride_.cpu_data();
   const int* pad_data = pad_.cpu_data();
+  const int* dilation_data = dilation_.cpu_data();
   for (int i = 0; i < num_spatial_axes_; ++i) {
     top_shape[channel_axis_] *= kernel_shape_data[i];
     const int input_dim = bottom[0]->shape(channel_axis_ + i + 1);
-    const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
+    const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
+    const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
         / stride_data[i] + 1;
     top_shape[channel_axis_ + i + 1] = output_dim;
   }
@@ -122,20 +138,22 @@ void Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_);
     DCHECK_EQ(pad_.count(), num_spatial_axes_);
     DCHECK_EQ(stride_.count(), num_spatial_axes_);
+    DCHECK_EQ(dilation_.count(), num_spatial_axes_);
     if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
       im2col_cpu(bottom_data + n * bottom_dim_, channels_,
           bottom[0]->shape(channel_axis_ + 1),
           bottom[0]->shape(channel_axis_ + 2),
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
           stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1],
           top_data + n * top_dim_);
     } else {
       im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
           bottom[0]->shape().data() + channel_axis_,
           top[0]->shape().data() + channel_axis_,
           kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
-          top_data + n * top_dim_);
+          dilation_.cpu_data(), top_data + n * top_dim_);
     }
   }
 }
@@ -153,13 +171,14 @@ void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
           stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1],
           bottom_diff + n * bottom_dim_);
     } else {
       col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_,
           bottom[0]->shape().data() + channel_axis_,
           top[0]->shape().data() + channel_axis_,
           kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
-          bottom_diff + n * bottom_dim_);
+          dilation_.cpu_data(), bottom_diff + n * bottom_dim_);
     }
   }
 }
6 changes: 4 additions & 2 deletions src/caffe/layers/im2col_layer.cu
@@ -19,13 +19,14 @@ void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
           stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1],
           top_data + n * top_dim_);
     } else {
       im2col_nd_gpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
           num_kernels, bottom[0]->gpu_shape() + channel_axis_,
           top[0]->gpu_shape() + channel_axis_,
           kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
-          top_data + n * top_dim_);
+          dilation_.gpu_data(), top_data + n * top_dim_);
     }
   }
 }
@@ -43,13 +44,14 @@ void Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
           kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
           pad_.cpu_data()[0], pad_.cpu_data()[1],
           stride_.cpu_data()[0], stride_.cpu_data()[1],
+          dilation_.cpu_data()[0], dilation_.cpu_data()[1],
           bottom_diff + n * bottom_dim_);
     } else {
       col2im_nd_gpu(top_diff + n * top_dim_, num_spatial_axes_, bottom_dim_,
           bottom[0]->gpu_shape() + channel_axis_,
           top[0]->gpu_shape() + channel_axis_,
           kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
-          bottom_diff + n * bottom_dim_);
+          dilation_.gpu_data(), bottom_diff + n * bottom_dim_);
     }
   }
 }
4 changes: 4 additions & 0 deletions src/caffe/proto/caffe.proto
@@ -518,6 +518,10 @@ message ConvolutionParameter {
   repeated uint32 pad = 3;  // The padding size; defaults to 0
   repeated uint32 kernel_size = 4;  // The kernel size
   repeated uint32 stride = 6;  // The stride; defaults to 1
+  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
+  // holes. (Kernel dilation is sometimes referred to by its use in the
+  // algorithme à trous from Holschneider et al. 1987.)
+  repeated uint32 dilation = 18;  // The dilation; defaults to 1
 
   // For 2D convolution only, the *_h and *_w versions may also be used to
   // specify both spatial dimensions.
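
To see why the à trous scheme mentioned in the comment is attractive: stacking stride-1 layers with exponentially increasing dilation grows the receptive field exponentially while every layer keeps a constant 3x3 cost. A small illustration (not part of the commit):

```cpp
#include <cstdio>

int main() {
  int rf = 1;  // receptive field of the identity
  const int kernel = 3;
  const int dilations[] = {1, 2, 4, 8};
  for (int d : dilations) {
    const int kernel_extent = d * (kernel - 1) + 1;
    rf += kernel_extent - 1;  // stride 1: each layer adds extent - 1
    std::printf("dilation %d -> receptive field %d\n", d, rf);
  }
  // Prints 3, 7, 15, 31: coverage roughly doubles per layer at constant cost.
  return 0;
}
```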
