Nd Pooling #2442 (Open)

Wants to merge 12 commits into master.
2 changes: 2 additions & 0 deletions include/caffe/blob.hpp
@@ -219,6 +219,7 @@ class Blob {

const Dtype* cpu_data() const;
void set_cpu_data(Dtype* data);
const int* gpu_shape() const;
const Dtype* gpu_data() const;
const Dtype* cpu_diff() const;
const Dtype* gpu_diff() const;
@@ -268,6 +269,7 @@
protected:
shared_ptr<SyncedMemory> data_;
shared_ptr<SyncedMemory> diff_;
shared_ptr<SyncedMemory> shape_data_;
vector<int> shape_;
int count_;
int capacity_;
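The new gpu_shape() accessor exposes a device-resident copy of the blob's shape, so N-D kernels can decompose flat indices without copying the shape to the device on every call. A minimal sketch (not part of this PR) of a CUDA kernel consuming it, built on Caffe's CUDA_KERNEL_LOOP and CAFFE_GET_BLOCKS helpers:

template <typename Dtype>
__global__ void ScaleOuterAxis(const int n, const int num_axes,
    const int* shape, Dtype* data) {
  CUDA_KERNEL_LOOP(index, n) {
    // Elements in one slice along axis 0, computed from the device-side shape.
    int inner = 1;
    for (int i = 1; i < num_axes; ++i) { inner *= shape[i]; }
    const int i0 = index / inner;  // coordinate along axis 0
    data[index] *= Dtype(i0 + 1);  // arbitrary per-slice scaling
  }
}

// Hypothetical launch, reading the shape already resident on the GPU:
// ScaleOuterAxis<float><<<CAFFE_GET_BLOCKS(blob.count()),
//     CAFFE_CUDA_NUM_THREADS>>>(blob.count(), blob.num_axes(),
//     blob.gpu_shape(), blob.mutable_gpu_data());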
1 change: 1 addition & 0 deletions include/caffe/test/test_gradient_check_util.hpp
@@ -157,6 +157,7 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
Dtype feature = current_blob->cpu_data()[feat_id];
// LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
// << current_blob->cpu_diff()[feat_id];

if (kink_ - kink_range_ > fabs(feature)
|| fabs(feature) > kink_ + kink_range_) {
// We check relative accuracy, but for too small values, we threshold
32 changes: 16 additions & 16 deletions include/caffe/util/im2col.hpp
@@ -4,28 +4,28 @@
namespace caffe {

template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_col);
void im2col_cpu(const Dtype* data_im, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_col);

template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_im);
void col2im_cpu(const Dtype* data_col, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_im);

template <typename Dtype>
void im2col_gpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_col);
void im2col_gpu(const Dtype* data_im, const int num_spatial_axes,
const int col_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_col);

template <typename Dtype>
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, Dtype* data_im);
void col2im_gpu(const Dtype* data_col, const int num_spatial_axes,
const int im_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
Dtype* data_im);

} // namespace caffe

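The per-axis scalars (kernel_h/kernel_w, pad_h/pad_w, stride_h/stride_w) become shape arrays indexed by spatial axis, and the GPU variants additionally take the total thread count (col_size / im_size). A hedged sketch of a 3-D (volumetric) call, assuming im_shape begins at the channel axis and col_shape[0] folds channels together with the kernel volume:

const int num_spatial_axes = 3;
const int im_shape[]     = {1, 8, 8, 8};   // C, D, H, W
const int kernel_shape[] = {3, 3, 3};
const int pad[]          = {0, 0, 0};
const int stride[]       = {1, 1, 1};
// Per axis: out = (in + 2 * pad - kernel) / stride + 1 = 6.
const int col_shape[]    = {1 * 3 * 3 * 3, 6, 6, 6};
std::vector<float> im(1 * 8 * 8 * 8, 1.0f);
std::vector<float> col(27 * 6 * 6 * 6);
caffe::im2col_cpu(im.data(), num_spatial_axes, im_shape, col_shape,
                  kernel_shape, pad, stride, col.data());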
97 changes: 73 additions & 24 deletions include/caffe/vision_layers.hpp
@@ -64,44 +64,66 @@ class BaseConvolutionLayer : public Layer<Dtype> {
// Compute height_out_ and width_out_ from other parameters.
virtual void compute_output_shape() = 0;

int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
/// @brief The spatial dimensions of a filter kernel.
Blob<int> kernel_shape_;
/// @brief The spatial dimensions of the stride.
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
/// @brief The spatial dimensions of the convolution input.
Blob<int> conv_input_shape_;
/// @brief The spatial dimensions of the input.
Blob<int> input_shape_;
/// @brief The spatial dimensions of the col_buffer.
vector<int> col_buffer_shape_;
/// @brief The spatial dimensions of the output.
vector<int> output_shape_;

int num_spatial_axes_;
int bottom_dim_;
int top_dim_;

int channel_axis_;
int num_;
int channels_;
int pad_h_, pad_w_;
int height_, width_;
int group_;
int num_output_;
int height_out_, width_out_;
bool bias_term_;
bool is_1x1_;

private:
// wrap im2col/col2im so we don't have to remember the (long) argument lists
inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) {
im2col_cpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
im2col_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
pad_.cpu_data(), stride_.cpu_data(), col_buff);
}
inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
col2im_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
pad_.cpu_data(), stride_.cpu_data(), data);
}
#ifndef CPU_ONLY
inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
im2col_gpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
im2col_gpu(data, num_spatial_axes_, num_kernels_im2col_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(),
stride_.gpu_data(), col_buff);
}
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
col2im_gpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
col2im_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
data);
}
#endif

int num_kernels_im2col_;
int num_kernels_col2im_;
int conv_out_channels_;
int conv_in_channels_;
int conv_out_spatial_dim_;
int conv_in_height_;
int conv_in_width_;
int out_spatial_dim_;
int kernel_dim_;
int weight_offset_;
int col_offset_;
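Keeping the filter geometry in Blob<int> members lets each parameter be written once on the host and then read by both code paths through cpu_data()/gpu_data(). A setup sketch (assumed, not the PR's exact code; conv_param stands in for the layer's ConvolutionParameter with a repeated kernel_size field):

vector<int> spatial_dim_blob_shape(1, num_spatial_axes_);
kernel_shape_.Reshape(spatial_dim_blob_shape);
int* kernel_shape_data = kernel_shape_.mutable_cpu_data();
for (int i = 0; i < num_spatial_axes_; ++i) {
  kernel_shape_data[i] = conv_param.kernel_size(i);  // assumed repeated field
}
// conv_im2col_gpu() can then read kernel_shape_.gpu_data() directly.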
@@ -287,11 +309,26 @@ class Im2colLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
/// @brief The spatial dimensions of a filter kernel.
Blob<int> kernel_shape_;
/// @brief The spatial dimensions of the stride.
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
/// @brief The (full) shape of the input.
Blob<int>* input_shape_;
/// @brief The (full) shape of the conv input.
Blob<int>* conv_input_shape_;
/// @brief The spatial dimensions of the output col.
vector<int> col_shape_;

int num_spatial_axes_;
int bottom_dim_;
int top_dim_;

int channel_axis_;
int num_;
int channels_;
int height_, width_;
int pad_h_, pad_w_;
};
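Each spatial axis of the col output follows the usual convolution arithmetic with floor division; an illustrative helper (not the PR's code) for the rule these shape arrays feed:

inline int conv_out_dim(int in, int kernel, int pad, int stride) {
  return (in + 2 * pad - kernel) / stride + 1;  // floor (integer) division
}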

// Forward declare PoolingLayer and SplitLayer for use in LRNLayer.
@@ -409,12 +446,24 @@ class PoolingLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int kernel_h_, kernel_w_;
int stride_h_, stride_w_;
int pad_h_, pad_w_;
/// @brief The spatial dimensions of a filter kernel.
Blob<int> kernel_shape_;
/// @brief The spatial dimensions of the stride.
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
/// @brief The spatial dimensions of the input.
Blob<int> input_shape_;
/// @brief The spatial dimensions of the output.
Blob<int> output_shape_;

int num_spatial_axes_;
int bottom_dim_;
int top_dim_;

int channel_axis_;
int channels_;
int height_, width_;
int pooled_height_, pooled_width_;
int num_;
bool global_pooling_;
Blob<Dtype> rand_idx_;
Blob<int> max_idx_;
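Pooling differs from convolution in rounding its output extent up, so partial windows at the border still get pooled. A sketch of the per-axis rule, assuming the N-D layer keeps the 2-D layer's ceiling convention:

#include <cmath>

inline int pooled_out_dim(int in, int kernel, int pad, int stride) {
  return static_cast<int>(std::ceil(
      static_cast<float>(in + 2 * pad - kernel) / stride)) + 1;
}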
9 changes: 9 additions & 0 deletions src/caffe/blob.cpp
@@ -24,11 +24,14 @@ void Blob<Dtype>::Reshape(const vector<int>& shape) {
CHECK_LE(shape.size(), kMaxBlobAxes);
count_ = 1;
shape_.resize(shape.size());
shape_data_.reset(new SyncedMemory(shape.size() * sizeof(int)));
int* shape_data = static_cast<int*>(shape_data_->mutable_cpu_data());
for (int i = 0; i < shape.size(); ++i) {
CHECK_GE(shape[i], 0);
CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX";
count_ *= shape[i];
shape_[i] = shape[i];
shape_data[i] = shape[i];
}
if (count_ > capacity_) {
capacity_ = count_;
@@ -67,6 +70,12 @@ Blob<Dtype>::Blob(const vector<int>& shape)
Reshape(shape);
}

template <typename Dtype>
const int* Blob<Dtype>::gpu_shape() const {
CHECK(shape_data_);
return static_cast<const int*>(shape_data_->gpu_data());
}

template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_data() const {
CHECK(data_);
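Because Reshape() rewrites shape_data_ alongside shape_, gpu_shape() stays consistent with the host-side shape after any reshape. A usage sketch:

caffe::Blob<float> b;
std::vector<int> shape(4);
shape[0] = 2; shape[1] = 3; shape[2] = 4; shape[3] = 5;
b.Reshape(shape);
const int* dev_shape = b.gpu_shape();  // device copy of {2, 3, 4, 5}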