Skip to content

Commit

Permalink
Merge pull request BVLC#1070 from sguada/move_data_mean
Browse files Browse the repository at this point in the history
Refactor data_transform to allow datum, cv::Mat and Blob transformation
  • Loading branch information
sguada committed Oct 4, 2014
2 parents 518a6c5 + 0b4ecbb commit 157aee9
Show file tree
Hide file tree
Showing 19 changed files with 1,153 additions and 194 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ ifeq ($(OSX), 1)
endif
# boost::thread is called boost_thread-mt to mark multithreading on OS X
LIBRARIES += boost_thread-mt
NVCCFLAGS += -DOSX
endif

# Custom compiler
Expand Down
18 changes: 6 additions & 12 deletions include/caffe/data_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,9 @@ class BaseDataLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

int datum_channels() const { return datum_channels_; }
int datum_height() const { return datum_height_; }
int datum_width() const { return datum_width_; }
int datum_size() const { return datum_size_; }

protected:
TransformationParameter transform_param_;
DataTransformer<Dtype> data_transformer_;
int datum_channels_;
int datum_height_;
int datum_width_;
int datum_size_;
Blob<Dtype> data_mean_;
const Dtype* mean_;
Caffe::Phase phase_;
bool output_labels_;
};
Expand Down Expand Up @@ -90,6 +79,7 @@ class BasePrefetchingDataLayer :
protected:
Blob<Dtype> prefetch_data_;
Blob<Dtype> prefetch_label_;
Blob<Dtype> transformed_data_;
};

template <typename Dtype>
Expand Down Expand Up @@ -294,12 +284,15 @@ class MemoryDataLayer : public BaseDataLayer<Dtype> {
void Reset(Dtype* data, Dtype* label, int n);

int batch_size() { return batch_size_; }
int channels() { return channels_; }
int height() { return height_; }
int width() { return width_; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

int batch_size_;
int batch_size_, channels_, height_, width_, size_;
Dtype* data_;
Dtype* labels_;
int n_;
Expand Down Expand Up @@ -339,6 +332,7 @@ class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
vector<vector<float> > fg_windows_;
vector<vector<float> > bg_windows_;
Blob<Dtype> data_mean_;
};

} // namespace caffe
Expand Down
74 changes: 60 additions & 14 deletions include/caffe/data_transformer.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
#ifndef CAFFE_DATA_TRANSFORMER_HPP
#define CAFFE_DATA_TRANSFORMER_HPP

#ifndef OSX
#include <opencv2/core/core.hpp>
#endif

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"

Expand All @@ -13,48 +20,87 @@ namespace caffe {
template <typename Dtype>
class DataTransformer {
public:
explicit DataTransformer(const TransformationParameter& param)
: param_(param) {
phase_ = Caffe::phase();
}
explicit DataTransformer(const TransformationParameter& param);
virtual ~DataTransformer() {}

/**
* @brief Initialize the Random number generations if needed by the
* transformation.
*/
void InitRand();

/**
* @brief Applies the transformation defined in the data layer's
* transform_param block to the data.
*
* @param batch_item_id
* Datum position within the batch. This is used to compute the
* writing position in the top blob's data
* @param datum
* Datum containing the data to be transformed.
* @param mean
* @param transformed_data
* This is meant to be the top blob's data. The transformed data will be
* written at the appropriate place within the blob's data.
* @param transformed_blob
*    This is the destination blob. It can be part of the top blob's data if
*    set_cpu_data() is used. See data_layer.cpp for an example.
*/
void Transform(const Datum& datum, Blob<Dtype>* transformed_blob);

/**
* @brief Applies the transformation defined in the data layer's
* transform_param block to a vector of Datum.
*
* @param datum_vector
* A vector of Datum containing the data to be transformed.
* @param transformed_blob
*    This is the destination blob. It can be part of the top blob's data if
*    set_cpu_data() is used. See memory_layer.cpp for an example.
*/
void Transform(const vector<Datum> & datum_vector,
Blob<Dtype>* transformed_blob);

/**
* @brief Applies the transformation defined in the data layer's
* transform_param block to a cv::Mat
*
* @param cv_img
* cv::Mat containing the data to be transformed.
* @param transformed_blob
*    This is the destination blob. It can be part of the top blob's data if
*    set_cpu_data() is used. See image_data_layer.cpp for an example.
*/
#ifndef OSX
void Transform(const cv::Mat& cv_img, Blob<Dtype>* transformed_blob);
#endif

/**
* @brief Applies the same transformation defined in the data layer's
* transform_param block to all the num images in an input_blob.
*
* @param input_blob
* A Blob containing the data to be transformed. It applies the same
* transformation to all the num images in the blob.
* @param transformed_blob
* This is destination blob, it will contain as many images as the
* input blob. It can be part of top blob's data.
*/
void Transform(const int batch_item_id, const Datum& datum,
const Dtype* mean, Dtype* transformed_data);
void Transform(Blob<Dtype>* input_blob, Blob<Dtype>* transformed_blob);

protected:
/**
* @brief Generates a random integer from Uniform({0, 1, ..., n-1}).
*
* @param n
*    The upper bound (exclusive) value of the random number.
* @return
*    A uniformly random integer value from ({0, 1, ..., n-1}).
*/
virtual int Rand(int n);

void Transform(const Datum& datum, Dtype* transformed_data);
// Transformation parameters
TransformationParameter param_;


shared_ptr<Caffe::RNG> rng_;
Caffe::Phase phase_;
Blob<Dtype> data_mean_;
vector<Dtype> mean_values_;
};

} // namespace caffe
Expand Down
2 changes: 2 additions & 0 deletions include/caffe/util/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class Timer {
void Start();
void Stop();
float MilliSeconds();
float MicroSeconds();
float Seconds();

inline bool initted() { return initted_; }
Expand All @@ -33,6 +34,7 @@ class Timer {
boost::posix_time::ptime start_cpu_;
boost::posix_time::ptime stop_cpu_;
float elapsed_milliseconds_;
float elapsed_microseconds_;
};

} // namespace caffe
Expand Down
32 changes: 31 additions & 1 deletion include/caffe/util/io.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#ifndef CAFFE_UTIL_IO_H_
#define CAFFE_UTIL_IO_H_

#ifndef OSX
#include <opencv2/core/core.hpp>
#endif

#include <unistd.h>
#include <string>

Expand Down Expand Up @@ -97,11 +101,37 @@ inline bool ReadImageToDatum(const string& filename, const int label,
return ReadImageToDatum(filename, label, height, width, true, datum);
}

inline bool ReadImageToDatum(const string& filename, const int label,
const bool is_color, Datum* datum) {
return ReadImageToDatum(filename, label, 0, 0, is_color, datum);
}

inline bool ReadImageToDatum(const string& filename, const int label,
Datum* datum) {
return ReadImageToDatum(filename, label, 0, 0, datum);
return ReadImageToDatum(filename, label, 0, 0, true, datum);
}

#ifndef OSX
cv::Mat ReadImageToCVMat(const string& filename,
const int height, const int width, const bool is_color);

inline cv::Mat ReadImageToCVMat(const string& filename,
const int height, const int width) {
return ReadImageToCVMat(filename, height, width, true);
}

inline cv::Mat ReadImageToCVMat(const string& filename,
const bool is_color) {
return ReadImageToCVMat(filename, 0, 0, is_color);
}

inline cv::Mat ReadImageToCVMat(const string& filename) {
return ReadImageToCVMat(filename, 0, 0, true);
}

void CVMatToDatum(const cv::Mat& cv_img, Datum* datum);
#endif

leveldb::Options GetLevelDBOptions();

template <typename Dtype>
Expand Down
16 changes: 16 additions & 0 deletions models/bvlc_reference_caffenet/train_val.prototxt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ layers {
mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
mirror: true
}
# mean pixel / channel-wise mean instead of mean image
# transform_param {
# crop_size: 227
# mean_value: 104
# mean_value: 117
# mean_value: 123
# mirror: true
# }
include: { phase: TRAIN }
}
layers {
Expand All @@ -31,6 +39,14 @@ layers {
mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
mirror: false
}
# mean pixel / channel-wise mean instead of mean image
# transform_param {
# crop_size: 227
# mean_value: 104
# mean_value: 117
# mean_value: 123
# mirror: true
# }
include: { phase: TEST }
}
layers {
Expand Down
4 changes: 2 additions & 2 deletions python/caffe/_caffe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ void PyNet::set_input_arrays(bp::object data_obj, bp::object labels_obj) {
reinterpret_cast<PyArrayObject*>(data_obj.ptr());
PyArrayObject* labels_arr =
reinterpret_cast<PyArrayObject*>(labels_obj.ptr());
check_contiguous_array(data_arr, "data array", md_layer->datum_channels(),
md_layer->datum_height(), md_layer->datum_width());
check_contiguous_array(data_arr, "data array", md_layer->channels(),
md_layer->height(), md_layer->width());
check_contiguous_array(labels_arr, "labels array", 1, 1, 1);
if (PyArray_DIMS(data_arr)[0] != PyArray_DIMS(labels_arr)[0]) {
throw std::runtime_error("data and labels must have the same first"
Expand Down
Loading

0 comments on commit 157aee9

Please sign in to comment.