From f6ffd8ef0f14c6889c09efe2423c56118b0c3539 Mon Sep 17 00:00:00 2001 From: TANGUY Arnaud Date: Wed, 20 Aug 2014 18:37:54 +0200 Subject: [PATCH] Refactor DataLayer using a new DataTransformer Start the refactoring of the datalayers to avoid data transformation code duplication. So far, only DataLayer has been done. --- include/caffe/data_layers.hpp | 8 +- include/caffe/data_transformer.hpp | 55 +++++++++++++ src/caffe/data_transformer.cpp | 114 ++++++++++++++++++++++++++ src/caffe/layers/data_layer.cpp | 98 +++------------------- src/caffe/proto/caffe.proto | 33 +++++--- src/caffe/test/test_data_layer.cpp | 29 +++++-- src/caffe/test/test_net.cpp | 16 +++- src/caffe/test/test_split_layer.cpp | 8 +- src/caffe/test/test_upgrade_proto.cpp | 26 +++--- src/caffe/util/upgrade_proto.cpp | 14 ++-- 10 files changed, 267 insertions(+), 134 deletions(-) create mode 100644 include/caffe/data_transformer.hpp create mode 100644 src/caffe/data_transformer.cpp diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 8ab92bfc0a5..06508eabf57 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -12,6 +12,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" +#include "caffe/data_transformer.hpp" #include "caffe/filler.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" @@ -24,12 +25,12 @@ namespace caffe { // TODO: DataLayer, ImageDataLayer, and WindowDataLayer all have the // same basic structure and a lot of duplicated code. 
- template class DataLayer : public Layer, public InternalThread { public: explicit DataLayer(const LayerParameter& param) - : Layer(param) {} + : Layer(param), + data_transformer_(param.data_param().transform_param()) {} virtual ~DataLayer(); virtual void LayerSetUp(const vector*>& bottom, vector*>* top); @@ -53,11 +54,10 @@ class DataLayer : public Layer, public InternalThread { virtual void CreatePrefetchThread(); virtual void JoinPrefetchThread(); - virtual unsigned int PrefetchRand(); // The thread's function virtual void InternalThreadEntry(); - shared_ptr prefetch_rng_; + DataTransformer data_transformer_; // LEVELDB shared_ptr db_; diff --git a/include/caffe/data_transformer.hpp b/include/caffe/data_transformer.hpp new file mode 100644 index 00000000000..302bf70e606 --- /dev/null +++ b/include/caffe/data_transformer.hpp @@ -0,0 +1,55 @@ +#ifndef CAFFE_DATA_TRANSFORMER_HPP +#define CAFFE_DATA_TRANSFORMER_HPP + +#include "caffe/common.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +/** + * @brief Applies common transformations to the input data, such as + * scaling, mirroring, subtracting the image mean... + */ +template +class DataTransformer { + public: + explicit DataTransformer(const TransformationParameter& param) + : param_(param) { + phase_ = Caffe::phase(); + } + virtual ~DataTransformer() {} + + void InitRand(); + + /** + * @brief Applies the transformation defined in the data layer's + * transform_param block to the data. + * + * @param batch_item_id + * Datum position within the batch. This is used to compute the + * writing position in the top blob's data + * @param datum + * Datum containing the data to be transformed. + * @param mean + * @param transformed_data + * This is meant to be the top blob's data. The transformed data will be + * written at the appropriate place within the blob's data. 
+ */ + void Transform(const int batch_item_id, const Datum& datum, + const Dtype* mean, Dtype* transformed_data); + + protected: + virtual unsigned int Rand(); + + // Transformation parameters + TransformationParameter param_; + + + shared_ptr rng_; + Caffe::Phase phase_; +}; + +} // namespace caffe + +#endif // CAFFE_DATA_TRANSFORMER_HPP + diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp new file mode 100644 index 00000000000..609c06d6c31 --- /dev/null +++ b/src/caffe/data_transformer.cpp @@ -0,0 +1,114 @@ +#include + +#include "caffe/data_transformer.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/util/rng.hpp" + +namespace caffe { + +template +void DataTransformer::Transform(const int batch_item_id, + const Datum& datum, + const Dtype* mean, + Dtype* transformed_data) { + + const string& data = datum.data(); + const int channels = datum.channels(); + const int height = datum.height(); + const int width = datum.width(); + const int size = datum.channels() * datum.height() * datum.width(); + + const int crop_size = param_.crop_size(); + const bool mirror = param_.mirror(); + const Dtype scale = param_.scale(); + + + + if (mirror && crop_size == 0) { + LOG(FATAL) << "Current implementation requires mirror and crop_size to be " + << "set at the same time."; + } + + if (crop_size) { + CHECK(data.size()) << "Image cropping only support uint8 data"; + int h_off, w_off; + // We only do random crop when we do training. 
+ if (phase_ == Caffe::TRAIN) { + h_off = Rand() % (height - crop_size); + w_off = Rand() % (width - crop_size); + } else { + h_off = (height - crop_size) / 2; + w_off = (width - crop_size) / 2; + } + if (mirror && Rand() % 2) { + // Copy mirrored version + for (int c = 0; c < channels; ++c) { + for (int h = 0; h < crop_size; ++h) { + for (int w = 0; w < crop_size; ++w) { + int data_index = (c * height + h + h_off) * width + w + w_off; + int top_index = ((batch_item_id * channels + c) * crop_size + h) + * crop_size + (crop_size - 1 - w); + Dtype datum_element = + static_cast(static_cast(data[data_index])); + transformed_data[top_index] = + (datum_element - mean[data_index]) * scale; + } + } + } + } else { + // Normal copy + for (int c = 0; c < channels; ++c) { + for (int h = 0; h < crop_size; ++h) { + for (int w = 0; w < crop_size; ++w) { + int top_index = ((batch_item_id * channels + c) * crop_size + h) + * crop_size + w; + int data_index = (c * height + h + h_off) * width + w + w_off; + Dtype datum_element = + static_cast(static_cast(data[data_index])); + transformed_data[top_index] = + (datum_element - mean[data_index]) * scale; + } + } + } + } + } else { + // we will prefer to use data() first, and then try float_data() + if (data.size()) { + for (int j = 0; j < size; ++j) { + Dtype datum_element = + static_cast(static_cast(data[j])); + transformed_data[j + batch_item_id * size] = + (datum_element - mean[j]) * scale; + } + } else { + for (int j = 0; j < size; ++j) { + transformed_data[j + batch_item_id * size] = + (datum.float_data(j) - mean[j]) * scale; + } + } + } +} + +template +void DataTransformer::InitRand() { + const bool needs_rand = (phase_ == Caffe::TRAIN) && + (param_.mirror() || param_.crop_size()); + if (needs_rand) { + const unsigned int rng_seed = caffe_rng_rand(); + rng_.reset(new Caffe::RNG(rng_seed)); + } else { + rng_.reset(); + } +} + +template +unsigned int DataTransformer::Rand() { + CHECK(rng_); + caffe::rng_t* rng = + 
static_cast(rng_->generator()); + return (*rng)(); +} + +INSTANTIATE_CLASS(DataTransformer); + +} // namespace caffe diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index c2b0c73a53b..b6ac5d6b351 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -23,20 +23,8 @@ void DataLayer::InternalThreadEntry() { if (output_labels_) { top_label = prefetch_label_.mutable_cpu_data(); } - const Dtype scale = this->layer_param_.data_param().scale(); const int batch_size = this->layer_param_.data_param().batch_size(); - const int crop_size = this->layer_param_.data_param().crop_size(); - const bool mirror = this->layer_param_.data_param().mirror(); - if (mirror && crop_size == 0) { - LOG(FATAL) << "Current implementation requires mirror and crop_size to be " - << "set at the same time."; - } - // datum scales - const int channels = datum_channels_; - const int height = datum_height_; - const int width = datum_width_; - const int size = datum_size_; const Dtype* mean = data_mean_.cpu_data(); for (int item_id = 0; item_id < batch_size; ++item_id) { // get a blob @@ -56,66 +44,13 @@ void DataLayer::InternalThreadEntry() { LOG(FATAL) << "Unknown database backend"; } - const string& data = datum.data(); - if (crop_size) { - CHECK(data.size()) << "Image cropping only support uint8 data"; - int h_off, w_off; - // We only do random crop when we do training. 
- if (phase_ == Caffe::TRAIN) { - h_off = PrefetchRand() % (height - crop_size); - w_off = PrefetchRand() % (width - crop_size); - } else { - h_off = (height - crop_size) / 2; - w_off = (width - crop_size) / 2; - } - if (mirror && PrefetchRand() % 2) { - // Copy mirrored version - for (int c = 0; c < channels; ++c) { - for (int h = 0; h < crop_size; ++h) { - for (int w = 0; w < crop_size; ++w) { - int top_index = ((item_id * channels + c) * crop_size + h) - * crop_size + (crop_size - 1 - w); - int data_index = (c * height + h + h_off) * width + w + w_off; - Dtype datum_element = - static_cast(static_cast(data[data_index])); - top_data[top_index] = (datum_element - mean[data_index]) * scale; - } - } - } - } else { - // Normal copy - for (int c = 0; c < channels; ++c) { - for (int h = 0; h < crop_size; ++h) { - for (int w = 0; w < crop_size; ++w) { - int top_index = ((item_id * channels + c) * crop_size + h) - * crop_size + w; - int data_index = (c * height + h + h_off) * width + w + w_off; - Dtype datum_element = - static_cast(static_cast(data[data_index])); - top_data[top_index] = (datum_element - mean[data_index]) * scale; - } - } - } - } - } else { - // we will prefer to use data() first, and then try float_data() - if (data.size()) { - for (int j = 0; j < size; ++j) { - Dtype datum_element = - static_cast(static_cast(data[j])); - top_data[item_id * size + j] = (datum_element - mean[j]) * scale; - } - } else { - for (int j = 0; j < size; ++j) { - top_data[item_id * size + j] = - (datum.float_data(j) - mean[j]) * scale; - } - } - } + // Apply data transformations (mirror, scale, crop...) 
+ data_transformer_.Transform(item_id, datum, mean, top_data); if (output_labels_) { top_label[item_id] = datum.label(); } + // go to the next iter switch (this->layer_param_.data_param().backend()) { case DataParameter_DB_LEVELDB: @@ -244,7 +179,7 @@ void DataLayer::LayerSetUp(const vector*>& bottom, } // image - int crop_size = this->layer_param_.data_param().crop_size(); + int crop_size = this->layer_param_.data_param().transform_param().crop_size(); if (crop_size > 0) { (*top)[0]->Reshape(this->layer_param_.data_param().batch_size(), datum.channels(), crop_size, crop_size); @@ -274,8 +209,9 @@ void DataLayer::LayerSetUp(const vector*>& bottom, CHECK_GT(datum_height_, crop_size); CHECK_GT(datum_width_, crop_size); // check if we want to have mean - if (this->layer_param_.data_param().has_mean_file()) { - const string& mean_file = this->layer_param_.data_param().mean_file(); + if (this->layer_param_.data_param().transform_param().has_mean_file()) { + const string& mean_file = + this->layer_param_.data_param().transform_param().mean_file(); LOG(INFO) << "Loading mean file from" << mean_file; BlobProto blob_proto; ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); @@ -305,15 +241,9 @@ void DataLayer::LayerSetUp(const vector*>& bottom, template void DataLayer::CreatePrefetchThread() { phase_ = Caffe::phase(); - const bool prefetch_needs_rand = (phase_ == Caffe::TRAIN) && - (this->layer_param_.data_param().mirror() || - this->layer_param_.data_param().crop_size()); - if (prefetch_needs_rand) { - const unsigned int prefetch_rng_seed = caffe_rng_rand(); - prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed)); - } else { - prefetch_rng_.reset(); - } + + data_transformer_.InitRand(); + CHECK(!StartInternalThread()) << "Pthread execution failed"; } @@ -322,14 +252,6 @@ void DataLayer::JoinPrefetchThread() { CHECK(!WaitForInternalThreadToExit()) << "Pthread joining failed"; } -template -unsigned int DataLayer::PrefetchRand() { - CHECK(prefetch_rng_); - 
caffe::rng_t* prefetch_rng = - static_cast(prefetch_rng_->generator()); - return (*prefetch_rng)(); -} - template void DataLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 428ba2b406d..b7c6bca8194 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -344,6 +344,20 @@ message ConvolutionParameter { optional FillerParameter bias_filler = 8; // The filler for the bias } +// Message that stores parameters used to apply transformation +// to the data layer's data +message TransformationParameter { + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 1 [default = 1]; + // Specify if we want to randomly mirror data. + optional bool mirror = 2 [default = false]; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 3 [default = 0]; + optional string mean_file = 4; +} + // Message that stores parameters used by DataLayer message DataParameter { enum DB { @@ -352,23 +366,18 @@ message DataParameter { } // Specify the data source. optional string source = 1; - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; // Specify the batch size. - optional uint32 batch_size = 4; - // Specify if we would like to randomly crop an image. - optional uint32 crop_size = 5 [default = 0]; - // Specify if we want to randomly mirror data. - optional bool mirror = 6 [default = false]; + optional uint32 batch_size = 3; // The rand_skip variable is for the data layer to skip a few data points // to avoid all asynchronous sgd clients to start at the same point. 
The skip // point would be set as rand_skip * rand(0,1). Note that rand_skip should not // be larger than the number of keys in the leveldb. - optional uint32 rand_skip = 7 [default = 0]; - optional DB backend = 8 [default = LEVELDB]; + optional uint32 rand_skip = 4 [default = 0]; + + // Parameters for data pre-processing. + optional TransformationParameter transform_param = 5; + + optional DB backend = 6 [default = LEVELDB]; } // Message that stores parameters used by DropoutLayer diff --git a/src/caffe/test/test_data_layer.cpp b/src/caffe/test/test_data_layer.cpp index 208beed91d1..5c21f2e745d 100644 --- a/src/caffe/test/test_data_layer.cpp +++ b/src/caffe/test/test_data_layer.cpp @@ -118,9 +118,13 @@ class DataLayerTest : public MultiDeviceTest { LayerParameter param; DataParameter* data_param = param.mutable_data_param(); data_param->set_batch_size(5); - data_param->set_scale(scale); data_param->set_source(filename_->c_str()); data_param->set_backend(backend_); + + TransformationParameter* transform_param = + data_param->mutable_transform_param(); + transform_param->set_scale(scale); + DataLayer layer(param); layer.SetUp(blob_bottom_vec_, &blob_top_vec_); EXPECT_EQ(blob_top_data_->num(), 5); @@ -150,12 +154,17 @@ class DataLayerTest : public MultiDeviceTest { const Dtype scale = 3; LayerParameter param; Caffe::set_random_seed(1701); + DataParameter* data_param = param.mutable_data_param(); data_param->set_batch_size(5); - data_param->set_scale(scale); - data_param->set_crop_size(1); data_param->set_source(filename_->c_str()); data_param->set_backend(backend_); + + TransformationParameter* transform_param = + data_param->mutable_transform_param(); + transform_param->set_scale(scale); + transform_param->set_crop_size(1); + DataLayer layer(param); layer.SetUp(blob_bottom_vec_, &blob_top_vec_); EXPECT_EQ(blob_top_data_->num(), 5); @@ -198,11 +207,14 @@ class DataLayerTest : public MultiDeviceTest { LayerParameter param; DataParameter* data_param = 
param.mutable_data_param(); data_param->set_batch_size(5); - data_param->set_crop_size(1); - data_param->set_mirror(true); data_param->set_source(filename_->c_str()); data_param->set_backend(backend_); + TransformationParameter* transform_param = + data_param->mutable_transform_param(); + transform_param->set_crop_size(1); + transform_param->set_mirror(true); + // Get crop sequence with Caffe seed 1701. Caffe::set_random_seed(seed_); vector > crop_sequence; @@ -249,11 +261,14 @@ class DataLayerTest : public MultiDeviceTest { LayerParameter param; DataParameter* data_param = param.mutable_data_param(); data_param->set_batch_size(5); - data_param->set_crop_size(1); - data_param->set_mirror(true); data_param->set_source(filename_->c_str()); data_param->set_backend(backend_); + TransformationParameter* transform_param = + data_param->mutable_transform_param(); + transform_param->set_crop_size(1); + transform_param->set_mirror(true); + // Get crop sequence with Caffe seed 1701, srand seed 1701. 
Caffe::set_random_seed(seed_); srand(seed_); diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 395e3523418..f0a368245a5 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -1177,7 +1177,9 @@ TEST_F(FilterNetTest, TestFilterLeNetTrainTest) { " top: 'label' " " data_param { " " source: 'mnist-train-leveldb' " - " scale: 0.00390625 " + " transform_param { " + " scale: 0.00390625 " + " } " " batch_size: 64 " " } " " include: { phase: TRAIN } " @@ -1189,7 +1191,9 @@ TEST_F(FilterNetTest, TestFilterLeNetTrainTest) { " top: 'label' " " data_param { " " source: 'mnist-test-leveldb' " - " scale: 0.00390625 " + " transform_param { " + " scale: 0.00390625 " + " } " " batch_size: 100 " " } " " include: { phase: TEST } " @@ -1256,7 +1260,9 @@ TEST_F(FilterNetTest, TestFilterLeNetTrainTest) { " top: 'label' " " data_param { " " source: 'mnist-train-leveldb' " - " scale: 0.00390625 " + " transform_param { " + " scale: 0.00390625 " + " } " " batch_size: 64 " " } " " include: { phase: TRAIN } " @@ -1313,7 +1319,9 @@ TEST_F(FilterNetTest, TestFilterLeNetTrainTest) { " top: 'label' " " data_param { " " source: 'mnist-test-leveldb' " - " scale: 0.00390625 " + " transform_param { " + " scale: 0.00390625 " + " } " " batch_size: 100 " " } " " include: { phase: TEST } " diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp index 92a3ec77b93..bee73f64568 100644 --- a/src/caffe/test/test_split_layer.cpp +++ b/src/caffe/test/test_split_layer.cpp @@ -180,10 +180,12 @@ TEST_F(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: DATA " " data_param { " " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " - " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " " batch_size: 256 " - " crop_size: 227 " - " mirror: true " + " transform_param { " + " crop_size: 227 " + " mirror: true " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " } " " } " " top: 'data' " " top: 
'label' " diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp index 2abcadc0862..3e9ab2194ee 100644 --- a/src/caffe/test/test_upgrade_proto.cpp +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -1189,10 +1189,12 @@ TEST_F(V0UpgradeTest, TestSimple) { " type: DATA " " data_param { " " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " - " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " " batch_size: 256 " - " crop_size: 227 " - " mirror: true " + " transform_param { " + " crop_size: 227 " + " mirror: true " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " } " " } " " top: 'data' " " top: 'label' " @@ -1523,11 +1525,13 @@ TEST_F(V0UpgradeTest, TestAllParams) { " type: DATA " " data_param { " " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " - " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " " batch_size: 256 " - " crop_size: 227 " - " mirror: true " - " scale: 0.25 " + " transform_param { " + " crop_size: 227 " + " mirror: true " + " scale: 0.25 " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " } " " rand_skip: 73 " " } " " top: 'data' " @@ -2119,10 +2123,12 @@ TEST_F(V0UpgradeTest, TestImageNet) { " type: DATA " " data_param { " " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " - " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " " batch_size: 256 " - " crop_size: 227 " - " mirror: true " + " transform_param { " + " crop_size: 227 " + " mirror: true " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " } " " } " " top: 'data' " " top: 'label' " diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp index 5415ca83b60..48eb579fe00 100644 --- a/src/caffe/util/upgrade_proto.cpp +++ b/src/caffe/util/upgrade_proto.cpp @@ -307,7 +307,8 @@ bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection, } if (v0_layer_param.has_scale()) { if (type == "data") { - 
layer_param->mutable_data_param()->set_scale(v0_layer_param.scale()); + layer_param->mutable_data_param()->mutable_transform_param()-> + set_scale(v0_layer_param.scale()); } else if (type == "images") { layer_param->mutable_image_data_param()->set_scale( v0_layer_param.scale()); @@ -318,8 +319,8 @@ bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection, } if (v0_layer_param.has_meanfile()) { if (type == "data") { - layer_param->mutable_data_param()->set_mean_file( - v0_layer_param.meanfile()); + layer_param->mutable_data_param()->mutable_transform_param()-> + set_mean_file(v0_layer_param.meanfile()); } else if (type == "images") { layer_param->mutable_image_data_param()->set_mean_file( v0_layer_param.meanfile()); @@ -351,8 +352,8 @@ bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection, } if (v0_layer_param.has_cropsize()) { if (type == "data") { - layer_param->mutable_data_param()->set_crop_size( - v0_layer_param.cropsize()); + layer_param->mutable_data_param()->mutable_transform_param()-> + set_crop_size(v0_layer_param.cropsize()); } else if (type == "images") { layer_param->mutable_image_data_param()->set_crop_size( v0_layer_param.cropsize()); @@ -366,7 +367,8 @@ bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection, } if (v0_layer_param.has_mirror()) { if (type == "data") { - layer_param->mutable_data_param()->set_mirror(v0_layer_param.mirror()); + layer_param->mutable_data_param()->mutable_transform_param()-> + set_mirror(v0_layer_param.mirror()); } else if (type == "images") { layer_param->mutable_image_data_param()->set_mirror( v0_layer_param.mirror());