Merge pull request #147 from sergeyk/hdf5_data
HDF5DataLayer: read matrix of features and labels from HDF5 file as input
Showing 10 changed files with 355 additions and 2 deletions.
New file (106 lines): the HDF5DataLayer implementation.

/*
TODO:
- only load parts of the file, in accordance with a prototxt param "max_mem"
*/

#include <iostream>
#include <stdint.h>
#include <string>
#include <vector>

#include "hdf5.h"
#include "hdf5_hl.h"

#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"

using std::string;

namespace caffe {

template <typename Dtype>
HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }

template <typename Dtype>
void HDF5DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  CHECK_EQ(bottom.size(), 0) << "HDF5DataLayer takes no input blobs.";
  CHECK_EQ(top->size(), 2) << "HDF5DataLayer takes two blobs as output.";

  // Load the HDF5 file and initialize the counter.
  const char* hdf_filename = this->layer_param_.source().c_str();
  LOG(INFO) << "Loading HDF5 file " << hdf_filename;
  hid_t file_id = H5Fopen(hdf_filename, H5F_ACC_RDONLY, H5P_DEFAULT);
  load_2d_dataset(file_id, "data", &data, data_dims);
  load_2d_dataset(file_id, "label", &label, label_dims);
  herr_t status = H5Fclose(file_id);
  assert(data_dims[0] == label_dims[0]);
  current_row = 0;

  // Reshape blobs.
  (*top)[0]->Reshape(this->layer_param_.batchsize(), data_dims[1], 1, 1);
  (*top)[1]->Reshape(this->layer_param_.batchsize(), label_dims[1], 1, 1);
  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
      << (*top)[0]->width();
}

template <typename Dtype>
void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  const int batchsize = this->layer_param_.batchsize();
  for (int i = 0; i < batchsize; ++i, ++current_row) {
    // Wrap around to the first row once the end of the file is reached.
    if (current_row == data_dims[0]) {
      current_row = 0;
    }

    memcpy(&(*top)[0]->mutable_cpu_data()[i * data_dims[1]],
           &(data.get()[current_row * data_dims[1]]),
           sizeof(Dtype) * data_dims[1]);

    memcpy(&(*top)[1]->mutable_cpu_data()[i * label_dims[1]],
           &(label.get()[current_row * label_dims[1]]),
           sizeof(Dtype) * label_dims[1]);
  }
}

template <typename Dtype>
void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  const int batchsize = this->layer_param_.batchsize();
  for (int i = 0; i < batchsize; ++i, ++current_row) {
    if (current_row == data_dims[0]) {
      current_row = 0;
    }

    CUDA_CHECK(cudaMemcpy(
        &(*top)[0]->mutable_gpu_data()[i * data_dims[1]],
        &(data.get()[current_row * data_dims[1]]),
        sizeof(Dtype) * data_dims[1],
        cudaMemcpyHostToDevice));

    CUDA_CHECK(cudaMemcpy(
        &(*top)[1]->mutable_gpu_data()[i * label_dims[1]],
        &(label.get()[current_row * label_dims[1]]),
        sizeof(Dtype) * label_dims[1],
        cudaMemcpyHostToDevice));
  }
}

// The backward operations are dummy - they do not carry any computation.
template <typename Dtype>
Dtype HDF5DataLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  return Dtype(0.);
}

template <typename Dtype>
Dtype HDF5DataLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  return Dtype(0.);
}

INSTANTIATE_CLASS(HDF5DataLayer);

}  // namespace caffe
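The helper load_2d_dataset() called in SetUp() above is part of this PR (presumably declared in caffe/util/io.hpp, which this file includes) but is not shown in this excerpt. The following is only a minimal sketch of what such a helper might look like, inferred from how SetUp() calls it: it assumes the layer stores data and label as shared pointers to flat row-major buffers and data_dims/label_dims as two-element hsize_t arrays, and it reads through HDF5's high-level (H5LT) API. The name load_2d_dataset_sketch, the use of std::shared_ptr (the PR era used boost), and the float-only signature are all illustrative assumptions, not the PR's actual code; the real helper must also handle double, since the layer is templated on Dtype.

// Hypothetical sketch of a 2-D HDF5 reading helper; not the PR's implementation.
#include <memory>

#include "glog/logging.h"
#include "hdf5.h"
#include "hdf5_hl.h"

void load_2d_dataset_sketch(hid_t file_id, const char* dataset_name,
    std::shared_ptr<float>* array, hsize_t dims[2]) {
  // Verify that the dataset is two-dimensional.
  int ndims;
  CHECK_GE(H5LTget_dataset_ndims(file_id, dataset_name, &ndims), 0)
      << "Failed to query dataset " << dataset_name;
  CHECK_EQ(ndims, 2) << dataset_name << " is not a 2-D dataset";

  // Query the extents, then read the whole dataset into a flat host buffer.
  H5T_class_t dataset_class;
  size_t type_size;
  CHECK_GE(H5LTget_dataset_info(
      file_id, dataset_name, dims, &dataset_class, &type_size), 0);
  array->reset(new float[dims[0] * dims[1]],
               [](float* p) { delete[] p; });  // array deleter for shared_ptr
  CHECK_GE(H5LTread_dataset_float(file_id, dataset_name, array->get()), 0)
      << "Failed to read dataset " << dataset_name;
}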
New file (17 lines): script that generates the sample HDF5 data used by the test.

"""
Generate data used in the HDF5DataLayer test.
"""
import numpy as np
import h5py

num_cols = 8
num_rows = 10
# data is a 10x8 matrix with values 0..79 in row-major order;
# label is a 10x1 column with values 0..9.
data = np.arange(num_cols * num_rows).reshape(num_rows, num_cols)
label = np.arange(num_rows)[:, np.newaxis]
print(data)
print(label)

with h5py.File('./sample_data.h5', 'w') as f:
    f['data'] = data.astype('float32')
    f['label'] = label.astype('float32')
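For completeness, the same 10x8 data matrix and 10x1 label column could also be written from C++ with the H5LT API that the layer itself links against. The sketch below is purely illustrative and not part of the PR; it mirrors the Python script above.

// Hypothetical C++ equivalent of the Python generator above (not part of the PR).
#include <vector>

#include "hdf5.h"
#include "hdf5_hl.h"

int main() {
  const int num_rows = 10;
  const int num_cols = 8;

  // data[i][j] = i * num_cols + j, matching np.arange(...).reshape(...).
  std::vector<float> data(num_rows * num_cols);
  std::vector<float> label(num_rows);
  for (int i = 0; i < num_rows; ++i) {
    for (int j = 0; j < num_cols; ++j) {
      data[i * num_cols + j] = static_cast<float>(i * num_cols + j);
    }
    label[i] = static_cast<float>(i);
  }

  hid_t file_id = H5Fcreate("./sample_data.h5", H5F_ACC_TRUNC,
                            H5P_DEFAULT, H5P_DEFAULT);
  hsize_t data_dims[2] = {num_rows, num_cols};
  hsize_t label_dims[2] = {num_rows, 1};
  H5LTmake_dataset_float(file_id, "data", 2, data_dims, data.data());
  H5LTmake_dataset_float(file_id, "label", 2, label_dims, label.data());
  H5Fclose(file_id);
  return 0;
}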
Binary file (the generated sample_data.h5) not shown.
New file (130 lines): unit test for HDF5DataLayer.

// Copyright 2013 Yangqing Jia

#include <cuda_runtime.h>
#include <leveldb/db.h>

#include <string>

#include "gtest/gtest.h"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/test/test_caffe_main.hpp"

using std::string;

namespace caffe {

extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;

template <typename Dtype>
class HDF5DataLayerTest : public ::testing::Test {
 protected:
  HDF5DataLayerTest()
      : blob_top_data_(new Blob<Dtype>()),
        blob_top_label_(new Blob<Dtype>()),
        filename(NULL) {}

  virtual void SetUp() {
    blob_top_vec_.push_back(blob_top_data_);
    blob_top_vec_.push_back(blob_top_label_);

    // TODO: generate sample HDF5 file on the fly.
    // For now, use example HDF5 file.
    // TODO: how to best deal with the relativeness of the path?
    filename = "src/caffe/test/test_data/sample_data.h5";
    LOG(INFO) << "Using sample HDF5 data file " << filename;
  }

  virtual ~HDF5DataLayerTest() {
    delete blob_top_data_;
    delete blob_top_label_;
  }

  char* filename;
  Blob<Dtype>* const blob_top_data_;
  Blob<Dtype>* const blob_top_label_;
  vector<Blob<Dtype>*> blob_bottom_vec_;
  vector<Blob<Dtype>*> blob_top_vec_;
};

typedef ::testing::Types<float, double> Dtypes;
TYPED_TEST_CASE(HDF5DataLayerTest, Dtypes);

TYPED_TEST(HDF5DataLayerTest, TestRead) {
  // Create a LayerParameter with the known parameters.
  // The data file being read has 10 rows and 8 columns,
  // with values 0 through 10*8 - 1 filled in row-major order.
  LayerParameter param;
  int batchsize = 5;
  param.set_batchsize(batchsize);
  param.set_source(this->filename);
  int num_rows = 10;
  int num_cols = 8;
  HDF5DataLayer<TypeParam> layer(param);

  // Test that the layer setup got the correct parameters.
  layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);
  EXPECT_EQ(this->blob_top_data_->num(), batchsize);
  EXPECT_EQ(this->blob_top_data_->channels(), num_cols);
  EXPECT_EQ(this->blob_top_data_->height(), 1);
  EXPECT_EQ(this->blob_top_data_->width(), 1);

  EXPECT_EQ(this->blob_top_label_->num(), batchsize);
  EXPECT_EQ(this->blob_top_label_->channels(), 1);
  EXPECT_EQ(this->blob_top_label_->height(), 1);
  EXPECT_EQ(this->blob_top_label_->width(), 1);

  // Go through the data 100 times.
  for (int iter = 0; iter < 100; ++iter) {
    layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);

    // With 10 rows and a batch size of 5, the layer wraps around every two
    // batches: even iterations read the first half of the data,
    // odd iterations read the second half.
    int label_offset = (iter % 2 == 0) ? 0 : batchsize;
    int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;

    for (int i = 0; i < batchsize; ++i) {
      EXPECT_EQ(
        label_offset + i,
        this->blob_top_label_->cpu_data()[i]);
    }
    for (int i = 0; i < batchsize; ++i) {
      for (int j = 0; j < num_cols; ++j) {
        EXPECT_EQ(
          data_offset + i * num_cols + j,
          this->blob_top_data_->cpu_data()[i * num_cols + j])
          << "debug: i " << i << " j " << j;
      }
    }
  }

  // Exact same test in GPU mode.
  Caffe::set_mode(Caffe::GPU);
  // Go through the data 100 times.
  for (int iter = 0; iter < 100; ++iter) {
    layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);

    // On even iterations, we're reading the first half of the data;
    // on odd iterations, the second half.
    int label_offset = (iter % 2 == 0) ? 0 : batchsize;
    int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;

    for (int i = 0; i < batchsize; ++i) {
      EXPECT_EQ(
        label_offset + i,
        this->blob_top_label_->cpu_data()[i]);
    }
    for (int i = 0; i < batchsize; ++i) {
      for (int j = 0; j < num_cols; ++j) {
        EXPECT_EQ(
          data_offset + i * num_cols + j,
          this->blob_top_data_->cpu_data()[i * num_cols + j])
          << "debug: i " << i << " j " << j;
      }
    }
  }
}

}  // namespace caffe