matcaffe interface improvements: maintain correct shape, add backward step, and get_weights function #132

Closed
wants to merge 8 commits into from
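In short, the PR keeps the 4-D blob shape on forward outputs and adds backward, get_weights, get_init_key, is_initialized, and reset MEX commands. A minimal usage sketch from matlab, assuming input_data and top_diff are single-precision arrays already in caffe's width x height x channels x num layout (paths taken from matcaffe_batch.m below):

  caffe('init', '../../examples/imagenet_deploy.prototxt', '../../models/alexnet_train_iter_470000');
  caffe('set_mode_cpu');
  caffe('set_phase_test');
  out = caffe('forward', {input_data});          % outputs keep their 4-D blob shape
  layers = caffe('get_weights');                 % struct array with fields weights and layer_names
  bottom_diff = caffe('backward', {top_diff});   % top_diff must match the output blob shape
  caffe('reset');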
187 changes: 182 additions & 5 deletions matlab/caffe/matcaffe.cpp
@@ -1,5 +1,5 @@
// Copyright Ross Girshick and Yangqing Jia 2013
//
// Modified by Sergio Guadarrama 2014
// matcaffe.cpp provides a wrapper of the caffe::Net class as well as some
// caffe::Caffe functions so that one could easily call it from matlab.
// Note that for matlab, we will simply use float as the data type.
@@ -16,6 +16,7 @@ using namespace caffe; // NOLINT(build/namespaces)

// The pointer to the internal caffe::Net instance
static shared_ptr<Net<float> > net_;
static int init_key = -2;

// Five things to be aware of:
// caffe uses row-major order
@@ -25,7 +26,7 @@ static shared_ptr<Net<float> > net_;
// images need to have the data mean subtracted
//
// Data coming in from matlab needs to be in the order
// [batch_images, channels, height, width]
// [width, height, channels, images]
// where width is the fastest dimension.
// Here is the rough matlab for putting image data into the correct
// format:
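// A rough sketch of that conversion (IMAGE_DIM and the BGR-ordered data_mean
// are placeholders, not defined here):
//   im = single(imread('peppers.png'));                   % uint8 -> single
//   im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear'); % resize to the network input size
//   im = im(:, :, [3 2 1]) - data_mean;                   % RGB -> BGR, subtract the mean
//   im = permute(im, [2 1 3]);                            % width becomes the fastest dimension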
@@ -42,6 +43,7 @@ static shared_ptr<Net<float> > net_;
//
// The actual forward function. It takes in a cell array of 4-D arrays as
// input and outputs a cell array.

static mxArray* do_forward(const mxArray* const bottom) {
vector<Blob<float>*>& input_blobs = net_->input_blobs();
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(bottom)[0]),
@@ -66,8 +68,11 @@ static mxArray* do_forward(const mxArray* const bottom) {
const vector<Blob<float>*>& output_blobs = net_->ForwardPrefilled();
mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1);
for (unsigned int i = 0; i < output_blobs.size(); ++i) {
mxArray* mx_blob = mxCreateNumericMatrix(output_blobs[i]->count(),
1, mxSINGLE_CLASS, mxREAL);
// internally data is stored as (width, height, channels, num)
// where width is the fastest dimension
mwSize dims[4] = {output_blobs[i]->width(), output_blobs[i]->height(),
output_blobs[i]->channels(), output_blobs[i]->num()};
mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_out, i, mx_blob);
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
switch (Caffe::mode()) {
@@ -87,7 +92,140 @@ static mxArray* do_forward(const mxArray* const bottom) {
return mx_out;
}
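// Example call from matlab (input_data prepared as described above; the
// reported size depends on the loaded network, the ILSVRC net is only an
// illustration):
//   out = caffe('forward', {input_data});
//   size(out{1})   % now 4-D, e.g. 1 x 1 x 1000 x 10, instead of a flat count-by-1 vector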

// The caffe::Caffe utility functions.
static mxArray* do_backward(const mxArray* const top_diff) {
vector<Blob<float>*>& output_blobs = net_->output_blobs();
vector<Blob<float>*>& input_blobs = net_->input_blobs();
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(top_diff)[0]),
output_blobs.size());
// First, copy the output diff
for (unsigned int i = 0; i < output_blobs.size(); ++i) {
const mxArray* const elem = mxGetCell(top_diff, i);
const float* const data_ptr =
reinterpret_cast<const float* const>(mxGetPr(elem));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(output_blobs[i]->mutable_cpu_diff(), data_ptr,
sizeof(float) * output_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(output_blobs[i]->mutable_gpu_diff(), data_ptr,
sizeof(float) * output_blobs[i]->count(), cudaMemcpyHostToDevice);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}
// LOG(INFO) << "Start";
net_->Backward();
// LOG(INFO) << "End";
mxArray* mx_out = mxCreateCellMatrix(input_blobs.size(), 1);
for (unsigned int i = 0; i < input_blobs.size(); ++i) {
// internally data is stored as (width, height, channels, num)
// where width is the fastest dimension
mwSize dims[4] = {input_blobs[i]->width(), input_blobs[i]->height(),
input_blobs[i]->channels(), input_blobs[i]->num()};
mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_out, i, mx_blob);
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(data_ptr, input_blobs[i]->cpu_diff(),
sizeof(float) * input_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(data_ptr, input_blobs[i]->gpu_diff(),
sizeof(float) * input_blobs[i]->count(), cudaMemcpyDeviceToHost);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}

return mx_out;
}
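// Example call from matlab (assumes a forward pass has already been run and
// that top_diff matches the shape of the corresponding output blob):
//   out  = caffe('forward', {input_data});
//   top_diff = ones(size(out{1}), 'single');      % placeholder gradient w.r.t. the output
//   bottom_diff = caffe('backward', {top_diff});  % gradients w.r.t. the input blobs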

static mxArray* do_get_weights() {
const vector<shared_ptr<Layer<float> > >& layers = net_->layers();
const vector<string>& layer_names = net_->layer_names();

// Step 1: count the number of layers with weights
int num_layers = 0;
{
string prev_layer_name = "";
for (unsigned int i = 0; i < layers.size(); ++i) {
vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs();
if (layer_blobs.size() == 0) {
continue;
}
if (layer_names[i] != prev_layer_name) {
prev_layer_name = layer_names[i];
num_layers++;
}
}
}

// Step 2: prepare output array of structures
mxArray* mx_layers;
{
const mwSize dims[2] = {num_layers, 1};
const char* fnames[2] = {"weights", "layer_names"};
mx_layers = mxCreateStructArray(2, dims, 2, fnames);
}

// Step 3: copy weights into output
{
string prev_layer_name = "";
int mx_layer_index = 0;
for (unsigned int i = 0; i < layers.size(); ++i) {
vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs();
if (layer_blobs.size() == 0) {
continue;
}

mxArray* mx_layer_cells = NULL;
if (layer_names[i] != prev_layer_name) {
prev_layer_name = layer_names[i];
const mwSize dims[2] = {layer_blobs.size(), 1};
mx_layer_cells = mxCreateCellArray(2, dims);
mxSetField(mx_layers, mx_layer_index, "weights", mx_layer_cells);
mxSetField(mx_layers, mx_layer_index, "layer_names",
mxCreateString(layer_names[i].c_str()));
mx_layer_index++;
}

for (unsigned int j = 0; j < layer_blobs.size(); ++j) {
// internally data is stored as (width, height, channels, num)
// where width is the fastest dimension
mwSize dims[4] = {layer_blobs[j]->width(), layer_blobs[j]->height(),
layer_blobs[j]->channels(), layer_blobs[j]->num()};
mxArray* mx_weights =
mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_layer_cells, j, mx_weights);
float* weights_ptr = reinterpret_cast<float*>(mxGetPr(mx_weights));

switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(weights_ptr, layer_blobs[j]->cpu_data(),
sizeof(float) * layer_blobs[j]->count());
break;
case Caffe::GPU:
CUDA_CHECK(cudaMemcpy(weights_ptr, layer_blobs[j]->gpu_data(),
sizeof(float) * layer_blobs[j]->count(), cudaMemcpyDeviceToHost));
break;
default:
LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
}
}
}
}

return mx_layers;
}

static void get_weights(MEX_ARGS) {
plhs[0] = do_get_weights();
}
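// Example call from matlab (layer name and blob sizes depend on the loaded
// network; conv1 is only an illustration):
//   layers = caffe('get_weights');
//   layers(1).layer_names            % e.g. 'conv1'
//   w = layers(1).weights{1};        % filters, width x height x channels x num
//   b = layers(1).weights{2};        % biases, if the layer has them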

static void set_mode_cpu(MEX_ARGS) {
Caffe::set_mode(Caffe::CPU);
}
@@ -114,6 +252,10 @@ static void set_device(MEX_ARGS) {
Caffe::SetDevice(device_id);
}

static void get_init_key(MEX_ARGS) {
plhs[0] = mxCreateDoubleScalar(init_key);
}

static void init(MEX_ARGS) {
if (nrhs != 2) {
LOG(ERROR) << "Only given " << nrhs << " arguments";
@@ -128,6 +270,19 @@ static void init(MEX_ARGS) {

mxFree(param_file);
mxFree(model_file);

init_key = random();
if (nlhs == 1) {
plhs[0] = mxCreateDoubleScalar(init_key);
}
}

static void reset(MEX_ARGS) {
if (net_) {
net_.reset();
init_key = -2;
LOG(INFO) << "Network reset, call init before using it again";
}
}
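// Example of the init-key handshake from matlab (paths as in matcaffe_batch.m):
//   key = caffe('init', model_def_file, model_file);  % init now also stores a new random key
//   assert(caffe('get_init_key') == key);             % verify the same network instance is loaded
//   caffe('reset');                                    % unload; get_init_key returns -2 again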

static void forward(MEX_ARGS) {
@@ -139,6 +294,23 @@ static void forward(MEX_ARGS) {
plhs[0] = do_forward(prhs[0]);
}

static void backward(MEX_ARGS) {
if (nrhs != 1) {
LOG(ERROR) << "Only given " << nrhs << " arguments";
mexErrMsgTxt("Wrong number of arguments");
}

plhs[0] = do_backward(prhs[0]);
}

static void is_initialized(MEX_ARGS) {
if (!net_) {
plhs[0] = mxCreateDoubleScalar(0);
} else {
plhs[0] = mxCreateDoubleScalar(1);
}
}

/** -----------------------------------------------------------------
** Available commands.
**/
@@ -150,12 +322,17 @@ struct handler_registry {
static handler_registry handlers[] = {
// Public API functions
{ "forward", forward },
{ "backward", backward },
{ "init", init },
{ "is_initialized", is_initialized },
{ "set_mode_cpu", set_mode_cpu },
{ "set_mode_gpu", set_mode_gpu },
{ "set_phase_train", set_phase_train },
{ "set_phase_test", set_phase_test },
{ "set_device", set_device },
{ "get_weights", get_weights },
{ "get_init_key", get_init_key },
{ "reset", reset },
// The end.
{ "END", NULL },
};
90 changes: 90 additions & 0 deletions matlab/caffe/matcaffe_batch.m
@@ -0,0 +1,90 @@
function [scores,list_im] = matcaffe_batch(list_im, use_gpu)
% scores = matcaffe_batch(list_im, use_gpu)
%
% Demo of the matlab wrapper using the ILSVRC network.
%
% input
% list_im list of images files
% use_gpu 1 to use the GPU, 0 to use the CPU
%
% output
% scores 1000 x num_images ILSVRC output vector
%
% You may need to do the following before you start matlab:
% $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda/lib64
% $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
% Or the equivalent based on where things are installed on your system
%
% Usage:
% scores = matcaffe_batch({'peppers.png','onion.png'}, 0);
% scores = matcaffe_batch('list_images.txt', 0);
if ischar(list_im)
% Assume it is a file containing the list of images
filename = list_im;
list_im = read_cell(filename);
end
batch_size = 10;
dim = 1000;
disp(list_im)
if mod(length(list_im),batch_size)
warning(['Assuming batches of ' num2str(batch_size) ' images; the rest will be filled with zeros'])
end

if caffe('is_initialized') == 0
model_def_file = '../../examples/imagenet_deploy.prototxt';
model_file = '../../models/alexnet_train_iter_470000';
if exist(model_file, 'file') == 0
% NOTE: you'll have to get the pre-trained ILSVRC network
error('You need a network model file');
end
if ~exist(model_def_file,'file')
% NOTE: you'll have to get network definition
error('You need the network prototxt definition');
end
caffe('init', model_def_file, model_file);
end


% init caffe network (spews logging info)

% set to use GPU or CPU
if exist('use_gpu', 'var') && use_gpu
caffe('set_mode_gpu');
else
caffe('set_mode_cpu');
end

% put into test mode
caffe('set_phase_test');

d = load('ilsvrc_2012_mean');
IMAGE_MEAN = d.image_mean;

% prepare input

num_images = length(list_im);
scores = zeros(dim,num_images,'single');
num_batches = ceil(length(list_im)/batch_size)
initic=tic;
for bb = 1 : num_batches
batchtic = tic;
range = 1+batch_size*(bb-1):min(num_images,batch_size * bb);
tic
input_data = prepare_batch(list_im(range),IMAGE_MEAN,batch_size);
toc, tic
fprintf('Batch %d out of %d (%.2f%% complete), ETA %.2f seconds\n',...
bb,num_batches,bb/num_batches*100,toc(initic)/bb*(num_batches-bb));
output_data = caffe('forward', {input_data});
toc
output_data = squeeze(output_data{1});
% keep only the columns for real images (the last batch may be zero-padded)
scores(:,range) = output_data(:,mod(range-1,batch_size)+1);
toc(batchtic)
end
toc(initic);

if exist('filename', 'var')
save([filename '.probs.mat'],'list_im','scores','-v7.3');
end
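A short usage sketch (the image list file name is a placeholder; the .probs.mat file is the one written by the save call above):

  [scores, list_im] = matcaffe_batch('list_images.txt', 0);   % run on the CPU
  d = load('list_images.txt.probs.mat');                      % contains list_im and scores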


