matcaffe interface improvements: maintain correct shape, add backward step, and get_weights function #132

Closed
wants to merge 8 commits into from
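In short, the PR keeps the 4-D blob shape on forward outputs and adds backward, get_weights, get_init_key, is_initialized, and reset MEX commands. A minimal usage sketch from matlab, assuming input_data and top_diff are single-precision arrays already in caffe's width x height x channels x num layout (paths taken from matcaffe_batch.m below):

  caffe('init', '../../examples/imagenet_deploy.prototxt', '../../models/alexnet_train_iter_470000');
  caffe('set_mode_cpu');
  caffe('set_phase_test');
  out = caffe('forward', {input_data});          % outputs keep their 4-D blob shape
  layers = caffe('get_weights');                 % struct array with fields weights and layer_names
  bottom_diff = caffe('backward', {top_diff});   % top_diff must match the output blob shape
  caffe('reset');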
187 changes: 182 additions & 5 deletions matlab/caffe/matcaffe.cpp
@@ -1,5 +1,5 @@
// Copyright Ross Girshick and Yangqing Jia 2013
//
// Modified by Sergio Guadarrama 2014
// matcaffe.cpp provides a wrapper of the caffe::Net class as well as some
// caffe::Caffe functions so that one could easily call it from matlab.
// Note that for matlab, we will simply use float as the data type.
@@ -16,6 +16,7 @@ using namespace caffe; // NOLINT(build/namespaces)

// The pointer to the internal caffe::Net instance
static shared_ptr<Net<float> > net_;
static int init_key = -2;

// Five things to be aware of:
// caffe uses row-major order
@@ -25,7 +26,7 @@ static shared_ptr<Net<float> > net_;
// images need to have the data mean subtracted
//
// Data coming in from matlab needs to be in the order
// [batch_images, channels, height, width]
// [width, height, channels, images]
// where width is the fastest dimension.
// Here is the rough matlab for putting image data into the correct
// format:
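// A rough sketch of that conversion (IMAGE_DIM and the BGR-ordered data_mean
// are placeholders, not defined here):
//   im = single(imread('peppers.png'));                   % uint8 -> single
//   im = imresize(im, [IMAGE_DIM IMAGE_DIM], 'bilinear'); % resize to the network input size
//   im = im(:, :, [3 2 1]) - data_mean;                   % RGB -> BGR, subtract the mean
//   im = permute(im, [2 1 3]);                            % width becomes the fastest dimension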
@@ -42,6 +43,7 @@ static shared_ptr<Net<float> > net_;
//
// The actual forward function. It takes in a cell array of 4-D arrays as
// input and outputs a cell array.

static mxArray* do_forward(const mxArray* const bottom) {
vector<Blob<float>*>& input_blobs = net_->input_blobs();
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(bottom)[0]),
@@ -66,8 +68,11 @@ static mxArray* do_forward(const mxArray* const bottom) {
const vector<Blob<float>*>& output_blobs = net_->ForwardPrefilled();
mxArray* mx_out = mxCreateCellMatrix(output_blobs.size(), 1);
for (unsigned int i = 0; i < output_blobs.size(); ++i) {
mxArray* mx_blob = mxCreateNumericMatrix(output_blobs[i]->count(),
1, mxSINGLE_CLASS, mxREAL);
// internally data is stored as (width, height, channels, num)
// where width is the fastest dimension
mwSize dims[4] = {output_blobs[i]->width(), output_blobs[i]->height(),
output_blobs[i]->channels(), output_blobs[i]->num()};
mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_out, i, mx_blob);
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
switch (Caffe::mode()) {
@@ -87,7 +92,140 @@ static mxArray* do_forward(const mxArray* const bottom) {
return mx_out;
}
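// Example call from matlab (input_data prepared as described above; the
// reported size depends on the loaded network, the ILSVRC net is only an
// illustration):
//   out = caffe('forward', {input_data});
//   size(out{1})   % now 4-D, e.g. 1 x 1 x 1000 x 10, instead of a flat count-by-1 vector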

// The caffe::Caffe utility functions.
static mxArray* do_backward(const mxArray* const top_diff) {
vector<Blob<float>*>& output_blobs = net_->output_blobs();
vector<Blob<float>*>& input_blobs = net_->input_blobs();
CHECK_EQ(static_cast<unsigned int>(mxGetDimensions(top_diff)[0]),
output_blobs.size());
// First, copy the output diff
for (unsigned int i = 0; i < output_blobs.size(); ++i) {
const mxArray* const elem = mxGetCell(top_diff, i);
const float* const data_ptr =
reinterpret_cast<const float* const>(mxGetPr(elem));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(output_blobs[i]->mutable_cpu_diff(), data_ptr,
sizeof(float) * output_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(output_blobs[i]->mutable_gpu_diff(), data_ptr,
sizeof(float) * output_blobs[i]->count(), cudaMemcpyHostToDevice);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}
// LOG(INFO) << "Start";
net_->Backward();
// LOG(INFO) << "End";
mxArray* mx_out = mxCreateCellMatrix(input_blobs.size(), 1);
for (unsigned int i = 0; i < input_blobs.size(); ++i) {
// internally data is stored as (width, height, channels, num)
// where width is the fastest dimension
mwSize dims[4] = {input_blobs[i]->width(), input_blobs[i]->height(),
input_blobs[i]->channels(), input_blobs[i]->num()};
mxArray* mx_blob = mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_out, i, mx_blob);
float* data_ptr = reinterpret_cast<float*>(mxGetPr(mx_blob));
switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(data_ptr, input_blobs[i]->cpu_diff(),
sizeof(float) * input_blobs[i]->count());
break;
case Caffe::GPU:
cudaMemcpy(data_ptr, input_blobs[i]->gpu_diff(),
sizeof(float) * input_blobs[i]->count(), cudaMemcpyDeviceToHost);
break;
default:
LOG(FATAL) << "Unknown Caffe mode.";
} // switch (Caffe::mode())
}

return mx_out;
}
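// Example call from matlab (assumes a forward pass has already been run and
// that top_diff matches the shape of the corresponding output blob):
//   out  = caffe('forward', {input_data});
//   top_diff = ones(size(out{1}), 'single');      % placeholder gradient w.r.t. the output
//   bottom_diff = caffe('backward', {top_diff});  % gradients w.r.t. the input blobs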

static mxArray* do_get_weights() {
const vector<shared_ptr<Layer<float> > >& layers = net_->layers();
const vector<string>& layer_names = net_->layer_names();

// Step 1: count the number of layers with weights
int num_layers = 0;
{
string prev_layer_name = "";
for (unsigned int i = 0; i < layers.size(); ++i) {
vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs();
if (layer_blobs.size() == 0) {
continue;
}
if (layer_names[i] != prev_layer_name) {
prev_layer_name = layer_names[i];
num_layers++;
}
}
}

// Step 2: prepare output array of structures
mxArray* mx_layers;
{
const mwSize dims[2] = {num_layers, 1};
const char* fnames[2] = {"weights", "layer_names"};
mx_layers = mxCreateStructArray(2, dims, 2, fnames);
}

// Step 3: copy weights into output
{
string prev_layer_name = "";
int mx_layer_index = 0;
for (unsigned int i = 0; i < layers.size(); ++i) {
vector<shared_ptr<Blob<float> > >& layer_blobs = layers[i]->blobs();
if (layer_blobs.size() == 0) {
continue;
}

mxArray* mx_layer_cells = NULL;
if (layer_names[i] != prev_layer_name) {
prev_layer_name = layer_names[i];
const mwSize dims[2] = {layer_blobs.size(), 1};
mx_layer_cells = mxCreateCellArray(2, dims);
mxSetField(mx_layers, mx_layer_index, "weights", mx_layer_cells);
mxSetField(mx_layers, mx_layer_index, "layer_names",
mxCreateString(layer_names[i].c_str()));
mx_layer_index++;
}

for (unsigned int j = 0; j < layer_blobs.size(); ++j) {
// internally data is stored as (width, height, channels, num)
// where width is the fastest dimension
mwSize dims[4] = {layer_blobs[j]->width(), layer_blobs[j]->height(),
layer_blobs[j]->channels(), layer_blobs[j]->num()};
mxArray* mx_weights =
mxCreateNumericArray(4, dims, mxSINGLE_CLASS, mxREAL);
mxSetCell(mx_layer_cells, j, mx_weights);
float* weights_ptr = reinterpret_cast<float*>(mxGetPr(mx_weights));

switch (Caffe::mode()) {
case Caffe::CPU:
memcpy(weights_ptr, layer_blobs[j]->cpu_data(),
sizeof(float) * layer_blobs[j]->count());
break;
case Caffe::GPU:
CUDA_CHECK(cudaMemcpy(weights_ptr, layer_blobs[j]->gpu_data(),
sizeof(float) * layer_blobs[j]->count(), cudaMemcpyDeviceToHost));
break;
default:
LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
}
}
}
}

return mx_layers;
}

static void get_weights(MEX_ARGS) {
plhs[0] = do_get_weights();
}
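// Example call from matlab (layer name and blob sizes depend on the loaded
// network; conv1 is only an illustration):
//   layers = caffe('get_weights');
//   layers(1).layer_names            % e.g. 'conv1'
//   w = layers(1).weights{1};        % filters, width x height x channels x num
//   b = layers(1).weights{2};        % biases, if the layer has them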

static void set_mode_cpu(MEX_ARGS) {
Caffe::set_mode(Caffe::CPU);
}
@@ -114,6 +252,10 @@ static void set_device(MEX_ARGS) {
Caffe::SetDevice(device_id);
}

static void get_init_key(MEX_ARGS) {
plhs[0] = mxCreateDoubleScalar(init_key);
}

static void init(MEX_ARGS) {
if (nrhs != 2) {
LOG(ERROR) << "Only given " << nrhs << " arguments";
@@ -128,6 +270,19 @@ static void init(MEX_ARGS) {

mxFree(param_file);
mxFree(model_file);

init_key = random();
if (nlhs == 1) {
plhs[0] = mxCreateDoubleScalar(init_key);
}
}

static void reset(MEX_ARGS) {
if (net_) {
net_.reset();
init_key = -2;
LOG(INFO) << "Network reset, call init before using it again";
}
}
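// Example of the init-key handshake from matlab (paths as in matcaffe_batch.m):
//   key = caffe('init', model_def_file, model_file);  % init now also stores a new random key
//   assert(caffe('get_init_key') == key);             % verify the same network instance is loaded
//   caffe('reset');                                    % unload; get_init_key returns -2 again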

static void forward(MEX_ARGS) {
@@ -139,6 +294,23 @@ static void forward(MEX_ARGS) {
plhs[0] = do_forward(prhs[0]);
}

static void backward(MEX_ARGS) {
if (nrhs != 1) {
LOG(ERROR) << "Only given " << nrhs << " arguments";
mexErrMsgTxt("Wrong number of arguments");
}

plhs[0] = do_backward(prhs[0]);
}

static void is_initialized(MEX_ARGS) {
if (!net_) {
plhs[0] = mxCreateDoubleScalar(0);
} else {
plhs[0] = mxCreateDoubleScalar(1);
}
}

/** -----------------------------------------------------------------
** Available commands.
**/
@@ -150,12 +322,17 @@ struct handler_registry {
static handler_registry handlers[] = {
// Public API functions
{ "forward", forward },
{ "backward", backward },
{ "init", init },
{ "is_initialized", is_initialized },
{ "set_mode_cpu", set_mode_cpu },
{ "set_mode_gpu", set_mode_gpu },
{ "set_phase_train", set_phase_train },
{ "set_phase_test", set_phase_test },
{ "set_device", set_device },
{ "get_weights", get_weights },
{ "get_init_key", get_init_key },
{ "reset", reset },
// The end.
{ "END", NULL },
};
90 changes: 90 additions & 0 deletions matlab/caffe/matcaffe_batch.m
@@ -0,0 +1,90 @@
function [scores,list_im] = matcaffe_batch(list_im, use_gpu)
% scores = matcaffe_batch(list_im, use_gpu)
%
% Demo of the matlab wrapper using the ILSVRC network.
%
% input
% list_im list of images files
% use_gpu 1 to use the GPU, 0 to use the CPU
%
% output
% scores 1000 x num_images ILSVRC output vector
%
% You may need to do the following before you start matlab:
% $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda/lib64
% $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
% Or the equivalent based on where things are installed on your system
%
% Usage:
% scores = matcaffe_batch({'peppers.png','onion.png'}, 0);
% scores = matcaffe_batch('list_images.txt', 0);
if ischar(list_im)
% Assume it is a file containing the list of images
filename = list_im;
list_im = read_cell(filename);
end
batch_size = 10;
dim = 1000;
disp(list_im)
if mod(length(list_im),batch_size)
warning(['Assuming batches of ' num2str(batch_size) ' images; the rest will be filled with zeros'])
end

if caffe('is_initialized') == 0
model_def_file = '../../examples/imagenet_deploy.prototxt';
model_file = '../../models/alexnet_train_iter_470000';
if exist(model_file, 'file') == 0
% NOTE: you'll have to get the pre-trained ILSVRC network
error('You need a network model file');
end
if ~exist(model_def_file,'file')
% NOTE: you'll have to get network definition
error('You need the network prototxt definition');
end
caffe('init', model_def_file, model_file);
end


% init caffe network (spews logging info)

% set to use GPU or CPU
if exist('use_gpu', 'var') && use_gpu
caffe('set_mode_gpu');
else
caffe('set_mode_cpu');
end

% put into test mode
caffe('set_phase_test');

d = load('ilsvrc_2012_mean');
IMAGE_MEAN = d.image_mean;

% prepare input

num_images = length(list_im);
scores = zeros(dim,num_images,'single');
num_batches = ceil(length(list_im)/batch_size)
initic=tic;
for bb = 1 : num_batches
batchtic = tic;
range = 1+batch_size*(bb-1):min(num_images,batch_size * bb);
tic
input_data = prepare_batch(list_im(range),IMAGE_MEAN,batch_size);
toc, tic
fprintf('Batch %d out of %d (%.2f%% complete), ETA %.2f seconds\n',...
bb,num_batches,bb/num_batches*100,toc(initic)/bb*(num_batches-bb));
output_data = caffe('forward', {input_data});
toc
output_data = squeeze(output_data{1});
% keep only the columns for real images (the last batch may be zero-padded)
scores(:,range) = output_data(:,mod(range-1,batch_size)+1);
toc(batchtic)
end
toc(initic);

if exist('filename', 'var')
save([filename '.probs.mat'],'list_im','scores','-v7.3');
end
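A short usage sketch (the image list file name is a placeholder; the .probs.mat file is the one written by the save call above):

  [scores, list_im] = matcaffe_batch('list_images.txt', 0);   % run on the CPU
  d = load('list_images.txt.probs.mat');                      % contains list_im and scores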


