Merge pull request BVLC#518 from longjon/fromto
Add From/To options for Forward and Backward
longjon committed Jul 20, 2014
2 parents 5d7a4d5 + 5d425e2 commit 8cd7498
Showing 5 changed files with 130 additions and 24 deletions.
13 changes: 13 additions & 0 deletions include/caffe/net.hpp
@@ -35,6 +35,16 @@ class Net {
// Run forward with the input blobs already fed separately. You can get the
// input blobs using input_blobs().
const vector<Blob<Dtype>*>& ForwardPrefilled(Dtype* loss = NULL);

// The From and To variants of Forward and Backward operate on the
// (topological) ordering by which the net is specified. For general DAG
// networks, note that (1) computing from one layer to another might entail
// extra computation on unrelated branches, and (2) computation starting in
// the middle may be incorrect if all of the layers of a fan-in are not
// included.
Dtype ForwardFromTo(int start, int end);
Dtype ForwardFrom(int start);
Dtype ForwardTo(int end);
// Run forward using a set of bottom blobs, and return the result.
const vector<Blob<Dtype>*>& Forward(const vector<Blob<Dtype>* > & bottom,
Dtype* loss = NULL);
@@ -46,6 +56,9 @@ class Net {
// computes the gradient w.r.t the parameters, and the data has already
// been provided during the forward pass.
void Backward();
void BackwardFromTo(int start, int end);
void BackwardFrom(int start);
void BackwardTo(int end);

Dtype ForwardBackward(const vector<Blob<Dtype>* > & bottom) {
Dtype loss;
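The comment above is the contract for all of the new calls: ranges run over the net's topological layer ordering, and a pass that starts mid-net reuses whatever the earlier layers last left in their top blobs. A minimal sketch of that workflow through the name-based Python wrapper added later in this commit; the net object and the layer/blob names ('ip1', 'relu1') are placeholders, not part of this change.

# Assumes `net` is an initialized pycaffe Net whose data layers supply input.
net.forward()                      # one full pass fills every intermediate blob

# Perturb an intermediate activation, then recompute only the layers above it.
net.blobs['ip1'].data[...] += 0.1
outs = net.forward(start='relu1')  # layers before 'relu1' are not re-run;
                                   # their blobs keep the full-pass values

# Starting mid-net is only safe when every blob feeding the chosen range is
# already up to date -- a skipped fan-in branch would contribute stale data.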
8 changes: 4 additions & 4 deletions python/caffe/_caffe.cpp
@@ -181,12 +181,12 @@ struct CaffeNet {
}
}

void Forward() {
net_->ForwardPrefilled();
void Forward(int start, int end) {
net_->ForwardFromTo(start, end);
}

void Backward() {
net_->Backward();
void Backward(int start, int end) {
net_->BackwardFromTo(start, end);
}

void set_input_arrays(object data_obj, object labels_obj) {
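These bound methods surface in Python as the private _forward/_backward calls used by pycaffe.py below, and they take inclusive layer indices rather than names. A hedged sketch of how they are driven; the layer names are placeholders, and ordinary code should go through the name-based wrapper instead.

# Assumes `net` is a loaded pycaffe Net.
names = [lr.name for lr in net.layers]

# Full passes, written as explicit inclusive index ranges over all layers.
net._forward(0, len(names) - 1)
net._backward(len(names) - 1, 0)

# A partial forward over a contiguous sub-range, located by layer name.
start_ind = names.index('conv1')   # placeholder layer name
end_ind = names.index('ip1')       # placeholder layer name
net._forward(start_ind, end_ind)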
43 changes: 34 additions & 9 deletions python/caffe/pycaffe.py
@@ -34,8 +34,7 @@ def _Net_params(self):
return OrderedDict([(lr.name, lr.blobs) for lr in self.layers
if len(lr.blobs) > 0])


def _Net_forward(self, blobs=None, **kwargs):
def _Net_forward(self, blobs=None, start=None, end=None, **kwargs):
"""
Forward pass: prepare inputs and run the net forward.
@@ -44,13 +43,27 @@ def _Net_forward(self, blobs=None, **kwargs):
kwargs: Keys are input blob names and values are blob ndarrays.
For formatting inputs for Caffe, see Net.preprocess().
If None, input is taken from data layers.
start: optional name of layer at which to begin the forward pass
end: optional name of layer at which to finish the forward pass (inclusive)
Give
outs: {blob name: blob ndarray} dict.
"""
if blobs is None:
blobs = []

if start is not None:
start_ind = [lr.name for lr in self.layers].index(start)
else:
start_ind = 0

if end is not None:
end_ind = [lr.name for lr in self.layers].index(end)
outputs = set([end] + blobs)
else:
end_ind = len(self.layers) - 1
outputs = set(self.outputs + blobs)

if kwargs:
if set(kwargs.keys()) != set(self.inputs):
raise Exception('Input blob arguments do not match net inputs.')
@@ -63,28 +76,41 @@ def _Net_forward(self, blobs=None, **kwargs):
raise Exception('{} blob is not 4-d'.format(in_))
self.blobs[in_].data[...] = blob

self._forward()
self._forward(start_ind, end_ind)

# Unpack blobs to extract
outs = {out: self.blobs[out].data for out in set(self.outputs + blobs)}
return outs
return {out: self.blobs[out].data for out in outputs}


def _Net_backward(self, diffs=None, **kwargs):
def _Net_backward(self, diffs=None, start=None, end=None, **kwargs):
"""
Backward pass: prepare diffs and run the net backward.
Take
diffs: list of diffs to return in addition to bottom diffs.
kwargs: Keys are output blob names and values are diff ndarrays.
If None, top diffs are taken from forward loss.
start: optional name of layer at which to begin the backward pass
end: optional name of layer at which to finish the backward pass (inclusive)
Give
outs: {blob name: diff ndarray} dict.
"""
if diffs is None:
diffs = []

if start is not None:
start_ind = [lr.name for lr in self.layers].index(start)
else:
start_ind = len(self.layers) - 1

if end is not None:
end_ind = [lr.name for lr in self.layers].index(end)
outputs = set([end] + diffs)
else:
end_ind = 0
outputs = set(self.inputs + diffs)

if kwargs:
if set(kwargs.keys()) != set(self.outputs):
raise Exception('Top diff arguments do not match net outputs.')
@@ -97,11 +123,10 @@ def _Net_backward(self, diffs=None, **kwargs):
raise Exception('{} diff is not 4-d'.format(top))
self.blobs[top].diff[...] = diff

self._backward()
self._backward(start_ind, end_ind)

# Unpack diffs to extract
outs = {out: self.blobs[out].diff for out in set(self.inputs + diffs)}
return outs
return {out: self.blobs[out].diff for out in outputs}


def _Net_forward_all(self, blobs=None, **kwargs):
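With these changes, net.forward and net.backward accept optional start and end layer names (both inclusive) and return the blobs or diffs covering the end of the requested range. A usage sketch with hypothetical layer and blob names, assuming each layer's top blob shares the layer's name (which the return-value handling above relies on) and that 'loss' is the net's only output blob.

import numpy as np

# Run the first part of the net and also fetch an intermediate feature blob.
feats = net.forward(end='ip1', blobs=['conv1'])

# Resume from the next layer to finish the forward pass.
outs = net.forward(start='relu1')

# Backward over the top of the net only, seeding the output diff explicitly;
# the returned dict holds the diff at the blob of the `end` layer.
grads = net.backward(start='loss', end='conv1',
                     loss=np.ones_like(net.blobs['loss'].data))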
56 changes: 45 additions & 11 deletions src/caffe/net.cpp
@@ -335,16 +335,34 @@ void Net<Dtype>::GetLearningRateAndWeightDecay() {
}

template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::ForwardPrefilled(Dtype* loss) {
if (loss != NULL) {
*loss = Dtype(0.);
}
for (int i = 0; i < layers_.size(); ++i) {
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
CHECK_GE(start, 0);
CHECK_LT(end, layers_.size());
Dtype loss = 0;
for (int i = start; i <= end; ++i) {
// LOG(ERROR) << "Forwarding " << layer_names_[i];
Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
if (loss != NULL) {
*loss += layer_loss;
}
loss += layer_loss;
}
return loss;
}

template <typename Dtype>
Dtype Net<Dtype>::ForwardFrom(int start) {
return ForwardFromTo(start, layers_.size() - 1);
}

template <typename Dtype>
Dtype Net<Dtype>::ForwardTo(int end) {
return ForwardFromTo(0, end);
}

template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::ForwardPrefilled(Dtype* loss) {
if (loss != NULL) {
*loss = ForwardFromTo(0, layers_.size() - 1);
} else {
ForwardFromTo(0, layers_.size() - 1);
}
return net_output_blobs_;
}
@@ -380,10 +398,11 @@ string Net<Dtype>::Forward(const string& input_blob_protos, Dtype* loss) {
return output;
}


template <typename Dtype>
void Net<Dtype>::Backward() {
for (int i = layers_.size() - 1; i >= 0; --i) {
void Net<Dtype>::BackwardFromTo(int start, int end) {
CHECK_GE(end, 0);
CHECK_LT(start, layers_.size());
for (int i = start; i >= end; --i) {
if (layer_need_backward_[i]) {
layers_[i]->Backward(
top_vecs_[i], bottom_need_backward_[i], &bottom_vecs_[i]);
@@ -422,6 +441,21 @@ void Net<Dtype>::ShareTrainedLayersWith(Net* other) {
}
}

template <typename Dtype>
void Net<Dtype>::BackwardFrom(int start) {
BackwardFromTo(start, 0);
}

template <typename Dtype>
void Net<Dtype>::BackwardTo(int end) {
BackwardFromTo(layers_.size() - 1, end);
}

template <typename Dtype>
void Net<Dtype>::Backward() {
BackwardFromTo(layers_.size() - 1, 0);
}

template <typename Dtype>
void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
int num_source_layers = param.layers_size();
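The test added below exercises exactly the property these implementations are meant to preserve: a full pass equals the concatenation of partial passes over a split of the layer range, provided the data layer is not re-run and no fan-in branch is skipped. The same check, sketched through the Python wrapper; the net, the 'loss' blob name, a simple chain topology with at least three layers, and top blobs named after their layers are all assumptions here.

import numpy as np

names = [lr.name for lr in net.layers]

net.forward()                                  # full pass fills every blob
loss = float(net.blobs['loss'].data.flat[0])

k = len(names) // 2                            # an interior split point
net.forward(start=names[1], end=names[k])      # skip the data layer (index 0)
net.forward(start=names[k + 1])                # finish from the next layer
assert np.isclose(loss, float(net.blobs['loss'].data.flat[0]))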
34 changes: 34 additions & 0 deletions src/caffe/test/test_net.cpp
@@ -801,4 +801,38 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
}
}

TYPED_TEST(NetTest, TestFromTo) {
typedef typename TypeParam::Dtype Dtype;
this->InitTinyNet();

// Run Forward and Backward, recording the data diff and loss.
Blob<Dtype> data;
data.ReshapeLike(*this->net_->blob_by_name("data"));
this->net_->ForwardPrefilled();
this->net_->Backward();
data.CopyFrom(*this->net_->blob_by_name("data"), true, true);
const Dtype *loss_ptr = this->net_->output_blobs()[0]->cpu_data();
Dtype loss = *loss_ptr;

// Check that combining partial Forwards gives the same loss.
for (int i = 1; i < this->net_->layers().size(); ++i) {
// Note that we skip layer zero to keep the same data.
this->net_->ForwardFromTo(1, i);
if (i < this->net_->layers().size() - 1) {
this->net_->ForwardFrom(i + 1);
}
EXPECT_EQ(loss, *loss_ptr);
}

// Check that combining partial Backwards gives the same data diff.
for (int i = 1; i < this->net_->layers().size(); ++i) {
this->net_->BackwardTo(i);
this->net_->BackwardFrom(i - 1);
for (int j = 0; j < data.count(); ++j) {
EXPECT_EQ(data.cpu_diff()[j],
this->net_->blob_by_name("data")->cpu_diff()[j]);
}
}
}

} // namespace caffe
