diff --git a/include/caffe/apollonet.hpp b/include/caffe/apollonet.hpp
index d8b9758c0b5..d5a8642c637 100644
--- a/include/caffe/apollonet.hpp
+++ b/include/caffe/apollonet.hpp
@@ -52,6 +52,10 @@ class ApolloNet {
     CopyTrainedLayersFrom(param);
   }
 
+  void CopyLayerFrom(const LayerParameter& source_layer);
+
+  void SaveTrainedLayersTo(const string trained_filename) const;
+
   void Update(Dtype lr, Dtype momentum, Dtype clip_gradients,
       Dtype weight_decay);
 
@@ -99,6 +103,7 @@ class ApolloNet {
   map<string, Dtype> param_lr_mults_;
   map<string, vector<shared_ptr<Blob<Dtype> > > > bottom_blobs_;
   map<string, vector<string> > bottom_blob_names_;
+  map<string, LayerParameter> param_cache_;
   vector<string> active_layers_vec_;
   set<string> active_layers_set_;
   set<string> active_params_set_;
diff --git a/python/apollocaffe/cpp/_apollocaffe.pyx b/python/apollocaffe/cpp/_apollocaffe.pyx
index c0689d7328d..b323aa77f9c 100644
--- a/python/apollocaffe/cpp/_apollocaffe.pyx
+++ b/python/apollocaffe/cpp/_apollocaffe.pyx
@@ -388,16 +388,21 @@ cdef class ApolloNet:
         return blobs
 
     def save(self, filename):
-        assert filename.endswith('.h5'), "saving only supports h5 files"
-        with h5py.File(filename, 'w') as f:
-            for name, value in self.params.items():
-                f[name] = pynp.copy(value.data)
+        _, extension = os.path.splitext(filename)
+        if extension == '.h5':
+            with h5py.File(filename, 'w') as f:
+                for name, value in self.params.items():
+                    f[name] = pynp.copy(value.data)
+        elif extension == '.caffemodel':
+            self.thisptr.SaveTrainedLayersTo(filename)
+        else:
+            assert False, "Error, filename is neither h5 nor caffemodel: %s, %s" % (filename, extension)
 
     def load(self, filename):
-        if len(self.params) == 0:
-            raise ValueError('WARNING, loading into empty net.')
         _, extension = os.path.splitext(filename)
         if extension == '.h5':
+            if len(self.params) == 0:
+                raise ValueError('WARNING, loading into empty net.')
             with h5py.File(filename, 'r') as f:
                 params = self.params
                 names = []
diff --git a/python/apollocaffe/cpp/definitions.pxd b/python/apollocaffe/cpp/definitions.pxd
index 246bfc6f196..960d44507a4 100644
--- a/python/apollocaffe/cpp/definitions.pxd
+++ b/python/apollocaffe/cpp/definitions.pxd
@@ -85,6 +85,7 @@ cdef extern from "caffe/apollonet.hpp" namespace "caffe":
         void set_phase_train()
         Phase phase()
         void CopyTrainedLayersFrom(string trained_filename) except +
+        void SaveTrainedLayersTo(string trained_filename) except +
         vector[string]& active_layer_names()
         set[string]& active_param_names()
diff --git a/src/caffe/apollonet.cpp b/src/caffe/apollonet.cpp
index d76ba48cba0..7313b49c3f7 100644
--- a/src/caffe/apollonet.cpp
+++ b/src/caffe/apollonet.cpp
@@ -8,6 +8,7 @@
 #include "caffe/common.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
+#include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/upgrade_proto.hpp"
 
@@ -118,6 +119,9 @@ Dtype ApolloNet<Dtype>::ForwardLayer(shared_ptr<Layer<Dtype> > layer) {
   if (new_layer) {
     layer->SetUp(bottom_vec, top_vec);
     AddLayerParams(layer);
+    if (param_cache_.find(layer_name) != param_cache_.end()) {
+      CopyLayerFrom(param_cache_[layer_name]);
+    }
   }
 
   for (int param_id = 0; param_id < layer->param_names().size(); ++param_id) {
@@ -247,6 +251,9 @@ Dtype ApolloNet<Dtype>::ForwardLayer(const string& layer_param_string) {
   if (new_layer) {
     layer->SetUp(bottom_vec, top_vec);
     AddLayerParams(layer);
+    if (param_cache_.find(layer_name) != param_cache_.end()) {
+      CopyLayerFrom(param_cache_[layer_name]);
+    }
   }
 
   for (int param_id = 0; param_id < layer->param_names().size(); ++param_id) {
@@ -404,21 +411,43 @@ void ApolloNet<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
     const string& source_layer_name = source_layer.name();
 
     if (layers_map_.find(source_layer_name) == layers_map_.end()) {
-      LOG(INFO) << "Ignoring source layer " << source_layer_name;
+      param_cache_[source_layer_name] = source_layer;
+      LOG(INFO) << "Caching source layer blobs " << source_layer_name;
       continue;
     }
+    CopyLayerFrom(source_layer);
+  }
+}
-
-    LOG(INFO) << "Copying source layer " << source_layer_name;
-    vector<shared_ptr<Blob<Dtype> > >& target_blobs =
-        layers_map_[source_layer_name]->blobs();
 
+template <typename Dtype>
+void ApolloNet<Dtype>::CopyLayerFrom(const LayerParameter& source_layer) {
+  const string& source_layer_name = source_layer.name();
+  LOG(INFO) << "Copying source layer blobs " << source_layer_name;
+  vector<shared_ptr<Blob<Dtype> > >& target_blobs =
+      layers_map_[source_layer_name]->blobs();
+
+  ASSERT(target_blobs.size() == source_layer.blobs_size(),
+      "Incompatible number of blobs for layer " << source_layer_name);
+  for (int j = 0; j < target_blobs.size(); ++j) {
+    const bool kReshape = false;
+    target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
+  }
+}
-    ASSERT(target_blobs.size() == source_layer.blobs_size(),
-        "Incompatible number of blobs for layer " << source_layer_name);
-    for (int j = 0; j < target_blobs.size(); ++j) {
-      const bool kReshape = false;
-      target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
-    }
+
+template <typename Dtype>
+void ApolloNet<Dtype>::SaveTrainedLayersTo(const string trained_filename)
+    const {
+  NetParameter param;
+  DLOG(INFO) << "Serializing " << layers_map_.size() << " layers";
+  typename map<string, shared_ptr<Layer<Dtype> > >::const_iterator it =
+      layers_map_.begin();
+  while (it != layers_map_.end()) {
+    shared_ptr<Layer<Dtype> > layer = it->second;
+    LayerParameter* layer_param = param.add_layer();
+    layer->ToProto(layer_param);
+    ++it;
   }
+  WriteProtoToBinaryFile(param, trained_filename);
 }
 
 INSTANTIATE_CLASS(ApolloNet);