From c2b74c3cf66b229c0ad926cf052c7e7ed522038d Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 17 Jul 2014 20:12:44 -0700
Subject: [PATCH 1/4] Add NetState message with phase, level, stage;
 NetStateRule message with filtering rules for Layers.

---
 src/caffe/proto/caffe.proto | 95 +++++++++++++++++++++++++++++++------
 1 file changed, 81 insertions(+), 14 deletions(-)

diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index c27e9e5aebe..d2f61e57f34 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -56,24 +56,55 @@ message NetParameter {
   // If set False, then whether to carry out backward is determined
   // automatically according to the net structure and learning rates.
   optional bool force_backward = 5 [default = false];
+  // The current "state" of the network, including the phase, level, and stage.
+  // Some layers may be included/excluded depending on this state and the states
+  // specified in the layers' include and exclude fields.
+  optional NetState state = 6;
 }

+// NOTE
+// Update the next available ID when you add a new SolverParameter field.
+//
+// SolverParameter next available ID: 28 (last added: test_state)
 message SolverParameter {
-  // {train,test}_net specify a path to a file containing the {train,test} net
-  // parameters; {train,test}_net_param specify the net parameters directly
-  // inside the SolverParameter.
+  //////////////////////////////////////////////////////////////////////////////
+  // Specifying the train and test networks
   //
-  // Only either train_net or train_net_param (not both) should be specified.
-  // You may specify 0 or more test_net and/or test_net_param. All
-  // nets specified using test_net_param will be tested first, followed by all
-  // nets specified using test_net (each processed in the order specified in
-  // the prototxt).
-  optional string train_net = 1; // The proto filename for the train net.
-  repeated string test_net = 2; // The proto filenames for the test nets.
-  optional NetParameter train_net_param = 21; // Full params for the train net.
-  repeated NetParameter test_net_param = 22; // Full params for the test nets.
-  // The number of iterations for each testing phase.
+  // Exactly one train net must be specified using one of the following fields:
+  //     train_net_param, train_net, net_param, net
+  // One or more test nets may be specified using any of the following fields:
+  //     test_net_param, test_net, net_param, net
+  // If more than one test net field is specified (e.g., both net and
+  // test_net are specified), they will be evaluated in the field order given
+  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
+  // A test_iter must be specified for each test_net.
+  // A test_level and/or a test_stage may also be specified for each test_net.
+  //////////////////////////////////////////////////////////////////////////////

+  // Proto filename for the train net, possibly combined with one or more
+  // test nets.
+  optional string net = 24;
+  // Inline train net param, possibly combined with one or more test nets.
+  optional NetParameter net_param = 25;

+  optional string train_net = 1; // Proto filename for the train net.
+  repeated string test_net = 2; // Proto filenames for the test nets.
+  optional NetParameter train_net_param = 21; // Inline train net params.
+  repeated NetParameter test_net_param = 22; // Inline test net params.

+  // The states for the train/test nets. Must be unspecified or
+  // specified once per net.
+  //
+  // By default, train_state will have phase = TRAIN,
+  // and all test_state's will have phase = TEST.
+  // Other defaults are set according to the NetState defaults.
+  optional NetState train_state = 26;
+  repeated NetState test_state = 27;

+  // The number of iterations for each test net.
   repeated int32 test_iter = 3;

+  // The number of iterations between two testing phases.
   optional int32 test_interval = 4 [default = 0];
   optional bool test_compute_loss = 19 [default = false];
@@ -118,15 +149,51 @@ message SolverState {
   repeated BlobProto history = 3; // The history for sgd solvers
 }

+enum Phase {
+  TRAIN = 0;
+  TEST = 1;
+}

+message NetState {
+  optional Phase phase = 1 [default = TEST];
+  optional int32 level = 2 [default = 0];
+  repeated string stage = 3;
+}

+message NetStateRule {
+  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
+  // to meet this rule.
+  optional Phase phase = 1;

+  // Set the minimum and/or maximum levels in which the layer should be used.
+  // Leave undefined to meet the rule regardless of level.
+  optional int32 min_level = 2;
+  optional int32 max_level = 3;

+  // A customizable set of stages.
+  // The net must have ALL of the specified stages to meet the rule.
+  // (Use multiple NetStateRules to specify disjunctions of stages.)
+  repeated string stage = 4;
+}

 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available ID: 32 (last added: slice_param)
+// LayerParameter next available ID: 34 (last added: exclude)
 message LayerParameter {
   repeated string bottom = 2; // the name of the bottom blobs
   repeated string top = 3; // the name of the top blobs
   optional string name = 4; // the layer name

+  // Rules controlling whether and when a layer is included in the network,
+  // based on the current NetState. You may specify a non-zero number of rules
+  // to include OR exclude, but not both. If no include or exclude rules are
+  // specified, the layer is always included. If the current NetState meets
+  // ANY (i.e., one or more) of the specified rules, the layer is
+  // included/excluded.
+  repeated NetStateRule include = 32;
+  repeated NetStateRule exclude = 33;

   // NOTE
   // Add new LayerTypes to the enum below in lexicographical order (other than
   // starting with NONE), starting with the next available ID in the comment
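To make the new SolverParameter plumbing concrete, a minimal solver definition using the unified fields might look like the following sketch (the file name, stage names, and iteration counts here are hypothetical, not taken from this patch):

    net: "examples/hypothetical/train_val.prototxt"
    # One test_iter per test net instance; test_state pairs up in the same order.
    test_iter: 100
    test_state: { stage: "test-on-test-set" }
    test_iter: 600
    test_state: { stage: "test-on-train-set" }
    test_interval: 500

Each test_iter entry yields one test net instance built from the generic net, and test_state must either be omitted entirely or given once per test net.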
From b8833565f8234903d33c880017c3768cd02e6fd9 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Tue, 29 Jul 2014 11:22:52 -0700
Subject: [PATCH 2/4] Add unit tests and skeleton code for Net/Solver filtering
 functionality.

---
 include/caffe/net.hpp          |   8 +
 include/caffe/solver.hpp       |   5 +
 src/caffe/net.cpp              |  11 +
 src/caffe/test/test_net.cpp    | 820 +++++++++++++++++++++++++++++++++
 src/caffe/test/test_solver.cpp |  98 ++++
 5 files changed, 942 insertions(+)
 create mode 100644 src/caffe/test/test_solver.cpp

diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 7548011d973..808b244e47d 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -115,6 +115,14 @@ class Net {

   void set_debug_info(const bool value) { debug_info_ = value; }

+  // Helpers for Init.
+  // Remove layers that the user specified should be excluded given the current
+  // phase, level, and stage.
+  static void FilterNet(const NetParameter& param,
+      NetParameter* param_filtered);
+  static bool StateMeetsRule(const NetState& state, const NetStateRule& rule,
+      const string& layer_name);
+
 protected:
   // Helpers for Init.
   // Append a new input or top blob to the net.
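A small prototxt sketch of the semantics these declarations are meant to implement (layer and stage names are illustrative only): every criterion within a single rule must hold, while across rules a single match suffices.

    state: { phase: TEST level: 2 stage: 'deploy' }
    layers: {
      name: 'probe'
      type: INNER_PRODUCT
      bottom: 'data'
      top: 'probe'
      # Met: the phase matches, 0 <= 2 <= 5, and the state carries 'deploy'.
      include: { phase: TEST min_level: 0 max_level: 5 stage: 'deploy' }
      # Not met (wrong phase); but ANY satisfied include rule keeps the layer.
      include: { phase: TRAIN }
    }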
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 3112c59e0fc..52dcd1936c5 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -6,6 +6,8 @@
 #include <string>
 #include <vector>

+#include "caffe/net.hpp"
+
 namespace caffe {

 template <typename Dtype>
 class Solver {
@@ -20,6 +22,9 @@ class Solver {
   inline void Solve(const string resume_file) { Solve(resume_file.c_str()); }
   virtual ~Solver() {}
   inline shared_ptr<Net<Dtype> > net() { return net_; }
+  inline const vector<shared_ptr<Net<Dtype> > >& test_nets() {
+    return test_nets_;
+  }

 protected:
   // PreSolve is run before any solving iteration starts, allowing one to
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 228f826a8b9..a02b1118826 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -167,6 +167,17 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   debug_info_ = false;
 }

+template <typename Dtype>
+void Net<Dtype>::FilterNet(const NetParameter& param,
+    NetParameter* param_filtered) {
+}
+
+template <typename Dtype>
+bool Net<Dtype>::StateMeetsRule(const NetState& state,
+    const NetStateRule& rule, const string& layer_name) {
+  return true;
+}
+
 // Helper for Net::Init: add a new input or top blob to the net. (Inputs have
 // layer_id == -1, tops have layer_id >= 0.)
 template <typename Dtype>
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index e84701d941c..c4181345573 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -835,4 +835,824 @@ TYPED_TEST(NetTest, TestFromTo) {
   }
 }

+class FilterNetTest : public ::testing::Test {
+ protected:
+  void RunFilterNetTest(
+      const string& input_param_string, const string& filtered_param_string) {
+    NetParameter input_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        input_param_string, &input_param));
+    NetParameter expected_filtered_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        filtered_param_string, &expected_filtered_param));
+    NetParameter actual_filtered_param;
+    Net<float>::FilterNet(input_param, &actual_filtered_param);
+    EXPECT_EQ(expected_filtered_param.DebugString(),
+        actual_filtered_param.DebugString());
+    // Also test idempotence.
+    NetParameter double_filtered_param;
+    Net<float>::FilterNet(actual_filtered_param, &double_filtered_param);
+    EXPECT_EQ(actual_filtered_param.DebugString(),
+        double_filtered_param.DebugString());
+  }
+};
+
+TEST_F(FilterNetTest, TestNoFilter) {
+  const string& input_proto =
+      "name: 'TestNetwork' "
+      "layers: { "
+      " name: 'data' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      "} "
+      "layers: { "
+      " name: 'innerprod' "
+      " type: INNER_PRODUCT "
+      " bottom: 'data' "
+      " top: 'innerprod' "
+      "} "
+      "layers: { "
+      " name: 'loss' "
+      " type: SOFTMAX_LOSS "
+      " bottom: 'innerprod' "
+      " bottom: 'label' "
+      "} ";
+  this->RunFilterNetTest(input_proto, input_proto);
+}
+
+TEST_F(FilterNetTest, TestFilterLeNetTrainTest) {
+  const string& input_proto =
+      "name: 'LeNet' "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      " data_param { "
+      " source: 'mnist-train-leveldb' "
+      " scale: 0.00390625 "
+      " batch_size: 64 "
+      " } "
+      " include: { phase: TRAIN } "
+      "} "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      " data_param { "
+      " source: 'mnist-test-leveldb' "
+      " scale: 0.00390625 "
+      " batch_size: 100 "
+      " } "
+      " include: { phase: TEST } "
+      "} "
+      "layers { "
+      " name: 'conv1' "
+      " type: CONVOLUTION "
+      " bottom: 'data' "
+      " top: 'conv1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " convolution_param { "
+      " num_output: 20 "
+      " kernel_size: 5 "
+      " stride: 1 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'ip1' "
+      " type: INNER_PRODUCT "
+      " bottom: 'conv1' "
+      " top: 'ip1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " inner_product_param { "
+      " num_output: 10 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'accuracy' "
+      " type: ACCURACY "
+      " bottom: 'ip1' "
+      " bottom: 'label' "
+      " top: 'accuracy' "
+      " include: { phase: TEST } "
+      "} "
+      "layers { "
+      " name: 'loss' "
+      " type: SOFTMAX_LOSS "
+      " bottom: 'ip2' "
+      " bottom: 'label' "
+      " top: 'loss' "
+      "} ";
+  const string input_proto_train = "state: { phase: TRAIN } " + input_proto;
+  const string input_proto_test = "state: { phase: TEST } " + input_proto;
+  const string& output_proto_train =
+      "state: { phase: TRAIN } "
+      "name: 'LeNet' "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      " data_param { "
+      " source: 'mnist-train-leveldb' "
+      " scale: 0.00390625 "
+      " batch_size: 64 "
+      " } "
+      " include: { phase: TRAIN } "
+      "} "
+      "layers { "
+      " name: 'conv1' "
+      " type: CONVOLUTION "
+      " bottom: 'data' "
+      " top: 'conv1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " convolution_param { "
+      " num_output: 20 "
+      " kernel_size: 5 "
+      " stride: 1 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'ip1' "
+      " type: INNER_PRODUCT "
+      " bottom: 'conv1' "
+      " top: 'ip1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " inner_product_param { "
+      " num_output: 10 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'loss' "
+      " type: SOFTMAX_LOSS "
+      " bottom: 'ip2' "
+      " bottom: 'label' "
+      " top: 'loss' "
+      "} ";
+  const string& output_proto_test =
+      "state: { phase: TEST } "
+      "name: 'LeNet' "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
" top: 'label' " + " data_param { " + " source: 'mnist-test-leveldb' " + " scale: 0.00390625 " + " batch_size: 100 " + " } " + " include: { phase: TEST } " + "} " + "layers { " + " name: 'conv1' " + " type: CONVOLUTION " + " bottom: 'data' " + " top: 'conv1' " + " blobs_lr: 1 " + " blobs_lr: 2 " + " convolution_param { " + " num_output: 20 " + " kernel_size: 5 " + " stride: 1 " + " weight_filler { " + " type: 'xavier' " + " } " + " bias_filler { " + " type: 'constant' " + " } " + " } " + "} " + "layers { " + " name: 'ip1' " + " type: INNER_PRODUCT " + " bottom: 'conv1' " + " top: 'ip1' " + " blobs_lr: 1 " + " blobs_lr: 2 " + " inner_product_param { " + " num_output: 10 " + " weight_filler { " + " type: 'xavier' " + " } " + " bias_filler { " + " type: 'constant' " + " } " + " } " + "} " + "layers { " + " name: 'accuracy' " + " type: ACCURACY " + " bottom: 'ip1' " + " bottom: 'label' " + " top: 'accuracy' " + " include: { phase: TEST } " + "} " + "layers { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'ip2' " + " bottom: 'label' " + " top: 'loss' " + "} "; + this->RunFilterNetTest(input_proto_train, output_proto_train); + this->RunFilterNetTest(input_proto_test, output_proto_test); +} + +TEST_F(FilterNetTest, TestFilterOutByStage) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByStage2) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterInByStage) { + const string& input_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByStage2) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' 
" + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " exclude: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByMultipleStage) { + const string& input_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' stage: 'myotherstage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + const string& output_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMultipleStage) { + const string& input_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'myotherstage' } " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMultipleStage2) { + const string& input_proto = + "state: { stage: 'mystage' stage: 'myotherstage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' stage: 'myotherstage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByMinLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByMaxLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} 
" + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { max_level: -3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMinLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 0 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMinLevel2) { + const string& input_proto = + "state: { level: 7 } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMaxLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { max_level: 0 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMaxLevel2) { + const string& input_proto = + "state: { level: -7 } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { max_level: -3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInOutByIncludeMultiRule) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 2 phase: TRAIN } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { min_level: 2 phase: TEST } " + "} "; + const string& input_proto_train = + "state: { level: 4 phase: TRAIN } " + input_proto; + const string& input_proto_test = + "state: { level: 4 phase: TEST } " + input_proto; + const string& output_proto_train = + "state: { level: 4 phase: TRAIN } " + "name: 'TestNetwork' " + "layers: { " + " name: 
'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 2 phase: TRAIN } " + "} "; + const string& output_proto_test = + "state: { level: 4 phase: TEST } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { min_level: 2 phase: TEST } " + "} "; + this->RunFilterNetTest(input_proto_train, output_proto_train); + this->RunFilterNetTest(input_proto_test, output_proto_test); +} + +TEST_F(FilterNetTest, TestFilterInByIncludeMultiRule) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 2 phase: TRAIN } " + " include: { phase: TEST } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { min_level: 2 phase: TEST } " + " include: { phase: TRAIN } " + "} "; + const string& input_proto_train = + "state: { level: 2 phase: TRAIN } " + input_proto; + const string& input_proto_test = + "state: { level: 2 phase: TEST } " + input_proto; + this->RunFilterNetTest(input_proto_train, input_proto_train); + this->RunFilterNetTest(input_proto_test, input_proto_test); +} + +TEST_F(FilterNetTest, TestFilterInOutByExcludeMultiRule) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " exclude: { min_level: 2 phase: TRAIN } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " exclude: { min_level: 2 phase: TEST } " + "} "; + const string& input_proto_train = + "state: { level: 4 phase: TRAIN } " + input_proto; + const string& input_proto_test = + "state: { level: 4 phase: TEST } " + input_proto; + const string& output_proto_train = + "state: { level: 4 phase: TRAIN } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " exclude: { min_level: 2 phase: TEST } " + "} "; + const string& output_proto_test = + "state: { level: 4 phase: TEST } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " exclude: { min_level: 2 phase: TRAIN } " + "} "; + this->RunFilterNetTest(input_proto_train, output_proto_train); + this->RunFilterNetTest(input_proto_test, output_proto_test); +} + } // namespace caffe diff --git a/src/caffe/test/test_solver.cpp b/src/caffe/test/test_solver.cpp new file mode 100644 index 00000000000..7380ab86308 --- /dev/null +++ b/src/caffe/test/test_solver.cpp @@ -0,0 +1,98 @@ +// Copyright 2014 BVLC and contributors. 
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "google/protobuf/text_format.h"
+
+#include "gtest/gtest.h"
+#include "caffe/common.hpp"
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/solver.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+using std::ostringstream;
+
+namespace caffe {
+
+template <typename TypeParam>
+class SolverTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  virtual void InitSolverFromProtoString(const string& proto) {
+    SolverParameter param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(proto, &param));
+    solver_.reset(new SGDSolver<Dtype>(param));
+  }
+
+  shared_ptr<SGDSolver<Dtype> > solver_;
+};
+
+TYPED_TEST_CASE(SolverTest, TestDtypesAndDevices);
+
+TYPED_TEST(SolverTest, TestInitTrainTestNets) {
+  const string& proto =
+     "test_interval: 10 "
+     "test_iter: 10 "
+     "test_state: { stage: 'with-softmax' }"
+     "test_iter: 10 "
+     "test_state: {}"
+     "net_param { "
+     "  name: 'TestNetwork' "
+     "  layers: { "
+     "    name: 'data' "
+     "    type: DUMMY_DATA "
+     "    dummy_data_param { "
+     "      num: 5 "
+     "      channels: 3 "
+     "      height: 10 "
+     "      width: 10 "
+     "      num: 5 "
+     "      channels: 1 "
+     "      height: 1 "
+     "      width: 1 "
+     "    } "
+     "    top: 'data' "
+     "    top: 'label' "
+     "  } "
+     "  layers: { "
+     "    name: 'innerprod' "
+     "    type: INNER_PRODUCT "
+     "    inner_product_param { "
+     "      num_output: 10 "
+     "    } "
+     "    bottom: 'data' "
+     "    top: 'innerprod' "
+     "  } "
+     "  layers: { "
+     "    name: 'accuracy' "
+     "    type: ACCURACY "
+     "    bottom: 'innerprod' "
+     "    bottom: 'label' "
+     "    top: 'accuracy' "
+     "    exclude: { phase: TRAIN } "
+     "  } "
+     "  layers: { "
+     "    name: 'loss' "
+     "    type: SOFTMAX_LOSS "
+     "    bottom: 'innerprod' "
+     "    bottom: 'label' "
+     "    include: { phase: TRAIN } "
+     "    include: { phase: TEST stage: 'with-softmax' } "
+     "  } "
+     "} ";
+  this->InitSolverFromProtoString(proto);
+  ASSERT_TRUE(this->solver_->net());
+  EXPECT_TRUE(this->solver_->net()->has_layer("loss"));
+  EXPECT_FALSE(this->solver_->net()->has_layer("accuracy"));
+  ASSERT_EQ(2, this->solver_->test_nets().size());
+  EXPECT_TRUE(this->solver_->test_nets()[0]->has_layer("loss"));
+  EXPECT_TRUE(this->solver_->test_nets()[0]->has_layer("accuracy"));
+  EXPECT_FALSE(this->solver_->test_nets()[1]->has_layer("loss"));
+  EXPECT_TRUE(this->solver_->test_nets()[1]->has_layer("accuracy"));
+}
+
+} // namespace caffe
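For reference, the two test nets built by this test end up with the following effective states once the solver merges in the TEST phase (as wired up in the next patch), which is what the has_layer expectations above encode:

    # test_nets()[0]: keeps 'loss' (its second include rule is met) and 'accuracy'.
    state: { phase: TEST stage: 'with-softmax' }
    # test_nets()[1]: keeps only 'accuracy'; neither include rule of 'loss' is met.
    state: { phase: TEST }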
From cb4555c052a6ac2ba65a7d71681d9320a6afefb6 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 17 Jul 2014 20:38:53 -0700
Subject: [PATCH 3/4] Incorporate NetState{,Rule} into Solver/Net.

Net::FilterNet includes/excludes layers based on whether the NetState meets
each layer's NetStateRule(s).
---
 include/caffe/solver.hpp |   2 +
 src/caffe/net.cpp        |  78 ++++++++++++++++++++++-
 src/caffe/solver.cpp     | 133 +++++++++++++++++++++++++++++++--------
 3 files changed, 185 insertions(+), 28 deletions(-)

diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 52dcd1936c5..811d70372a4 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -16,6 +16,8 @@ class Solver {
   explicit Solver(const SolverParameter& param);
   explicit Solver(const string& param_file);
   void Init(const SolverParameter& param);
+  void InitTrainNet();
+  void InitTestNets();
   // The main entry of the solver function. By default, iter will be zero. Pass
   // in a non-zero iter number to resume training for a pre-trained net.
   virtual void Solve(const char* resume_file = NULL);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index a02b1118826..a4d1f235a5a 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -32,11 +32,15 @@ Net<Dtype>::Net(const string& param_file) {

 template <typename Dtype>
 void Net<Dtype>::Init(const NetParameter& in_param) {
+  // Filter layers based on their include/exclude rules and
+  // the current NetState.
+  NetParameter filtered_param;
+  FilterNet(in_param, &filtered_param);
   LOG(INFO) << "Initializing net from parameters: " << std::endl
-            << in_param.DebugString();
-  // Create a copy of in_param with splits added where necessary.
+            << filtered_param.DebugString();
+  // Create a copy of filtered_param with splits added where necessary.
   NetParameter param;
-  InsertSplits(in_param, &param);
+  InsertSplits(filtered_param, &param);
   // Basically, build all the layers and set up their connections.
   name_ = param.name();
   map<string, int> blob_name_to_idx;
@@ -170,11 +174,79 @@ void Net<Dtype>::Init(const NetParameter& in_param) {

 template <typename Dtype>
 void Net<Dtype>::FilterNet(const NetParameter& param,
     NetParameter* param_filtered) {
+  const NetState& net_state = param.state();
+  param_filtered->CopyFrom(param);
+  param_filtered->clear_layers();
+  for (int i = 0; i < param.layers_size(); ++i) {
+    const LayerParameter& layer_param = param.layers(i);
+    const string& layer_name = layer_param.name();
+    CHECK(layer_param.include_size() == 0 || layer_param.exclude_size() == 0)
+        << "Specify either include rules or exclude rules; not both.";
+    // If no include rules are specified, the layer is included by default and
+    // only excluded if it meets one of the exclude rules.
+    bool layer_included = (layer_param.include_size() == 0);
+    for (int j = 0; layer_included && j < layer_param.exclude_size(); ++j) {
+      if (StateMeetsRule(net_state, layer_param.exclude(j), layer_name)) {
+        layer_included = false;
+      }
+    }
+    for (int j = 0; !layer_included && j < layer_param.include_size(); ++j) {
+      if (StateMeetsRule(net_state, layer_param.include(j), layer_name)) {
+        layer_included = true;
+      }
+    }
+    if (layer_included) {
+      param_filtered->add_layers()->CopyFrom(layer_param);
+    }
+  }
 }

 template <typename Dtype>
 bool Net<Dtype>::StateMeetsRule(const NetState& state,
     const NetStateRule& rule, const string& layer_name) {
+  // Check whether the rule is broken due to phase.
+  if (rule.has_phase()) {
+    if (rule.phase() != state.phase()) {
+      LOG(INFO) << "The NetState phase (" << state.phase()
+          << ") differed from the phase (" << rule.phase()
+          << ") specified by a rule in layer " << layer_name;
+      return false;
+    }
+  }
+  // Check whether the rule is broken due to min level.
+  if (rule.has_min_level()) {
+    if (state.level() < rule.min_level()) {
+      LOG(INFO) << "The NetState level (" << state.level()
+          << ") is below the min_level (" << rule.min_level()
+          << ") specified by a rule in layer " << layer_name;
+      return false;
+    }
+  }
+  // Check whether the rule is broken due to max level.
+  if (rule.has_max_level()) {
+    if (state.level() > rule.max_level()) {
+      LOG(INFO) << "The NetState level (" << state.level()
+          << ") is above the max_level (" << rule.max_level()
+          << ") specified by a rule in layer " << layer_name;
+      return false;
+    }
+  }
+  // Check whether the rule is broken due to stage. If stage is specified,
+  // the NetState must contain ALL of the rule's stages to meet it.
+  if (rule.stage_size()) {
+    for (int i = 0; i < rule.stage_size(); ++i) {
+      // Check that the NetState contains the rule's ith stage.
+      bool has_stage = false;
+      for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
+        if (rule.stage(i) == state.stage(j)) { has_stage = true; }
+      }
+      if (!has_stage) {
+        LOG(INFO) << "The NetState did not contain stage '" << rule.stage(i)
+            << "' specified by a rule in layer " << layer_name;
+        return false;
+      }
+    }
+  }
   return true;
 }
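The exclude path mirrors the include path: a layer with only exclude rules starts out included and is dropped as soon as any one rule matches the state. A hypothetical sketch:

    state: { phase: TEST stage: 'deploy' }
    layers: {
      name: 'train_only'
      type: DUMMY_DATA
      top: 'noise'
      # Rule 1 is not met (phase differs); rule 2 matches stage 'deploy',
      # so FilterNet drops this layer from the filtered NetParameter.
      exclude: { phase: TRAIN }
      exclude: { stage: 'deploy' }
    }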
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 91738817875..11795f058cd 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -11,6 +11,7 @@
 #include "caffe/solver.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
+#include "caffe/util/upgrade_proto.hpp"

 namespace caffe {

@@ -42,40 +43,122 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
     Caffe::set_random_seed(param_.random_seed());
   }
   // Scaffolding code
+  InitTrainNet();
+  InitTestNets();
+  LOG(INFO) << "Solver scaffolding done.";
+}
+
+template <typename Dtype>
+void Solver<Dtype>::InitTrainNet() {
+  const int num_train_nets = param_.has_net() + param_.has_net_param() +
+      param_.has_train_net() + param_.has_train_net_param();
+  const string& field_names = "net, net_param, train_net, train_net_param";
+  CHECK_GE(num_train_nets, 1) << "SolverParameter must specify a train net "
+      << "using one of these fields: " << field_names;
+  CHECK_LE(num_train_nets, 1) << "SolverParameter must not contain more than "
+      << "one of these fields specifying a train_net: " << field_names;
+  NetParameter net_param;
   if (param_.has_train_net_param()) {
-    CHECK(!param_.has_train_net()) << "Either train_net_param or train_net may "
-        << "be specified, but not both.";
-    LOG(INFO) << "Creating training net specified in SolverParameter.";
-    net_.reset(new Net<Dtype>(param_.train_net_param()));
-  } else {
-    CHECK(param_.has_train_net())
-        << "Neither train_net nor train_net_param were specified.";
-    LOG(INFO) << "Creating training net from file: " << param_.train_net();
-    net_.reset(new Net<Dtype>(param_.train_net()));
+    LOG(INFO) << "Creating training net specified in train_net_param.";
+    net_param.CopyFrom(param_.train_net_param());
+  } else if (param_.has_train_net()) {
+    LOG(INFO) << "Creating training net from train_net file: "
+        << param_.train_net();
+    ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param);
   }
-  CHECK(net_) << "Training net uninitialized.";
-  net_->set_debug_info(param_.debug_info());
+  if (param_.has_net_param()) {
+    LOG(INFO) << "Creating training net specified in net_param.";
+    net_param.CopyFrom(param_.net_param());
+  }
+  if (param_.has_net()) {
+    LOG(INFO) << "Creating training net from net file: " << param_.net();
+    ReadNetParamsFromTextFileOrDie(param_.net(), &net_param);
+  }
+  // Set the correct NetState. We start with the solver defaults (lowest
+  // precedence); then, merge in any NetState specified by the net_param itself;
+  // finally, merge in any NetState specified by the train_state (highest
+  // precedence).
+  NetState net_state;
+  net_state.set_phase(TRAIN);
+  net_state.MergeFrom(net_param.state());
+  net_state.MergeFrom(param_.train_state());
+  net_param.mutable_state()->CopyFrom(net_state);
+  net_.reset(new Net<Dtype>(net_param));
+}
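The three-way merge above is easiest to see with concrete (hypothetical) values; later MergeFrom calls overwrite singular fields, while repeated fields such as stage accumulate:

    # Solver default:            phase: TRAIN
    # state in the net file:     state: { level: 1 stage: 'base' }
    # train_state in the solver: train_state: { level: 2 }
    # Effective merged state:    phase: TRAIN level: 2 stage: 'base'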
+
+template <typename Dtype>
+void Solver<Dtype>::InitTestNets() {
+  const bool has_net_param = param_.has_net_param();
+  const bool has_net_file = param_.has_net();
+  const int num_generic_nets = has_net_param + has_net_file;
+  CHECK_LE(num_generic_nets, 1)
+      << "Both net_param and net_file may not be specified.";
   const int num_test_net_params = param_.test_net_param_size();
   const int num_test_net_files = param_.test_net_size();
   const int num_test_nets = num_test_net_params + num_test_net_files;
-  if (num_test_nets) {
-    CHECK_EQ(param_.test_iter_size(), num_test_nets)
-        << "test_iter must be specified for each test network.";
+  if (num_generic_nets) {
+    CHECK_GE(param_.test_iter_size(), num_test_nets)
+        << "test_iter must be specified for each test network.";
+  } else {
+    CHECK_EQ(param_.test_iter_size(), num_test_nets)
+        << "test_iter must be specified for each test network.";
+  }
+  // If we have a generic net (specified by net or net_param, rather than
+  // test_net or test_net_param), we may have an unlimited number of actual
+  // test networks -- the actual number is given by the number of remaining
+  // test_iters after any test nets specified by test_net_param and/or test_net
+  // are evaluated.
+  const int num_generic_net_instances = param_.test_iter_size() - num_test_nets;
+  const int num_test_net_instances = num_test_nets + num_generic_net_instances;
+  if (param_.test_state_size()) {
+    CHECK_EQ(param_.test_state_size(), num_test_net_instances)
+        << "test_state must be unspecified or specified once per test net.";
+  }
+  if (num_test_net_instances) {
     CHECK_GT(param_.test_interval(), 0);
   }
-  test_nets_.resize(num_test_nets);
-  for (int i = 0; i < num_test_net_params; ++i) {
-    LOG(INFO) << "Creating testing net (#" << i
-        << ") specified in SolverParameter.";
-    test_nets_[i].reset(new Net<Dtype>(param_.test_net_param(i)));
+  int test_net_id = 0;
+  vector<string> sources(num_test_net_instances);
+  vector<NetParameter> net_params(num_test_net_instances);
+  for (int i = 0; i < num_test_net_params; ++i, ++test_net_id) {
+    sources[test_net_id] = "test_net_param";
+    net_params[test_net_id].CopyFrom(param_.test_net_param(i));
   }
-  for (int i = 0, test_net_id = num_test_net_params;
-       i < num_test_net_files; ++i, ++test_net_id) {
-    LOG(INFO) << "Creating testing net (#" << test_net_id
-        << ") from file: " << param.test_net(i);
-    test_nets_[test_net_id].reset(new Net<Dtype>(param_.test_net(i)));
+  for (int i = 0; i < num_test_net_files; ++i, ++test_net_id) {
+    sources[test_net_id] = "test_net file: " + param_.test_net(i);
+    ReadNetParamsFromTextFileOrDie(param_.test_net(i),
+        &net_params[test_net_id]);
+  }
+  const int remaining_test_nets = param_.test_iter_size() - test_net_id;
+  if (has_net_param) {
+    for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
+      sources[test_net_id] = "net_param";
+      net_params[test_net_id].CopyFrom(param_.net_param());
+    }
+  }
+  if (has_net_file) {
+    for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
+      sources[test_net_id] = "net file: " + param_.net();
+      ReadNetParamsFromTextFileOrDie(param_.net(), &net_params[test_net_id]);
+    }
+  }
+  test_nets_.resize(num_test_net_instances);
+  for (int i = 0; i < num_test_net_instances; ++i) {
+    // Set the correct NetState. We start with the solver defaults (lowest
+    // precedence); then, merge in any NetState specified by the net_param
+    // itself; finally, merge in any NetState specified by the test_state
+    // (highest precedence).
+    NetState net_state;
+    net_state.set_phase(TEST);
+    net_state.MergeFrom(net_params[i].state());
+    if (param_.test_state_size()) {
+      net_state.MergeFrom(param_.test_state(i));
+    }
+    net_params[i].mutable_state()->CopyFrom(net_state);
+    LOG(INFO)
+        << "Creating testing net (#" << i << ") specified by " << sources[i];
+    test_nets_[i].reset(new Net<Dtype>(net_params[i]));
   }
-  LOG(INFO) << "Solver scaffolding done.";
 }

 template <typename Dtype>
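One consequence of InitTestNets worth spelling out: test nets are instantiated in source order -- test_net_param first, then test_net files, then copies of the generic net for any remaining test_iter entries. A hypothetical solver mixing all three sources:

    test_net_param: { name: 'InlineTestNet' }  # becomes test_nets()[0]
    test_net: "extra_test.prototxt"            # becomes test_nets()[1]
    net: "train_val.prototxt"                  # fills the remaining slot
    test_iter: 100
    test_iter: 100
    test_iter: 100   # three test_iters, two explicit nets -> one generic instance
    test_interval: 1000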
From e526e2ddd71c0881b71f31bf522806f85f687648 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Fri, 18 Jul 2014 15:07:13 -0700
Subject: [PATCH 4/4] Use unified train/test nets in examples.

---
 examples/cifar10/cifar10_full_solver.prototxt |   6 +-
 .../cifar10/cifar10_full_solver_lr1.prototxt  |   6 +-
 .../cifar10/cifar10_full_solver_lr2.prototxt  |   6 +-
 examples/cifar10/cifar10_full_train.prototxt  | 174 ------------
 ...totxt => cifar10_full_train_test.prototxt} |  16 +-
 .../cifar10/cifar10_quick_solver.prototxt     |   6 +-
 .../cifar10/cifar10_quick_solver_lr1.prototxt |   6 +-
 examples/cifar10/cifar10_quick_train.prototxt | 168 ------------
 ...otxt => cifar10_quick_train_test.prototxt} |  16 +-
 examples/imagenet/alexnet_solver.prototxt     |   3 +-
 ...in.prototxt => alexnet_train_val.prototxt} |  23 ++
 examples/imagenet/alexnet_val.prototxt        | 228 ---------------
 examples/imagenet/imagenet_solver.prototxt    |   3 +-
 ...n.prototxt => imagenet_train_val.prototxt} |  23 ++
 examples/imagenet/imagenet_val.prototxt       | 228 ---------------
 examples/imagenet/readme.md                   |   7 +-
 .../mnist/lenet_consolidated_solver.prototxt  | 259 ++----------------
 examples/mnist/lenet_solver.prototxt          |   6 +-
 examples/mnist/lenet_test.prototxt            | 118 --------
 ...ain.prototxt => lenet_train_test.prototxt} |  23 ++
 ...in.prototxt => mnist_autoencoder.prototxt} |  28 ++
 .../mnist/mnist_autoencoder_solver.prototxt   |   3 +-
 .../mnist/mnist_autoencoder_test.prototxt     | 146 ----------
 examples/mnist/readme.md                      |   6 +-
 24 files changed, 177 insertions(+), 1331 deletions(-)
 delete mode 100644 examples/cifar10/cifar10_full_train.prototxt
 rename examples/cifar10/{cifar10_full_test.prototxt => cifar10_full_train_test.prototxt} (89%)
 delete mode 100644 examples/cifar10/cifar10_quick_train.prototxt
 rename examples/cifar10/{cifar10_quick_test.prototxt => cifar10_quick_train_test.prototxt} (89%)
 rename examples/imagenet/{alexnet_train.prototxt => alexnet_train_val.prototxt} (91%)
 delete mode 100644 examples/imagenet/alexnet_val.prototxt
 rename examples/imagenet/{imagenet_train.prototxt => imagenet_train_val.prototxt} (91%)
 delete mode 100644 examples/imagenet/imagenet_val.prototxt
 delete mode 100644 examples/mnist/lenet_test.prototxt
 rename examples/mnist/{lenet_train.prototxt => lenet_train_test.prototxt} (82%)
 rename examples/mnist/{mnist_autoencoder_train.prototxt => mnist_autoencoder.prototxt} (88%)
 delete mode 100644 examples/mnist/mnist_autoencoder_test.prototxt

diff --git a/examples/cifar10/cifar10_full_solver.prototxt b/examples/cifar10/cifar10_full_solver.prototxt
index 0a0b456308d..49de3f58803 100644
--- a/examples/cifar10/cifar10_full_solver.prototxt
+++ b/examples/cifar10/cifar10_full_solver.prototxt
@@ -1,10 +1,8 @@
 # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
 # then another factor of 10 after 10 more epochs (5000 iters)

-# The training protocol buffer definition
-train_net: "cifar10_full_train.prototxt"
-# The testing protocol buffer definition
-test_net: "cifar10_full_test.prototxt"
+# The train/test net protocol buffer definition
+net: "cifar10_full_train_test.prototxt"
 # test_iter specifies how many forward passes the test should carry out.
 # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
 # covering the full 10,000 testing images.
diff --git a/examples/cifar10/cifar10_full_solver_lr1.prototxt b/examples/cifar10/cifar10_full_solver_lr1.prototxt
index 4376de5493f..746f4fba15a 100644
--- a/examples/cifar10/cifar10_full_solver_lr1.prototxt
+++ b/examples/cifar10/cifar10_full_solver_lr1.prototxt
@@ -1,10 +1,8 @@
 # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
 # then another factor of 10 after 10 more epochs (5000 iters)

-# The training protocol buffer definition
-train_net: "cifar10_full_train.prototxt"
-# The testing protocol buffer definition
-test_net: "cifar10_full_test.prototxt"
+# The train/test net protocol buffer definition
+net: "cifar10_full_train_test.prototxt"
 # test_iter specifies how many forward passes the test should carry out.
 # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
 # covering the full 10,000 testing images.
diff --git a/examples/cifar10/cifar10_full_solver_lr2.prototxt b/examples/cifar10/cifar10_full_solver_lr2.prototxt
index 19580c5184a..5a549ffc96d 100644
--- a/examples/cifar10/cifar10_full_solver_lr2.prototxt
+++ b/examples/cifar10/cifar10_full_solver_lr2.prototxt
@@ -1,10 +1,8 @@
 # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
 # then another factor of 10 after 10 more epochs (5000 iters)

-# The training protocol buffer definition
-train_net: "cifar10_full_train.prototxt"
-# The testing protocol buffer definition
-test_net: "cifar10_full_test.prototxt"
+# The train/test net protocol buffer definition
+net: "cifar10_full_train_test.prototxt"
 # test_iter specifies how many forward passes the test should carry out.
 # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
 # covering the full 10,000 testing images.
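The example migrations below all follow the same recipe, sketched here in miniature: the former train and test nets become one file in which paired DATA layers carry opposite phase rules and evaluation-only layers are tagged TEST (paths shortened):

    layers {
      name: "cifar"
      type: DATA
      top: "data"
      top: "label"
      data_param { source: "cifar-train-leveldb" batch_size: 100 }
      include: { phase: TRAIN }
    }
    layers {
      name: "cifar"
      type: DATA
      top: "data"
      top: "label"
      data_param { source: "cifar-test-leveldb" batch_size: 100 }
      include: { phase: TEST }
    }
    layers {
      name: "accuracy"
      type: ACCURACY
      bottom: "ip1"
      bottom: "label"
      top: "accuracy"
      include: { phase: TEST }
    }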
diff --git a/examples/cifar10/cifar10_full_train.prototxt b/examples/cifar10/cifar10_full_train.prototxt deleted file mode 100644 index 25c76060991..00000000000 --- a/examples/cifar10/cifar10_full_train.prototxt +++ /dev/null @@ -1,174 +0,0 @@ -name: "CIFAR10_full_train" -layers { - name: "cifar" - type: DATA - top: "data" - top: "label" - data_param { - source: "cifar10-leveldb/cifar-train-leveldb" - mean_file: "mean.binaryproto" - batch_size: 100 - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "relu1" - type: RELU - bottom: "pool1" - top: "pool1" -} -layers { - name: "norm1" - type: LRN - bottom: "pool1" - top: "norm1" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "norm1" - top: "conv2" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm2" - type: LRN - bottom: "pool2" - top: "norm2" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } -} -layers { - name: "conv3" - type: CONVOLUTION - bottom: "norm2" - top: "conv3" - convolution_param { - num_output: 64 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "pool3" - type: POOLING - bottom: "conv3" - top: "pool3" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool3" - top: "ip1" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 250 - weight_decay: 0 - inner_product_param { - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip1" - bottom: "label" -} diff --git a/examples/cifar10/cifar10_full_test.prototxt b/examples/cifar10/cifar10_full_train_test.prototxt similarity index 89% rename from examples/cifar10/cifar10_full_test.prototxt rename to examples/cifar10/cifar10_full_train_test.prototxt index 1f77b4f0348..4fd52fec133 100644 --- a/examples/cifar10/cifar10_full_test.prototxt +++ b/examples/cifar10/cifar10_full_train_test.prototxt @@ -1,4 +1,16 @@ -name: "CIFAR10_full_test" +name: "CIFAR10_full" +layers { + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-train-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } + include: { phase: TRAIN } +} layers { name: "cifar" type: DATA @@ -9,6 +21,7 @@ layers { mean_file: "mean.binaryproto" batch_size: 100 } + include: { phase: TEST } } layers { name: "conv1" @@ -172,6 +185,7 @@ layers { bottom: "ip1" bottom: "label" top: "accuracy" + include: { phase: 
TEST } } layers { name: "loss" diff --git a/examples/cifar10/cifar10_quick_solver.prototxt b/examples/cifar10/cifar10_quick_solver.prototxt index 4b547cc96f4..cdd0722b3a0 100644 --- a/examples/cifar10/cifar10_quick_solver.prototxt +++ b/examples/cifar10/cifar10_quick_solver.prototxt @@ -1,9 +1,7 @@ # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10 -# The training protocol buffer definition -train_net: "cifar10_quick_train.prototxt" -# The testing protocol buffer definition -test_net: "cifar10_quick_test.prototxt" +# The train/test net protocol buffer definition +net: "cifar10_quick_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images. diff --git a/examples/cifar10/cifar10_quick_solver_lr1.prototxt b/examples/cifar10/cifar10_quick_solver_lr1.prototxt index d4ba3d525d9..2ed54ad980f 100644 --- a/examples/cifar10/cifar10_quick_solver_lr1.prototxt +++ b/examples/cifar10/cifar10_quick_solver_lr1.prototxt @@ -1,9 +1,7 @@ # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10 -# The training protocol buffer definition -train_net: "cifar10_quick_train.prototxt" -# The testing protocol buffer definition -test_net: "cifar10_quick_test.prototxt" +# The train/test net protocol buffer definition +net: "cifar10_quick_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images. diff --git a/examples/cifar10/cifar10_quick_train.prototxt b/examples/cifar10/cifar10_quick_train.prototxt deleted file mode 100644 index de5b6c32c5d..00000000000 --- a/examples/cifar10/cifar10_quick_train.prototxt +++ /dev/null @@ -1,168 +0,0 @@ -name: "CIFAR10_quick_train" -layers { - name: "cifar" - type: DATA - top: "data" - top: "label" - data_param { - source: "cifar10-leveldb/cifar-train-leveldb" - mean_file: "mean.binaryproto" - batch_size: 100 - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "relu1" - type: RELU - bottom: "pool1" - top: "pool1" -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "conv3" - type: CONVOLUTION - bottom: "pool2" - top: "conv3" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 64 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "pool3" - type: POOLING - bottom: "conv3" - top: "pool3" - pooling_param { - 
pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool3" - top: "ip1" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 64 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" -} diff --git a/examples/cifar10/cifar10_quick_test.prototxt b/examples/cifar10/cifar10_quick_train_test.prototxt similarity index 89% rename from examples/cifar10/cifar10_quick_test.prototxt rename to examples/cifar10/cifar10_quick_train_test.prototxt index aa82c32aa24..b34d1cd2fcb 100644 --- a/examples/cifar10/cifar10_quick_test.prototxt +++ b/examples/cifar10/cifar10_quick_train_test.prototxt @@ -1,4 +1,16 @@ -name: "CIFAR10_quick_test" +name: "CIFAR10_quick" +layers { + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-train-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } + include: { phase: TRAIN } +} layers { name: "cifar" type: DATA @@ -9,6 +21,7 @@ layers { mean_file: "mean.binaryproto" batch_size: 100 } + include: { phase: TEST } } layers { name: "conv1" @@ -166,6 +179,7 @@ layers { bottom: "ip2" bottom: "label" top: "accuracy" + include: { phase: TEST } } layers { name: "loss" diff --git a/examples/imagenet/alexnet_solver.prototxt b/examples/imagenet/alexnet_solver.prototxt index 75d0d5dffa7..8581e99c6a7 100644 --- a/examples/imagenet/alexnet_solver.prototxt +++ b/examples/imagenet/alexnet_solver.prototxt @@ -1,5 +1,4 @@ -train_net: "alexnet_train.prototxt" -test_net: "alexnet_val.prototxt" +net: "alexnet_train_val.prototxt" test_iter: 1000 test_interval: 1000 base_lr: 0.01 diff --git a/examples/imagenet/alexnet_train.prototxt b/examples/imagenet/alexnet_train_val.prototxt similarity index 91% rename from examples/imagenet/alexnet_train.prototxt rename to examples/imagenet/alexnet_train_val.prototxt index 32a96cfd4d9..f65f3e7f87e 100644 --- a/examples/imagenet/alexnet_train.prototxt +++ b/examples/imagenet/alexnet_train_val.prototxt @@ -2,6 +2,8 @@ name: "AlexNet" layers { name: "data" type: DATA + top: "data" + top: "label" data_param { source: "ilsvrc12_train_leveldb" mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" @@ -9,8 +11,21 @@ layers { crop_size: 227 mirror: true } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA top: "data" top: "label" + data_param { + source: "ilsvrc12_val_leveldb" + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 50 + crop_size: 227 + mirror: false + } + include: { phase: TEST } } layers { name: "conv1" @@ -308,6 +323,14 @@ layers { bottom: "fc7" top: "fc8" } +layers { + name: "accuracy" + type: ACCURACY + bottom: "fc8" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} layers { name: "loss" type: SOFTMAX_LOSS diff --git a/examples/imagenet/alexnet_val.prototxt b/examples/imagenet/alexnet_val.prototxt deleted file mode 100644 index 1d8d86b78ff..00000000000 --- a/examples/imagenet/alexnet_val.prototxt +++ /dev/null @@ -1,228 +0,0 @@ -name: "AlexNet" -layers { - name: "data" - type: DATA - data_param { - source: "ilsvrc12_val_leveldb" - mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batch_size: 50 - 
crop_size: 227 - mirror: false - } - top: "data" - top: "label" -} -layers { - name: "conv1" - type: CONVOLUTION - convolution_param { - num_output: 96 - kernel_size: 11 - stride: 4 - } - bottom: "data" - top: "conv1" -} -layers { - name: "relu1" - type: RELU - bottom: "conv1" - top: "conv1" -} -layers { - name: "norm1" - type: LRN - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } - bottom: "conv1" - top: "norm1" -} -layers { - name: "pool1" - type: POOLING - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - bottom: "norm1" - top: "pool1" -} -layers { - name: "conv2" - type: CONVOLUTION - convolution_param { - num_output: 256 - pad: 2 - kernel_size: 5 - group: 2 - } - bottom: "pool1" - top: "conv2" -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "norm2" - type: LRN - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } - bottom: "conv2" - top: "norm2" -} -layers { - name: "pool2" - type: POOLING - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - bottom: "norm2" - top: "pool2" -} -layers { - name: "conv3" - type: CONVOLUTION - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - } - bottom: "pool2" - top: "conv3" -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "conv4" - type: CONVOLUTION - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - group: 2 - } - bottom: "conv3" - top: "conv4" -} -layers { - name: "relu4" - type: RELU - bottom: "conv4" - top: "conv4" -} -layers { - name: "conv5" - type: CONVOLUTION - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - group: 2 - } - bottom: "conv4" - top: "conv5" -} -layers { - name: "relu5" - type: RELU - bottom: "conv5" - top: "conv5" -} -layers { - name: "pool5" - type: POOLING - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - bottom: "conv5" - top: "pool5" -} -layers { - name: "fc6" - type: INNER_PRODUCT - inner_product_param { - num_output: 4096 - } - bottom: "pool5" - top: "fc6" -} -layers { - name: "relu6" - type: RELU - bottom: "fc6" - top: "fc6" -} -layers { - name: "drop6" - type: DROPOUT - dropout_param { - dropout_ratio: 0.5 - } - bottom: "fc6" - top: "fc6" -} -layers { - name: "fc7" - type: INNER_PRODUCT - inner_product_param { - num_output: 4096 - } - bottom: "fc6" - top: "fc7" -} -layers { - name: "relu7" - type: RELU - bottom: "fc7" - top: "fc7" -} -layers { - name: "drop7" - type: DROPOUT - dropout_param { - dropout_ratio: 0.5 - } - bottom: "fc7" - top: "fc7" -} -layers { - name: "fc8" - type: INNER_PRODUCT - inner_product_param { - num_output: 1000 - } - bottom: "fc7" - top: "fc8" -} -layers { - name: "accuracy" - type: ACCURACY - bottom: "fc8" - bottom: "label" - top: "accuracy" -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "fc8" - bottom: "label" - top: "loss" -} diff --git a/examples/imagenet/imagenet_solver.prototxt b/examples/imagenet/imagenet_solver.prototxt index e543ba66cad..aedec4104a6 100644 --- a/examples/imagenet/imagenet_solver.prototxt +++ b/examples/imagenet/imagenet_solver.prototxt @@ -1,5 +1,4 @@ -train_net: "imagenet_train.prototxt" -test_net: "imagenet_val.prototxt" +net: "imagenet_train_val.prototxt" test_iter: 1000 test_interval: 1000 base_lr: 0.01 diff --git a/examples/imagenet/imagenet_train.prototxt b/examples/imagenet/imagenet_train_val.prototxt similarity index 91% rename from examples/imagenet/imagenet_train.prototxt rename to examples/imagenet/imagenet_train_val.prototxt index 
519d4509be9..af28c1495e5 100644 --- a/examples/imagenet/imagenet_train.prototxt +++ b/examples/imagenet/imagenet_train_val.prototxt @@ -11,6 +11,21 @@ layers { crop_size: 227 mirror: true } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "ilsvrc12_val_leveldb" + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 50 + crop_size: 227 + mirror: false + } + include: { phase: TEST } } layers { name: "conv1" @@ -308,6 +323,14 @@ layers { } } } +layers { + name: "accuracy" + type: ACCURACY + bottom: "fc8" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} layers { name: "loss" type: SOFTMAX_LOSS diff --git a/examples/imagenet/imagenet_val.prototxt b/examples/imagenet/imagenet_val.prototxt deleted file mode 100644 index 8be5150cdd2..00000000000 --- a/examples/imagenet/imagenet_val.prototxt +++ /dev/null @@ -1,228 +0,0 @@ -name: "CaffeNet" -layers { - name: "data" - type: DATA - top: "data" - top: "label" - data_param { - source: "ilsvrc12_val_leveldb" - mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batch_size: 50 - crop_size: 227 - mirror: false - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - convolution_param { - num_output: 96 - kernel_size: 11 - stride: 4 - } -} -layers { - name: "relu1" - type: RELU - bottom: "conv1" - top: "conv1" -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm1" - type: LRN - bottom: "pool1" - top: "norm1" - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "norm1" - top: "conv2" - convolution_param { - num_output: 256 - pad: 2 - kernel_size: 5 - group: 2 - } -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm2" - type: LRN - bottom: "pool2" - top: "norm2" - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } -} -layers { - name: "conv3" - type: CONVOLUTION - bottom: "norm2" - top: "conv3" - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - } -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "conv4" - type: CONVOLUTION - bottom: "conv3" - top: "conv4" - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - group: 2 - } -} -layers { - name: "relu4" - type: RELU - bottom: "conv4" - top: "conv4" -} -layers { - name: "conv5" - type: CONVOLUTION - bottom: "conv4" - top: "conv5" - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - group: 2 - } -} -layers { - name: "relu5" - type: RELU - bottom: "conv5" - top: "conv5" -} -layers { - name: "pool5" - type: POOLING - bottom: "conv5" - top: "pool5" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "fc6" - type: INNER_PRODUCT - bottom: "pool5" - top: "fc6" - inner_product_param { - num_output: 4096 - } -} -layers { - name: "relu6" - type: RELU - bottom: "fc6" - top: "fc6" -} -layers { - name: "drop6" - type: DROPOUT - bottom: "fc6" - top: "fc6" - dropout_param { - dropout_ratio: 0.5 - } -} -layers { - name: "fc7" - type: INNER_PRODUCT - bottom: "fc6" - top: "fc7" - inner_product_param { - num_output: 4096 - } -} -layers { - name: "relu7" - type: RELU - bottom: "fc7" - 
top: "fc7" -} -layers { - name: "drop7" - type: DROPOUT - bottom: "fc7" - top: "fc7" - dropout_param { - dropout_ratio: 0.5 - } -} -layers { - name: "fc8" - type: INNER_PRODUCT - bottom: "fc7" - top: "fc8" - inner_product_param { - num_output: 1000 - } -} -layers { - name: "accuracy" - type: ACCURACY - bottom: "fc8" - bottom: "label" - top: "accuracy" -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "fc8" - bottom: "label" - top: "loss" -} \ No newline at end of file diff --git a/examples/imagenet/readme.md b/examples/imagenet/readme.md index e74e6b86996..0b5b8452188 100644 --- a/examples/imagenet/readme.md +++ b/examples/imagenet/readme.md @@ -72,12 +72,11 @@ We will also lay out a protocol buffer for running the solver. Let's make a few * The network will be trained with momentum 0.9 and a weight decay of 0.0005. * For every 10,000 iterations, we will take a snapshot of the current status. -Sound good? This is implemented in `examples/imagenet/imagenet_solver.prototxt`. Again, you will need to change the first two lines: +Sound good? This is implemented in `examples/imagenet/imagenet_solver.prototxt`. Again, you will need to change the first line: - train_net: "imagenet_train.prototxt" - test_net: "imagenet_val.prototxt" + net: "imagenet_train_val.prototxt" -to point to the actual path if you have changed them. +to point to the actual path if you have changed it. Training ImageNet ----------------- diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt index ef851e0f656..980f9382066 100644 --- a/examples/mnist/lenet_consolidated_solver.prototxt +++ b/examples/mnist/lenet_consolidated_solver.prototxt @@ -30,8 +30,19 @@ random_seed: 1701 # solver mode: CPU or GPU solver_mode: GPU -# The training protocol buffer definition -train_net_param { +# We test on both the test and train set using "stages". The TEST DATA layers +# each have a stage, either 'test-on-train-set' or 'test-on-test-set'. +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +test_state: { stage: "test-on-test-set" } +# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K). 
+test_iter: 600 +test_state: { stage: "test-on-train-set" } + +# The net protocol buffer definition +net_param { name: "LeNet" layers { name: "mnist" @@ -43,122 +54,8 @@ train_net_param { scale: 0.00390625 batch_size: 64 } + include: { phase: TRAIN } } - layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 20 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 50 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool2" - top: "ip1" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 500 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "relu1" - type: RELU - bottom: "ip1" - top: "ip1" - } - layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" - } -} - -# test_iter specifies how many forward passes the test should carry out. -# In the case of MNIST, we have test batch size 100 and 100 test iterations, -# covering the full 10,000 testing images. 
-test_iter: 100 -# The testing protocol buffer definition -test_net_param { - name: "LeNet-test" layers { name: "mnist" type: DATA @@ -169,120 +66,11 @@ test_net_param { scale: 0.00390625 batch_size: 100 } - } - layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - convolution_param { - num_output: 20 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - convolution_param { - num_output: 50 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool2" - top: "ip1" - inner_product_param { - num_output: 500 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } + include: { + phase: TEST + stage: "test-on-test-set" } } - layers { - name: "relu1" - type: RELU - bottom: "ip1" - top: "ip1" - } - layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - inner_product_param { - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "accuracy" - type: ACCURACY - bottom: "ip2" - bottom: "label" - top: "accuracy" - } - layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" - top: "loss" - } -} - -# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K). -test_iter: 600 -# The protocol buffer definition to test on the train set -test_net_param { - name: "LeNet-test-on-train" layers { name: "mnist" type: DATA @@ -293,12 +81,18 @@ test_net_param { scale: 0.00390625 batch_size: 100 } + include: { + phase: TEST + stage: "test-on-train-set" + } } layers { name: "conv1" type: CONVOLUTION bottom: "data" top: "conv1" + blobs_lr: 1 + blobs_lr: 2 convolution_param { num_output: 20 kernel_size: 5 @@ -327,6 +121,8 @@ test_net_param { type: CONVOLUTION bottom: "pool1" top: "conv2" + blobs_lr: 1 + blobs_lr: 2 convolution_param { num_output: 50 kernel_size: 5 @@ -355,6 +151,8 @@ test_net_param { type: INNER_PRODUCT bottom: "pool2" top: "ip1" + blobs_lr: 1 + blobs_lr: 2 inner_product_param { num_output: 500 weight_filler { @@ -376,6 +174,8 @@ test_net_param { type: INNER_PRODUCT bottom: "ip1" top: "ip2" + blobs_lr: 1 + blobs_lr: 2 inner_product_param { num_output: 10 weight_filler { @@ -392,6 +192,7 @@ test_net_param { bottom: "ip2" bottom: "label" top: "accuracy" + include: { phase: TEST } } layers { name: "loss" diff --git a/examples/mnist/lenet_solver.prototxt b/examples/mnist/lenet_solver.prototxt index 7947f2d6a73..a3b33090472 100644 --- a/examples/mnist/lenet_solver.prototxt +++ b/examples/mnist/lenet_solver.prototxt @@ -1,7 +1,5 @@ -# The training protocol buffer definition -train_net: "lenet_train.prototxt" -# The testing protocol buffer definition -test_net: "lenet_test.prototxt" +# The train/test net protocol buffer definition +net: "lenet_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images. 
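To make the stage machinery above concrete, here is a minimal end-to-end sketch distilled from the consolidated LeNet solver (the file names and leveldb paths are hypothetical, not part of this patch). The solver declares one `test_iter`/`test_state` pair per test net, and each TEST-phase DATA layer opts into exactly one stage:

    # sketch_solver.prototxt (hypothetical file name)
    net: "sketch_net.prototxt"
    test_interval: 500
    # First test net: selects layers tagged with stage "test-on-test-set".
    test_iter: 100
    test_state: { stage: "test-on-test-set" }
    # Second test net: selects layers tagged with stage "test-on-train-set".
    test_iter: 600
    test_state: { stage: "test-on-train-set" }

    # sketch_net.prototxt (hypothetical file name)
    layers {
      name: "data"
      type: DATA
      top: "data"
      top: "label"
      data_param {
        source: "sketch-train-leveldb"
        batch_size: 64
      }
      include: { phase: TRAIN }
    }
    layers {
      name: "data"
      type: DATA
      top: "data"
      top: "label"
      data_param {
        source: "sketch-test-leveldb"
        batch_size: 100
      }
      include: {
        phase: TEST
        stage: "test-on-test-set"
      }
    }
    layers {
      name: "data"
      type: DATA
      top: "data"
      top: "label"
      data_param {
        source: "sketch-train-leveldb"
        batch_size: 100
      }
      include: {
        phase: TEST
        stage: "test-on-train-set"
      }
    }

Layers with no include/exclude rules survive filtering in every state, so the shared CONVOLUTION/POOLING/INNER_PRODUCT stack is written only once; a test-only layer such as ACCURACY simply carries `include: { phase: TEST }`, as in the consolidated example.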
diff --git a/examples/mnist/lenet_test.prototxt b/examples/mnist/lenet_test.prototxt deleted file mode 100644 index 2497f02ae86..00000000000 --- a/examples/mnist/lenet_test.prototxt +++ /dev/null @@ -1,118 +0,0 @@ -name: "LeNet-test" -layers { - name: "mnist" - type: DATA - top: "data" - top: "label" - data_param { - source: "mnist-test-leveldb" - scale: 0.00390625 - batch_size: 100 - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - convolution_param { - num_output: 20 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - convolution_param { - num_output: 50 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} -layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool2" - top: "ip1" - inner_product_param { - num_output: 500 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu1" - type: RELU - bottom: "ip1" - top: "ip1" -} -layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - inner_product_param { - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "accuracy" - type: ACCURACY - bottom: "ip2" - bottom: "label" - top: "accuracy" -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" - top: "loss" -} diff --git a/examples/mnist/lenet_train.prototxt b/examples/mnist/lenet_train_test.prototxt similarity index 82% rename from examples/mnist/lenet_train.prototxt rename to examples/mnist/lenet_train_test.prototxt index e8a1e74e40b..3c77452130c 100644 --- a/examples/mnist/lenet_train.prototxt +++ b/examples/mnist/lenet_train_test.prototxt @@ -9,7 +9,21 @@ layers { scale: 0.00390625 batch_size: 64 } + include: { phase: TRAIN } } +layers { + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { + source: "mnist-test-leveldb" + scale: 0.00390625 + batch_size: 100 + } + include: { phase: TEST } +} + layers { name: "conv1" type: CONVOLUTION @@ -110,9 +124,18 @@ layers { } } } +layers { + name: "accuracy" + type: ACCURACY + bottom: "ip2" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} layers { name: "loss" type: SOFTMAX_LOSS bottom: "ip2" bottom: "label" + top: "loss" } diff --git a/examples/mnist/mnist_autoencoder_train.prototxt b/examples/mnist/mnist_autoencoder.prototxt similarity index 88% rename from examples/mnist/mnist_autoencoder_train.prototxt rename to examples/mnist/mnist_autoencoder.prototxt index 90d2cff99b8..ad1e7665bf2 100644 --- a/examples/mnist/mnist_autoencoder_train.prototxt +++ b/examples/mnist/mnist_autoencoder.prototxt @@ -8,6 +8,18 @@ layers { scale: 0.0039215684 batch_size: 100 } + include: { phase: TRAIN } +} +layers { + top: "data" + name: "data" + type: DATA + data_param { + source: "mnist-test-leveldb" + scale: 0.0039215684 + batch_size: 100 + } + include: { phase: TEST } } layers { bottom: "data" @@ -232,4 +244,20 @@ layers { bottom: "flatdata" name: "loss" type: SIGMOID_CROSS_ENTROPY_LOSS + include: { phase: TRAIN } +} +layers { + bottom: "decode1" + top: 
"decode1neuron" + name: "decode1neuron" + type: SIGMOID + include: { phase: TEST } +} +layers { + bottom: "decode1neuron" + bottom: "flatdata" + name: "loss" + type: EUCLIDEAN_LOSS + top: "loss" + include: { phase: TEST } } diff --git a/examples/mnist/mnist_autoencoder_solver.prototxt b/examples/mnist/mnist_autoencoder_solver.prototxt index 06e057d53a4..ae1ddebccd2 100644 --- a/examples/mnist/mnist_autoencoder_solver.prototxt +++ b/examples/mnist/mnist_autoencoder_solver.prototxt @@ -1,5 +1,4 @@ -train_net: "mnist_autoencoder_train.prototxt" -test_net: "mnist_autoencoder_test.prototxt" +net: "mnist_autoencoder.prototxt" test_iter: 50 test_interval: 100 test_compute_loss: true diff --git a/examples/mnist/mnist_autoencoder_test.prototxt b/examples/mnist/mnist_autoencoder_test.prototxt deleted file mode 100644 index b52364c17fc..00000000000 --- a/examples/mnist/mnist_autoencoder_test.prototxt +++ /dev/null @@ -1,146 +0,0 @@ -name: "MNISTAutoencoder" -layers { - top: "data" - name: "data" - type: DATA - data_param { - source: "mnist-test-leveldb" - scale: 0.0039215684 - batch_size: 100 - } -} -layers { - bottom: "data" - top: "flatdata" - name: "flatdata" - type: FLATTEN -} -layers { - bottom: "data" - top: "encode1" - name: "encode1" - type: INNER_PRODUCT - inner_product_param { - num_output: 1000 - } -} -layers { - bottom: "encode1" - top: "encode1neuron" - name: "encode1neuron" - type: SIGMOID -} -layers { - bottom: "encode1neuron" - top: "encode2" - name: "encode2" - type: INNER_PRODUCT - inner_product_param { - num_output: 500 - } -} -layers { - bottom: "encode2" - top: "encode2neuron" - name: "encode2neuron" - type: SIGMOID -} -layers { - bottom: "encode2neuron" - top: "encode3" - name: "encode3" - type: INNER_PRODUCT - inner_product_param { - num_output: 250 - } -} -layers { - bottom: "encode3" - top: "encode3neuron" - name: "encode3neuron" - type: SIGMOID -} -layers { - bottom: "encode3neuron" - top: "encode4" - name: "encode4" - type: INNER_PRODUCT - blobs_lr: 1 - blobs_lr: 1 - weight_decay: 1 - weight_decay: 0 - inner_product_param { - num_output: 30 - } -} -layers { - bottom: "encode4" - top: "decode4" - name: "decode4" - type: INNER_PRODUCT - blobs_lr: 1 - blobs_lr: 1 - weight_decay: 1 - weight_decay: 0 - inner_product_param { - num_output: 250 - } -} -layers { - bottom: "decode4" - top: "decode4neuron" - name: "decode4neuron" - type: SIGMOID -} -layers { - bottom: "decode4neuron" - top: "decode3" - name: "decode3" - type: INNER_PRODUCT - inner_product_param { - num_output: 500 - } -} -layers { - bottom: "decode3" - top: "decode3neuron" - name: "decode3neuron" - type: SIGMOID -} -layers { - bottom: "decode3neuron" - top: "decode2" - name: "decode2" - type: INNER_PRODUCT - inner_product_param { - num_output: 1000 - } -} -layers { - bottom: "decode2" - top: "decode2neuron" - name: "decode2neuron" - type: SIGMOID -} -layers { - bottom: "decode2neuron" - top: "decode1" - name: "decode1" - type: INNER_PRODUCT - inner_product_param { - num_output: 784 - } -} -layers { - bottom: "decode1" - top: "decode1neuron" - name: "decode1neuron" - type: SIGMOID -} -layers { - bottom: "decode1neuron" - bottom: "flatdata" - name: "loss" - type: EUCLIDEAN_LOSS - top: "loss" -} diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md index 4f3f4d9ce12..65a780714ae 100644 --- a/examples/mnist/readme.md +++ b/examples/mnist/readme.md @@ -177,10 +177,8 @@ The `softmax_loss` layer implements both the softmax and the multinomial logisti Check out the comments explaining each line in the prototxt: - # 
The training protocol buffer definition - train_net: "lenet_train.prototxt" - # The testing protocol buffer definition - test_net: "lenet_test.prototxt" + # The train/test net protocol buffer definition + net: "lenet_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images.
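One pattern from the autoencoder example above is worth a closing note: because each filtered net keeps only the layers whose rules match, a single net definition can attach different loss heads for training and testing. A minimal sketch (blob and layer names here are hypothetical):

    # Training loss: sigmoid cross-entropy computed on the raw "decode"
    # activations, present only in the TRAIN-phase net.
    layers {
      bottom: "decode"
      bottom: "flatdata"
      name: "loss"
      type: SIGMOID_CROSS_ENTROPY_LOSS
      include: { phase: TRAIN }
    }
    # Test-time path: apply the sigmoid explicitly, then measure Euclidean
    # reconstruction error against the same flattened input.
    layers {
      bottom: "decode"
      top: "decodeneuron"
      name: "decodeneuron"
      type: SIGMOID
      include: { phase: TEST }
    }
    layers {
      bottom: "decodeneuron"
      bottom: "flatdata"
      name: "loss"
      type: EUCLIDEAN_LOSS
      top: "loss"
      include: { phase: TEST }
    }

The two layers named "loss" never coexist in a filtered net, so reusing the name is safe (the autoencoder diff above does exactly this), and with `test_compute_loss: true` in the solver the Euclidean loss is reported during testing.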