From c2b74c3cf66b229c0ad926cf052c7e7ed522038d Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 17 Jul 2014 20:12:44 -0700
Subject: [PATCH 1/4] Add NetState message with phase, level, stage;
 NetStateRule message with filtering rules for Layers.

---
 src/caffe/proto/caffe.proto | 95 +++++++++++++++++++++++++++++++------
 1 file changed, 81 insertions(+), 14 deletions(-)

diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index c27e9e5aebe..d2f61e57f34 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -56,24 +56,55 @@ message NetParameter {
   // If set False, then whether to carry out backward is determined
   // automatically according to the net structure and learning rates.
   optional bool force_backward = 5 [default = false];
+  // The current "state" of the network, including the phase, level, and stage.
+  // Some layers may be included/excluded depending on this state and the states
+  // specified in the layers' include and exclude fields.
+  optional NetState state = 6;
 }

+// NOTE
+// Update the next available ID when you add a new SolverParameter field.
+//
+// SolverParameter next available ID: 28 (last added: test_state)
 message SolverParameter {
-  // {train,test}_net specify a path to a file containing the {train,test} net
-  // parameters; {train,test}_net_param specify the net parameters directly
-  // inside the SolverParameter.
+  //////////////////////////////////////////////////////////////////////////////
+  // Specifying the train and test networks
   //
-  // Only either train_net or train_net_param (not both) should be specified.
-  // You may specify 0 or more test_net and/or test_net_param. All
-  // nets specified using test_net_param will be tested first, followed by all
-  // nets specified using test_net (each processed in the order specified in
-  // the prototxt).
-  optional string train_net = 1; // The proto filename for the train net.
-  repeated string test_net = 2; // The proto filenames for the test nets.
-  optional NetParameter train_net_param = 21; // Full params for the train net.
-  repeated NetParameter test_net_param = 22; // Full params for the test nets.
-  // The number of iterations for each testing phase.
+  // Exactly one train net must be specified using one of the following fields:
+  //     train_net_param, train_net, net_param, net
+  // One or more test nets may be specified using any of the following fields:
+  //     test_net_param, test_net, net_param, net
+  // If more than one test net field is specified (e.g., both net and
+  // test_net are specified), they will be evaluated in the field order given
+  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
+  // A test_iter must be specified for each test_net.
+  // A test_level and/or a test_stage may also be specified for each test_net.
+  //////////////////////////////////////////////////////////////////////////////

+  // Proto filename for the train net, possibly combined with one or more
+  // test nets.
+  optional string net = 24;
+  // Inline train net param, possibly combined with one or more test nets.
+  optional NetParameter net_param = 25;

+  optional string train_net = 1; // Proto filename for the train net.
+  repeated string test_net = 2; // Proto filenames for the test nets.
+  optional NetParameter train_net_param = 21; // Inline train net params.
+  repeated NetParameter test_net_param = 22; // Inline test net params.

+  // The states for the train/test nets. Must be unspecified or
+  // specified once per net.
+  //
+  // By default, train_state will have phase = TRAIN,
+  // and all test_state's will have phase = TEST.
+  // Other defaults are set according to the NetState defaults.
+  optional NetState train_state = 26;
+  repeated NetState test_state = 27;

+  // The number of iterations for each test net.
   repeated int32 test_iter = 3;

+  // The number of iterations between two testing phases.
   optional int32 test_interval = 4 [default = 0];
   optional bool test_compute_loss = 19 [default = false];
@@ -118,15 +149,51 @@ message SolverState {
   repeated BlobProto history = 3; // The history for sgd solvers
 }

+enum Phase {
+  TRAIN = 0;
+  TEST = 1;
+}

+message NetState {
+  optional Phase phase = 1 [default = TEST];
+  optional int32 level = 2 [default = 0];
+  repeated string stage = 3;
+}

+message NetStateRule {
+  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
+  // to meet this rule.
+  optional Phase phase = 1;

+  // Set the minimum and/or maximum levels in which the layer should be used.
+  // Leave undefined to meet the rule regardless of level.
+  optional int32 min_level = 2;
+  optional int32 max_level = 3;

+  // A customizable set of stages.
+  // The net must have ALL of the specified stages to meet the rule.
+  // (Use multiple NetStateRules to specify disjunctions of stages.)
+  repeated string stage = 4;
+}

 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available ID: 32 (last added: slice_param)
+// LayerParameter next available ID: 34 (last added: exclude)
 message LayerParameter {
   repeated string bottom = 2; // the name of the bottom blobs
   repeated string top = 3; // the name of the top blobs
   optional string name = 4; // the layer name

+  // Rules controlling whether and when a layer is included in the network,
+  // based on the current NetState. You may specify a non-zero number of rules
+  // to include OR exclude, but not both. If no include or exclude rules are
+  // specified, the layer is always included. If the current NetState meets
+  // ANY (i.e., one or more) of the specified rules, the layer is
+  // included/excluded.
+  repeated NetStateRule include = 32;
+  repeated NetStateRule exclude = 33;

   // NOTE
   // Add new LayerTypes to the enum below in lexicographical order (other than
   // starting with NONE), starting with the next available ID in the comment
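To make the new SolverParameter plumbing concrete, a minimal solver definition using the unified fields might look like the following sketch (the file name, stage names, and iteration counts here are hypothetical, not taken from this patch):

    net: "examples/hypothetical/train_val.prototxt"
    # One test_iter per test net instance; test_state pairs up in the same order.
    test_iter: 100
    test_state: { stage: "test-on-test-set" }
    test_iter: 600
    test_state: { stage: "test-on-train-set" }
    test_interval: 500

Each test_iter entry yields one test net instance built from the generic net, and test_state must either be omitted entirely or given once per test net.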
From b8833565f8234903d33c880017c3768cd02e6fd9 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Tue, 29 Jul 2014 11:22:52 -0700
Subject: [PATCH 2/4] Add unit tests and skeleton code for Net/Solver filtering
 functionality.

---
 include/caffe/net.hpp          |   8 +
 include/caffe/solver.hpp       |   5 +
 src/caffe/net.cpp              |  11 +
 src/caffe/test/test_net.cpp    | 820 +++++++++++++++++++++++++++++++++
 src/caffe/test/test_solver.cpp |  98 ++++
 5 files changed, 942 insertions(+)
 create mode 100644 src/caffe/test/test_solver.cpp

diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 7548011d973..808b244e47d 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -115,6 +115,14 @@ class Net {

   void set_debug_info(const bool value) { debug_info_ = value; }

+  // Helpers for Init.
+  // Remove layers that the user specified should be excluded given the current
+  // phase, level, and stage.
+  static void FilterNet(const NetParameter& param,
+      NetParameter* param_filtered);
+  static bool StateMeetsRule(const NetState& state, const NetStateRule& rule,
+      const string& layer_name);
+
 protected:
   // Helpers for Init.
   // Append a new input or top blob to the net.
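A small prototxt sketch of the semantics these declarations are meant to implement (layer and stage names are illustrative only): every criterion within a single rule must hold, while across rules a single match suffices.

    state: { phase: TEST level: 2 stage: 'deploy' }
    layers: {
      name: 'probe'
      type: INNER_PRODUCT
      bottom: 'data'
      top: 'probe'
      # Met: the phase matches, 0 <= 2 <= 5, and the state carries 'deploy'.
      include: { phase: TEST min_level: 0 max_level: 5 stage: 'deploy' }
      # Not met (wrong phase); but ANY satisfied include rule keeps the layer.
      include: { phase: TRAIN }
    }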
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 3112c59e0fc..52dcd1936c5 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -6,6 +6,8 @@
 #include <string>
 #include <vector>

+#include "caffe/net.hpp"
+
 namespace caffe {

 template <typename Dtype>
 class Solver {
@@ -20,6 +22,9 @@ class Solver {
   inline void Solve(const string resume_file) { Solve(resume_file.c_str()); }
   virtual ~Solver() {}
   inline shared_ptr<Net<Dtype> > net() { return net_; }
+  inline const vector<shared_ptr<Net<Dtype> > >& test_nets() {
+    return test_nets_;
+  }

 protected:
   // PreSolve is run before any solving iteration starts, allowing one to
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 228f826a8b9..a02b1118826 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -167,6 +167,17 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
   debug_info_ = false;
 }

+template <typename Dtype>
+void Net<Dtype>::FilterNet(const NetParameter& param,
+    NetParameter* param_filtered) {
+}
+
+template <typename Dtype>
+bool Net<Dtype>::StateMeetsRule(const NetState& state,
+    const NetStateRule& rule, const string& layer_name) {
+  return true;
+}
+
 // Helper for Net::Init: add a new input or top blob to the net. (Inputs have
 // layer_id == -1, tops have layer_id >= 0.)
 template <typename Dtype>
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index e84701d941c..c4181345573 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -835,4 +835,824 @@ TYPED_TEST(NetTest, TestFromTo) {
   }
 }

+class FilterNetTest : public ::testing::Test {
+ protected:
+  void RunFilterNetTest(
+      const string& input_param_string, const string& filtered_param_string) {
+    NetParameter input_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        input_param_string, &input_param));
+    NetParameter expected_filtered_param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(
+        filtered_param_string, &expected_filtered_param));
+    NetParameter actual_filtered_param;
+    Net<float>::FilterNet(input_param, &actual_filtered_param);
+    EXPECT_EQ(expected_filtered_param.DebugString(),
+        actual_filtered_param.DebugString());
+    // Also test idempotence.
+    NetParameter double_filtered_param;
+    Net<float>::FilterNet(actual_filtered_param, &double_filtered_param);
+    EXPECT_EQ(actual_filtered_param.DebugString(),
+        double_filtered_param.DebugString());
+  }
+};
+
+TEST_F(FilterNetTest, TestNoFilter) {
+  const string& input_proto =
+      "name: 'TestNetwork' "
+      "layers: { "
+      " name: 'data' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      "} "
+      "layers: { "
+      " name: 'innerprod' "
+      " type: INNER_PRODUCT "
+      " bottom: 'data' "
+      " top: 'innerprod' "
+      "} "
+      "layers: { "
+      " name: 'loss' "
+      " type: SOFTMAX_LOSS "
+      " bottom: 'innerprod' "
+      " bottom: 'label' "
+      "} ";
+  this->RunFilterNetTest(input_proto, input_proto);
+}
+
+TEST_F(FilterNetTest, TestFilterLeNetTrainTest) {
+  const string& input_proto =
+      "name: 'LeNet' "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      " data_param { "
+      " source: 'mnist-train-leveldb' "
+      " scale: 0.00390625 "
+      " batch_size: 64 "
+      " } "
+      " include: { phase: TRAIN } "
+      "} "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      " data_param { "
+      " source: 'mnist-test-leveldb' "
+      " scale: 0.00390625 "
+      " batch_size: 100 "
+      " } "
+      " include: { phase: TEST } "
+      "} "
+      "layers { "
+      " name: 'conv1' "
+      " type: CONVOLUTION "
+      " bottom: 'data' "
+      " top: 'conv1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " convolution_param { "
+      " num_output: 20 "
+      " kernel_size: 5 "
+      " stride: 1 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'ip1' "
+      " type: INNER_PRODUCT "
+      " bottom: 'conv1' "
+      " top: 'ip1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " inner_product_param { "
+      " num_output: 10 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'accuracy' "
+      " type: ACCURACY "
+      " bottom: 'ip1' "
+      " bottom: 'label' "
+      " top: 'accuracy' "
+      " include: { phase: TEST } "
+      "} "
+      "layers { "
+      " name: 'loss' "
+      " type: SOFTMAX_LOSS "
+      " bottom: 'ip2' "
+      " bottom: 'label' "
+      " top: 'loss' "
+      "} ";
+  const string input_proto_train = "state: { phase: TRAIN } " + input_proto;
+  const string input_proto_test = "state: { phase: TEST } " + input_proto;
+  const string& output_proto_train =
+      "state: { phase: TRAIN } "
+      "name: 'LeNet' "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
+      " top: 'label' "
+      " data_param { "
+      " source: 'mnist-train-leveldb' "
+      " scale: 0.00390625 "
+      " batch_size: 64 "
+      " } "
+      " include: { phase: TRAIN } "
+      "} "
+      "layers { "
+      " name: 'conv1' "
+      " type: CONVOLUTION "
+      " bottom: 'data' "
+      " top: 'conv1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " convolution_param { "
+      " num_output: 20 "
+      " kernel_size: 5 "
+      " stride: 1 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'ip1' "
+      " type: INNER_PRODUCT "
+      " bottom: 'conv1' "
+      " top: 'ip1' "
+      " blobs_lr: 1 "
+      " blobs_lr: 2 "
+      " inner_product_param { "
+      " num_output: 10 "
+      " weight_filler { "
+      " type: 'xavier' "
+      " } "
+      " bias_filler { "
+      " type: 'constant' "
+      " } "
+      " } "
+      "} "
+      "layers { "
+      " name: 'loss' "
+      " type: SOFTMAX_LOSS "
+      " bottom: 'ip2' "
+      " bottom: 'label' "
+      " top: 'loss' "
+      "} ";
+  const string& output_proto_test =
+      "state: { phase: TEST } "
+      "name: 'LeNet' "
+      "layers { "
+      " name: 'mnist' "
+      " type: DATA "
+      " top: 'data' "
" top: 'label' " + " data_param { " + " source: 'mnist-test-leveldb' " + " scale: 0.00390625 " + " batch_size: 100 " + " } " + " include: { phase: TEST } " + "} " + "layers { " + " name: 'conv1' " + " type: CONVOLUTION " + " bottom: 'data' " + " top: 'conv1' " + " blobs_lr: 1 " + " blobs_lr: 2 " + " convolution_param { " + " num_output: 20 " + " kernel_size: 5 " + " stride: 1 " + " weight_filler { " + " type: 'xavier' " + " } " + " bias_filler { " + " type: 'constant' " + " } " + " } " + "} " + "layers { " + " name: 'ip1' " + " type: INNER_PRODUCT " + " bottom: 'conv1' " + " top: 'ip1' " + " blobs_lr: 1 " + " blobs_lr: 2 " + " inner_product_param { " + " num_output: 10 " + " weight_filler { " + " type: 'xavier' " + " } " + " bias_filler { " + " type: 'constant' " + " } " + " } " + "} " + "layers { " + " name: 'accuracy' " + " type: ACCURACY " + " bottom: 'ip1' " + " bottom: 'label' " + " top: 'accuracy' " + " include: { phase: TEST } " + "} " + "layers { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'ip2' " + " bottom: 'label' " + " top: 'loss' " + "} "; + this->RunFilterNetTest(input_proto_train, output_proto_train); + this->RunFilterNetTest(input_proto_test, output_proto_test); +} + +TEST_F(FilterNetTest, TestFilterOutByStage) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByStage2) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterInByStage) { + const string& input_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByStage2) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' 
" + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " exclude: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByMultipleStage) { + const string& input_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' stage: 'myotherstage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + const string& output_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMultipleStage) { + const string& input_proto = + "state: { stage: 'mystage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'myotherstage' } " + " include: { stage: 'mystage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMultipleStage2) { + const string& input_proto = + "state: { stage: 'mystage' stage: 'myotherstage' } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { stage: 'mystage' stage: 'myotherstage' } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { stage: 'mystage' } " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByMinLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterOutByMaxLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} 
" + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { max_level: -3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + const string& output_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, output_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMinLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 0 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMinLevel2) { + const string& input_proto = + "state: { level: 7 } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMaxLevel) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { max_level: 0 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInByMaxLevel2) { + const string& input_proto = + "state: { level: -7 } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { max_level: -3 } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + "} "; + this->RunFilterNetTest(input_proto, input_proto); +} + +TEST_F(FilterNetTest, TestFilterInOutByIncludeMultiRule) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 2 phase: TRAIN } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { min_level: 2 phase: TEST } " + "} "; + const string& input_proto_train = + "state: { level: 4 phase: TRAIN } " + input_proto; + const string& input_proto_test = + "state: { level: 4 phase: TEST } " + input_proto; + const string& output_proto_train = + "state: { level: 4 phase: TRAIN } " + "name: 'TestNetwork' " + "layers: { " + " name: 
'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 2 phase: TRAIN } " + "} "; + const string& output_proto_test = + "state: { level: 4 phase: TEST } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { min_level: 2 phase: TEST } " + "} "; + this->RunFilterNetTest(input_proto_train, output_proto_train); + this->RunFilterNetTest(input_proto_test, output_proto_test); +} + +TEST_F(FilterNetTest, TestFilterInByIncludeMultiRule) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " include: { min_level: 2 phase: TRAIN } " + " include: { phase: TEST } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " include: { min_level: 2 phase: TEST } " + " include: { phase: TRAIN } " + "} "; + const string& input_proto_train = + "state: { level: 2 phase: TRAIN } " + input_proto; + const string& input_proto_test = + "state: { level: 2 phase: TEST } " + input_proto; + this->RunFilterNetTest(input_proto_train, input_proto_train); + this->RunFilterNetTest(input_proto_test, input_proto_test); +} + +TEST_F(FilterNetTest, TestFilterInOutByExcludeMultiRule) { + const string& input_proto = + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " exclude: { min_level: 2 phase: TRAIN } " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " exclude: { min_level: 2 phase: TEST } " + "} "; + const string& input_proto_train = + "state: { level: 4 phase: TRAIN } " + input_proto; + const string& input_proto_test = + "state: { level: 4 phase: TEST } " + input_proto; + const string& output_proto_train = + "state: { level: 4 phase: TRAIN } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'innerprod' " + " bottom: 'label' " + " exclude: { min_level: 2 phase: TEST } " + "} "; + const string& output_proto_test = + "state: { level: 4 phase: TEST } " + "name: 'TestNetwork' " + "layers: { " + " name: 'data' " + " type: DATA " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " name: 'innerprod' " + " type: INNER_PRODUCT " + " bottom: 'data' " + " top: 'innerprod' " + " exclude: { min_level: 2 phase: TRAIN } " + "} "; + this->RunFilterNetTest(input_proto_train, output_proto_train); + this->RunFilterNetTest(input_proto_test, output_proto_test); +} + } // namespace caffe diff --git a/src/caffe/test/test_solver.cpp b/src/caffe/test/test_solver.cpp new file mode 100644 index 00000000000..7380ab86308 --- /dev/null +++ b/src/caffe/test/test_solver.cpp @@ -0,0 +1,98 @@ +// Copyright 2014 BVLC and contributors. 
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "google/protobuf/text_format.h"
+
+#include "gtest/gtest.h"
+#include "caffe/common.hpp"
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/solver.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+using std::ostringstream;
+
+namespace caffe {
+
+template <typename TypeParam>
+class SolverTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  virtual void InitSolverFromProtoString(const string& proto) {
+    SolverParameter param;
+    CHECK(google::protobuf::TextFormat::ParseFromString(proto, &param));
+    solver_.reset(new SGDSolver<Dtype>(param));
+  }
+
+  shared_ptr<SGDSolver<Dtype> > solver_;
+};
+
+TYPED_TEST_CASE(SolverTest, TestDtypesAndDevices);
+
+TYPED_TEST(SolverTest, TestInitTrainTestNets) {
+  const string& proto =
+     "test_interval: 10 "
+     "test_iter: 10 "
+     "test_state: { stage: 'with-softmax' }"
+     "test_iter: 10 "
+     "test_state: {}"
+     "net_param { "
+     "  name: 'TestNetwork' "
+     "  layers: { "
+     "    name: 'data' "
+     "    type: DUMMY_DATA "
+     "    dummy_data_param { "
+     "      num: 5 "
+     "      channels: 3 "
+     "      height: 10 "
+     "      width: 10 "
+     "      num: 5 "
+     "      channels: 1 "
+     "      height: 1 "
+     "      width: 1 "
+     "    } "
+     "    top: 'data' "
+     "    top: 'label' "
+     "  } "
+     "  layers: { "
+     "    name: 'innerprod' "
+     "    type: INNER_PRODUCT "
+     "    inner_product_param { "
+     "      num_output: 10 "
+     "    } "
+     "    bottom: 'data' "
+     "    top: 'innerprod' "
+     "  } "
+     "  layers: { "
+     "    name: 'accuracy' "
+     "    type: ACCURACY "
+     "    bottom: 'innerprod' "
+     "    bottom: 'label' "
+     "    top: 'accuracy' "
+     "    exclude: { phase: TRAIN } "
+     "  } "
+     "  layers: { "
+     "    name: 'loss' "
+     "    type: SOFTMAX_LOSS "
+     "    bottom: 'innerprod' "
+     "    bottom: 'label' "
+     "    include: { phase: TRAIN } "
+     "    include: { phase: TEST stage: 'with-softmax' } "
+     "  } "
+     "} ";
+  this->InitSolverFromProtoString(proto);
+  ASSERT_TRUE(this->solver_->net());
+  EXPECT_TRUE(this->solver_->net()->has_layer("loss"));
+  EXPECT_FALSE(this->solver_->net()->has_layer("accuracy"));
+  ASSERT_EQ(2, this->solver_->test_nets().size());
+  EXPECT_TRUE(this->solver_->test_nets()[0]->has_layer("loss"));
+  EXPECT_TRUE(this->solver_->test_nets()[0]->has_layer("accuracy"));
+  EXPECT_FALSE(this->solver_->test_nets()[1]->has_layer("loss"));
+  EXPECT_TRUE(this->solver_->test_nets()[1]->has_layer("accuracy"));
+}
+
+} // namespace caffe
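For reference, the two test nets built by this test end up with the following effective states once the solver merges in the TEST phase (as wired up in the next patch), which is what the has_layer expectations above encode:

    # test_nets()[0]: keeps 'loss' (its second include rule is met) and 'accuracy'.
    state: { phase: TEST stage: 'with-softmax' }
    # test_nets()[1]: keeps only 'accuracy'; neither include rule of 'loss' is met.
    state: { phase: TEST }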
From cb4555c052a6ac2ba65a7d71681d9320a6afefb6 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 17 Jul 2014 20:38:53 -0700
Subject: [PATCH 3/4] Incorporate NetState{,Rule} into Solver/Net.

Net::FilterNet includes/excludes layers based on whether the NetState meets
each layer's NetStateRule(s).
---
 include/caffe/solver.hpp |   2 +
 src/caffe/net.cpp        |  78 ++++++++++++++++++++++-
 src/caffe/solver.cpp     | 133 +++++++++++++++++++++++++++++++--------
 3 files changed, 185 insertions(+), 28 deletions(-)

diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 52dcd1936c5..811d70372a4 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -16,6 +16,8 @@ class Solver {
   explicit Solver(const SolverParameter& param);
   explicit Solver(const string& param_file);
   void Init(const SolverParameter& param);
+  void InitTrainNet();
+  void InitTestNets();
   // The main entry of the solver function. By default, iter will be zero. Pass
   // in a non-zero iter number to resume training for a pre-trained net.
   virtual void Solve(const char* resume_file = NULL);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index a02b1118826..a4d1f235a5a 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -32,11 +32,15 @@ Net<Dtype>::Net(const string& param_file) {

 template <typename Dtype>
 void Net<Dtype>::Init(const NetParameter& in_param) {
+  // Filter layers based on their include/exclude rules and
+  // the current NetState.
+  NetParameter filtered_param;
+  FilterNet(in_param, &filtered_param);
   LOG(INFO) << "Initializing net from parameters: " << std::endl
-            << in_param.DebugString();
-  // Create a copy of in_param with splits added where necessary.
+            << filtered_param.DebugString();
+  // Create a copy of filtered_param with splits added where necessary.
   NetParameter param;
-  InsertSplits(in_param, &param);
+  InsertSplits(filtered_param, &param);
   // Basically, build all the layers and set up their connections.
   name_ = param.name();
   map<string, int> blob_name_to_idx;
@@ -170,11 +174,79 @@ void Net<Dtype>::Init(const NetParameter& in_param) {

 template <typename Dtype>
 void Net<Dtype>::FilterNet(const NetParameter& param,
     NetParameter* param_filtered) {
+  const NetState& net_state = param.state();
+  param_filtered->CopyFrom(param);
+  param_filtered->clear_layers();
+  for (int i = 0; i < param.layers_size(); ++i) {
+    const LayerParameter& layer_param = param.layers(i);
+    const string& layer_name = layer_param.name();
+    CHECK(layer_param.include_size() == 0 || layer_param.exclude_size() == 0)
+        << "Specify either include rules or exclude rules; not both.";
+    // If no include rules are specified, the layer is included by default and
+    // only excluded if it meets one of the exclude rules.
+    bool layer_included = (layer_param.include_size() == 0);
+    for (int j = 0; layer_included && j < layer_param.exclude_size(); ++j) {
+      if (StateMeetsRule(net_state, layer_param.exclude(j), layer_name)) {
+        layer_included = false;
+      }
+    }
+    for (int j = 0; !layer_included && j < layer_param.include_size(); ++j) {
+      if (StateMeetsRule(net_state, layer_param.include(j), layer_name)) {
+        layer_included = true;
+      }
+    }
+    if (layer_included) {
+      param_filtered->add_layers()->CopyFrom(layer_param);
+    }
+  }
 }

 template <typename Dtype>
 bool Net<Dtype>::StateMeetsRule(const NetState& state,
     const NetStateRule& rule, const string& layer_name) {
+  // Check whether the rule is broken due to phase.
+  if (rule.has_phase()) {
+    if (rule.phase() != state.phase()) {
+      LOG(INFO) << "The NetState phase (" << state.phase()
+          << ") differed from the phase (" << rule.phase()
+          << ") specified by a rule in layer " << layer_name;
+      return false;
+    }
+  }
+  // Check whether the rule is broken due to min level.
+  if (rule.has_min_level()) {
+    if (state.level() < rule.min_level()) {
+      LOG(INFO) << "The NetState level (" << state.level()
+          << ") is below the min_level (" << rule.min_level()
+          << ") specified by a rule in layer " << layer_name;
+      return false;
+    }
+  }
+  // Check whether the rule is broken due to max level.
+  if (rule.has_max_level()) {
+    if (state.level() > rule.max_level()) {
+      LOG(INFO) << "The NetState level (" << state.level()
+          << ") is above the max_level (" << rule.max_level()
+          << ") specified by a rule in layer " << layer_name;
+      return false;
+    }
+  }
+  // Check whether the rule is broken due to stage. If stage is specified,
+  // the NetState must contain ALL of the rule's stages to meet it.
+  if (rule.stage_size()) {
+    for (int i = 0; i < rule.stage_size(); ++i) {
+      // Check that the NetState contains the rule's ith stage.
+      bool has_stage = false;
+      for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
+        if (rule.stage(i) == state.stage(j)) { has_stage = true; }
+      }
+      if (!has_stage) {
+        LOG(INFO) << "The NetState did not contain stage '" << rule.stage(i)
+            << "' specified by a rule in layer " << layer_name;
+        return false;
+      }
+    }
+  }
   return true;
 }
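The exclude path mirrors the include path: a layer with only exclude rules starts out included and is dropped as soon as any one rule matches the state. A hypothetical sketch:

    state: { phase: TEST stage: 'deploy' }
    layers: {
      name: 'train_only'
      type: DUMMY_DATA
      top: 'noise'
      # Rule 1 is not met (phase differs); rule 2 matches stage 'deploy',
      # so FilterNet drops this layer from the filtered NetParameter.
      exclude: { phase: TRAIN }
      exclude: { stage: 'deploy' }
    }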
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 91738817875..11795f058cd 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -11,6 +11,7 @@
 #include "caffe/solver.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
+#include "caffe/util/upgrade_proto.hpp"

 namespace caffe {

@@ -42,40 +43,122 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
     Caffe::set_random_seed(param_.random_seed());
   }
   // Scaffolding code
+  InitTrainNet();
+  InitTestNets();
+  LOG(INFO) << "Solver scaffolding done.";
+}
+
+template <typename Dtype>
+void Solver<Dtype>::InitTrainNet() {
+  const int num_train_nets = param_.has_net() + param_.has_net_param() +
+      param_.has_train_net() + param_.has_train_net_param();
+  const string& field_names = "net, net_param, train_net, train_net_param";
+  CHECK_GE(num_train_nets, 1) << "SolverParameter must specify a train net "
+      << "using one of these fields: " << field_names;
+  CHECK_LE(num_train_nets, 1) << "SolverParameter must not contain more than "
+      << "one of these fields specifying a train_net: " << field_names;
+  NetParameter net_param;
   if (param_.has_train_net_param()) {
-    CHECK(!param_.has_train_net()) << "Either train_net_param or train_net may "
-        << "be specified, but not both.";
-    LOG(INFO) << "Creating training net specified in SolverParameter.";
-    net_.reset(new Net<Dtype>(param_.train_net_param()));
-  } else {
-    CHECK(param_.has_train_net())
-        << "Neither train_net nor train_net_param were specified.";
-    LOG(INFO) << "Creating training net from file: " << param_.train_net();
-    net_.reset(new Net<Dtype>(param_.train_net()));
+    LOG(INFO) << "Creating training net specified in train_net_param.";
+    net_param.CopyFrom(param_.train_net_param());
+  } else if (param_.has_train_net()) {
+    LOG(INFO) << "Creating training net from train_net file: "
+        << param_.train_net();
+    ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param);
   }
-  CHECK(net_) << "Training net uninitialized.";
-  net_->set_debug_info(param_.debug_info());
+  if (param_.has_net_param()) {
+    LOG(INFO) << "Creating training net specified in net_param.";
+    net_param.CopyFrom(param_.net_param());
+  }
+  if (param_.has_net()) {
+    LOG(INFO) << "Creating training net from net file: " << param_.net();
+    ReadNetParamsFromTextFileOrDie(param_.net(), &net_param);
+  }
+  // Set the correct NetState. We start with the solver defaults (lowest
+  // precedence); then, merge in any NetState specified by the net_param itself;
+  // finally, merge in any NetState specified by the train_state (highest
+  // precedence).
+  NetState net_state;
+  net_state.set_phase(TRAIN);
+  net_state.MergeFrom(net_param.state());
+  net_state.MergeFrom(param_.train_state());
+  net_param.mutable_state()->CopyFrom(net_state);
+  net_.reset(new Net<Dtype>(net_param));
+}
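The three-way merge above is easiest to see with concrete (hypothetical) values; later MergeFrom calls overwrite singular fields, while repeated fields such as stage accumulate:

    # Solver default:            phase: TRAIN
    # state in the net file:     state: { level: 1 stage: 'base' }
    # train_state in the solver: train_state: { level: 2 }
    # Effective merged state:    phase: TRAIN level: 2 stage: 'base'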
+
+template <typename Dtype>
+void Solver<Dtype>::InitTestNets() {
+  const bool has_net_param = param_.has_net_param();
+  const bool has_net_file = param_.has_net();
+  const int num_generic_nets = has_net_param + has_net_file;
+  CHECK_LE(num_generic_nets, 1)
+      << "Both net_param and net_file may not be specified.";
   const int num_test_net_params = param_.test_net_param_size();
   const int num_test_net_files = param_.test_net_size();
   const int num_test_nets = num_test_net_params + num_test_net_files;
-  if (num_test_nets) {
-    CHECK_EQ(param_.test_iter_size(), num_test_nets)
-        << "test_iter must be specified for each test network.";
+  if (num_generic_nets) {
+    CHECK_GE(param_.test_iter_size(), num_test_nets)
+        << "test_iter must be specified for each test network.";
+  } else {
+    CHECK_EQ(param_.test_iter_size(), num_test_nets)
+        << "test_iter must be specified for each test network.";
+  }
+  // If we have a generic net (specified by net or net_param, rather than
+  // test_net or test_net_param), we may have an unlimited number of actual
+  // test networks -- the actual number is given by the number of remaining
+  // test_iters after any test nets specified by test_net_param and/or test_net
+  // are evaluated.
+  const int num_generic_net_instances = param_.test_iter_size() - num_test_nets;
+  const int num_test_net_instances = num_test_nets + num_generic_net_instances;
+  if (param_.test_state_size()) {
+    CHECK_EQ(param_.test_state_size(), num_test_net_instances)
+        << "test_state must be unspecified or specified once per test net.";
+  }
+  if (num_test_net_instances) {
     CHECK_GT(param_.test_interval(), 0);
   }
-  test_nets_.resize(num_test_nets);
-  for (int i = 0; i < num_test_net_params; ++i) {
-    LOG(INFO) << "Creating testing net (#" << i
-        << ") specified in SolverParameter.";
-    test_nets_[i].reset(new Net<Dtype>(param_.test_net_param(i)));
+  int test_net_id = 0;
+  vector<string> sources(num_test_net_instances);
+  vector<NetParameter> net_params(num_test_net_instances);
+  for (int i = 0; i < num_test_net_params; ++i, ++test_net_id) {
+    sources[test_net_id] = "test_net_param";
+    net_params[test_net_id].CopyFrom(param_.test_net_param(i));
   }
-  for (int i = 0, test_net_id = num_test_net_params;
-       i < num_test_net_files; ++i, ++test_net_id) {
-    LOG(INFO) << "Creating testing net (#" << test_net_id
-        << ") from file: " << param.test_net(i);
-    test_nets_[test_net_id].reset(new Net<Dtype>(param_.test_net(i)));
+  for (int i = 0; i < num_test_net_files; ++i, ++test_net_id) {
+    sources[test_net_id] = "test_net file: " + param_.test_net(i);
+    ReadNetParamsFromTextFileOrDie(param_.test_net(i),
+        &net_params[test_net_id]);
+  }
+  const int remaining_test_nets = param_.test_iter_size() - test_net_id;
+  if (has_net_param) {
+    for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
+      sources[test_net_id] = "net_param";
+      net_params[test_net_id].CopyFrom(param_.net_param());
+    }
+  }
+  if (has_net_file) {
+    for (int i = 0; i < remaining_test_nets; ++i, ++test_net_id) {
+      sources[test_net_id] = "net file: " + param_.net();
+      ReadNetParamsFromTextFileOrDie(param_.net(), &net_params[test_net_id]);
+    }
+  }
+  test_nets_.resize(num_test_net_instances);
+  for (int i = 0; i < num_test_net_instances; ++i) {
+    // Set the correct NetState. We start with the solver defaults (lowest
+    // precedence); then, merge in any NetState specified by the net_param
+    // itself; finally, merge in any NetState specified by the test_state
+    // (highest precedence).
+    NetState net_state;
+    net_state.set_phase(TEST);
+    net_state.MergeFrom(net_params[i].state());
+    if (param_.test_state_size()) {
+      net_state.MergeFrom(param_.test_state(i));
+    }
+    net_params[i].mutable_state()->CopyFrom(net_state);
+    LOG(INFO)
+        << "Creating testing net (#" << i << ") specified by " << sources[i];
+    test_nets_[i].reset(new Net<Dtype>(net_params[i]));
   }
-  LOG(INFO) << "Solver scaffolding done.";
 }

 template <typename Dtype>
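One consequence of InitTestNets worth spelling out: test nets are instantiated in source order -- test_net_param first, then test_net files, then copies of the generic net for any remaining test_iter entries. A hypothetical solver mixing all three sources:

    test_net_param: { name: 'InlineTestNet' }  # becomes test_nets()[0]
    test_net: "extra_test.prototxt"            # becomes test_nets()[1]
    net: "train_val.prototxt"                  # fills the remaining slot
    test_iter: 100
    test_iter: 100
    test_iter: 100   # three test_iters, two explicit nets -> one generic instance
    test_interval: 1000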
From e526e2ddd71c0881b71f31bf522806f85f687648 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Fri, 18 Jul 2014 15:07:13 -0700
Subject: [PATCH 4/4] Use unified train/test nets in examples.

---
 examples/cifar10/cifar10_full_solver.prototxt |   6 +-
 .../cifar10/cifar10_full_solver_lr1.prototxt  |   6 +-
 .../cifar10/cifar10_full_solver_lr2.prototxt  |   6 +-
 examples/cifar10/cifar10_full_train.prototxt  | 174 ------------
 ...totxt => cifar10_full_train_test.prototxt} |  16 +-
 .../cifar10/cifar10_quick_solver.prototxt     |   6 +-
 .../cifar10/cifar10_quick_solver_lr1.prototxt |   6 +-
 examples/cifar10/cifar10_quick_train.prototxt | 168 ------------
 ...otxt => cifar10_quick_train_test.prototxt} |  16 +-
 examples/imagenet/alexnet_solver.prototxt     |   3 +-
 ...in.prototxt => alexnet_train_val.prototxt} |  23 ++
 examples/imagenet/alexnet_val.prototxt        | 228 ---------------
 examples/imagenet/imagenet_solver.prototxt    |   3 +-
 ...n.prototxt => imagenet_train_val.prototxt} |  23 ++
 examples/imagenet/imagenet_val.prototxt       | 228 ---------------
 examples/imagenet/readme.md                   |   7 +-
 .../mnist/lenet_consolidated_solver.prototxt  | 259 ++----------------
 examples/mnist/lenet_solver.prototxt          |   6 +-
 examples/mnist/lenet_test.prototxt            | 118 --------
 ...ain.prototxt => lenet_train_test.prototxt} |  23 ++
 ...in.prototxt => mnist_autoencoder.prototxt} |  28 ++
 .../mnist/mnist_autoencoder_solver.prototxt   |   3 +-
 .../mnist/mnist_autoencoder_test.prototxt     | 146 ----------
 examples/mnist/readme.md                      |   6 +-
 24 files changed, 177 insertions(+), 1331 deletions(-)
 delete mode 100644 examples/cifar10/cifar10_full_train.prototxt
 rename examples/cifar10/{cifar10_full_test.prototxt => cifar10_full_train_test.prototxt} (89%)
 delete mode 100644 examples/cifar10/cifar10_quick_train.prototxt
 rename examples/cifar10/{cifar10_quick_test.prototxt => cifar10_quick_train_test.prototxt} (89%)
 rename examples/imagenet/{alexnet_train.prototxt => alexnet_train_val.prototxt} (91%)
 delete mode 100644 examples/imagenet/alexnet_val.prototxt
 rename examples/imagenet/{imagenet_train.prototxt => imagenet_train_val.prototxt} (91%)
 delete mode 100644 examples/imagenet/imagenet_val.prototxt
 delete mode 100644 examples/mnist/lenet_test.prototxt
 rename examples/mnist/{lenet_train.prototxt => lenet_train_test.prototxt} (82%)
 rename examples/mnist/{mnist_autoencoder_train.prototxt => mnist_autoencoder.prototxt} (88%)
 delete mode 100644 examples/mnist/mnist_autoencoder_test.prototxt

diff --git a/examples/cifar10/cifar10_full_solver.prototxt b/examples/cifar10/cifar10_full_solver.prototxt
index 0a0b456308d..49de3f58803 100644
--- a/examples/cifar10/cifar10_full_solver.prototxt
+++ b/examples/cifar10/cifar10_full_solver.prototxt
@@ -1,10 +1,8 @@
 # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
 # then another factor of 10 after 10 more epochs (5000 iters)

-# The training protocol buffer definition
-train_net: "cifar10_full_train.prototxt"
-# The testing protocol buffer definition
-test_net: "cifar10_full_test.prototxt"
+# The train/test net protocol buffer definition
+net: "cifar10_full_train_test.prototxt"
 # test_iter specifies how many forward passes the test should carry out.
 # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
 # covering the full 10,000 testing images.
diff --git a/examples/cifar10/cifar10_full_solver_lr1.prototxt b/examples/cifar10/cifar10_full_solver_lr1.prototxt
index 4376de5493f..746f4fba15a 100644
--- a/examples/cifar10/cifar10_full_solver_lr1.prototxt
+++ b/examples/cifar10/cifar10_full_solver_lr1.prototxt
@@ -1,10 +1,8 @@
 # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
 # then another factor of 10 after 10 more epochs (5000 iters)

-# The training protocol buffer definition
-train_net: "cifar10_full_train.prototxt"
-# The testing protocol buffer definition
-test_net: "cifar10_full_test.prototxt"
+# The train/test net protocol buffer definition
+net: "cifar10_full_train_test.prototxt"
 # test_iter specifies how many forward passes the test should carry out.
 # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
 # covering the full 10,000 testing images.
diff --git a/examples/cifar10/cifar10_full_solver_lr2.prototxt b/examples/cifar10/cifar10_full_solver_lr2.prototxt
index 19580c5184a..5a549ffc96d 100644
--- a/examples/cifar10/cifar10_full_solver_lr2.prototxt
+++ b/examples/cifar10/cifar10_full_solver_lr2.prototxt
@@ -1,10 +1,8 @@
 # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
 # then another factor of 10 after 10 more epochs (5000 iters)

-# The training protocol buffer definition
-train_net: "cifar10_full_train.prototxt"
-# The testing protocol buffer definition
-test_net: "cifar10_full_test.prototxt"
+# The train/test net protocol buffer definition
+net: "cifar10_full_train_test.prototxt"
 # test_iter specifies how many forward passes the test should carry out.
 # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
 # covering the full 10,000 testing images.
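The example migrations below all follow the same recipe, sketched here in miniature: the former train and test nets become one file in which paired DATA layers carry opposite phase rules and evaluation-only layers are tagged TEST (paths shortened):

    layers {
      name: "cifar"
      type: DATA
      top: "data"
      top: "label"
      data_param { source: "cifar-train-leveldb" batch_size: 100 }
      include: { phase: TRAIN }
    }
    layers {
      name: "cifar"
      type: DATA
      top: "data"
      top: "label"
      data_param { source: "cifar-test-leveldb" batch_size: 100 }
      include: { phase: TEST }
    }
    layers {
      name: "accuracy"
      type: ACCURACY
      bottom: "ip1"
      bottom: "label"
      top: "accuracy"
      include: { phase: TEST }
    }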
diff --git a/examples/cifar10/cifar10_full_train.prototxt b/examples/cifar10/cifar10_full_train.prototxt deleted file mode 100644 index 25c76060991..00000000000 --- a/examples/cifar10/cifar10_full_train.prototxt +++ /dev/null @@ -1,174 +0,0 @@ -name: "CIFAR10_full_train" -layers { - name: "cifar" - type: DATA - top: "data" - top: "label" - data_param { - source: "cifar10-leveldb/cifar-train-leveldb" - mean_file: "mean.binaryproto" - batch_size: 100 - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "relu1" - type: RELU - bottom: "pool1" - top: "pool1" -} -layers { - name: "norm1" - type: LRN - bottom: "pool1" - top: "norm1" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "norm1" - top: "conv2" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm2" - type: LRN - bottom: "pool2" - top: "norm2" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } -} -layers { - name: "conv3" - type: CONVOLUTION - bottom: "norm2" - top: "conv3" - convolution_param { - num_output: 64 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "pool3" - type: POOLING - bottom: "conv3" - top: "pool3" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool3" - top: "ip1" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 250 - weight_decay: 0 - inner_product_param { - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip1" - bottom: "label" -} diff --git a/examples/cifar10/cifar10_full_test.prototxt b/examples/cifar10/cifar10_full_train_test.prototxt similarity index 89% rename from examples/cifar10/cifar10_full_test.prototxt rename to examples/cifar10/cifar10_full_train_test.prototxt index 1f77b4f0348..4fd52fec133 100644 --- a/examples/cifar10/cifar10_full_test.prototxt +++ b/examples/cifar10/cifar10_full_train_test.prototxt @@ -1,4 +1,16 @@ -name: "CIFAR10_full_test" +name: "CIFAR10_full" +layers { + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-train-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } + include: { phase: TRAIN } +} layers { name: "cifar" type: DATA @@ -9,6 +21,7 @@ layers { mean_file: "mean.binaryproto" batch_size: 100 } + include: { phase: TEST } } layers { name: "conv1" @@ -172,6 +185,7 @@ layers { bottom: "ip1" bottom: "label" top: "accuracy" + include: { phase: 
TEST } } layers { name: "loss" diff --git a/examples/cifar10/cifar10_quick_solver.prototxt b/examples/cifar10/cifar10_quick_solver.prototxt index 4b547cc96f4..cdd0722b3a0 100644 --- a/examples/cifar10/cifar10_quick_solver.prototxt +++ b/examples/cifar10/cifar10_quick_solver.prototxt @@ -1,9 +1,7 @@ # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10 -# The training protocol buffer definition -train_net: "cifar10_quick_train.prototxt" -# The testing protocol buffer definition -test_net: "cifar10_quick_test.prototxt" +# The train/test net protocol buffer definition +net: "cifar10_quick_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images. diff --git a/examples/cifar10/cifar10_quick_solver_lr1.prototxt b/examples/cifar10/cifar10_quick_solver_lr1.prototxt index d4ba3d525d9..2ed54ad980f 100644 --- a/examples/cifar10/cifar10_quick_solver_lr1.prototxt +++ b/examples/cifar10/cifar10_quick_solver_lr1.prototxt @@ -1,9 +1,7 @@ # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10 -# The training protocol buffer definition -train_net: "cifar10_quick_train.prototxt" -# The testing protocol buffer definition -test_net: "cifar10_quick_test.prototxt" +# The train/test net protocol buffer definition +net: "cifar10_quick_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images. diff --git a/examples/cifar10/cifar10_quick_train.prototxt b/examples/cifar10/cifar10_quick_train.prototxt deleted file mode 100644 index de5b6c32c5d..00000000000 --- a/examples/cifar10/cifar10_quick_train.prototxt +++ /dev/null @@ -1,168 +0,0 @@ -name: "CIFAR10_quick_train" -layers { - name: "cifar" - type: DATA - top: "data" - top: "label" - data_param { - source: "cifar10-leveldb/cifar-train-leveldb" - mean_file: "mean.binaryproto" - batch_size: 100 - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "relu1" - type: RELU - bottom: "pool1" - top: "pool1" -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 32 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "conv3" - type: CONVOLUTION - bottom: "pool2" - top: "conv3" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 64 - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "pool3" - type: POOLING - bottom: "conv3" - top: "pool3" - pooling_param { - 
pool: AVE - kernel_size: 3 - stride: 2 - } -} -layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool3" - top: "ip1" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 64 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" -} diff --git a/examples/cifar10/cifar10_quick_test.prototxt b/examples/cifar10/cifar10_quick_train_test.prototxt similarity index 89% rename from examples/cifar10/cifar10_quick_test.prototxt rename to examples/cifar10/cifar10_quick_train_test.prototxt index aa82c32aa24..b34d1cd2fcb 100644 --- a/examples/cifar10/cifar10_quick_test.prototxt +++ b/examples/cifar10/cifar10_quick_train_test.prototxt @@ -1,4 +1,16 @@ -name: "CIFAR10_quick_test" +name: "CIFAR10_quick" +layers { + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-train-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } + include: { phase: TRAIN } +} layers { name: "cifar" type: DATA @@ -9,6 +21,7 @@ layers { mean_file: "mean.binaryproto" batch_size: 100 } + include: { phase: TEST } } layers { name: "conv1" @@ -166,6 +179,7 @@ layers { bottom: "ip2" bottom: "label" top: "accuracy" + include: { phase: TEST } } layers { name: "loss" diff --git a/examples/imagenet/alexnet_solver.prototxt b/examples/imagenet/alexnet_solver.prototxt index 75d0d5dffa7..8581e99c6a7 100644 --- a/examples/imagenet/alexnet_solver.prototxt +++ b/examples/imagenet/alexnet_solver.prototxt @@ -1,5 +1,4 @@ -train_net: "alexnet_train.prototxt" -test_net: "alexnet_val.prototxt" +net: "alexnet_train_val.prototxt" test_iter: 1000 test_interval: 1000 base_lr: 0.01 diff --git a/examples/imagenet/alexnet_train.prototxt b/examples/imagenet/alexnet_train_val.prototxt similarity index 91% rename from examples/imagenet/alexnet_train.prototxt rename to examples/imagenet/alexnet_train_val.prototxt index 32a96cfd4d9..f65f3e7f87e 100644 --- a/examples/imagenet/alexnet_train.prototxt +++ b/examples/imagenet/alexnet_train_val.prototxt @@ -2,6 +2,8 @@ name: "AlexNet" layers { name: "data" type: DATA + top: "data" + top: "label" data_param { source: "ilsvrc12_train_leveldb" mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" @@ -9,8 +11,21 @@ layers { crop_size: 227 mirror: true } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA top: "data" top: "label" + data_param { + source: "ilsvrc12_val_leveldb" + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 50 + crop_size: 227 + mirror: false + } + include: { phase: TEST } } layers { name: "conv1" @@ -308,6 +323,14 @@ layers { bottom: "fc7" top: "fc8" } +layers { + name: "accuracy" + type: ACCURACY + bottom: "fc8" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} layers { name: "loss" type: SOFTMAX_LOSS diff --git a/examples/imagenet/alexnet_val.prototxt b/examples/imagenet/alexnet_val.prototxt deleted file mode 100644 index 1d8d86b78ff..00000000000 --- a/examples/imagenet/alexnet_val.prototxt +++ /dev/null @@ -1,228 +0,0 @@ -name: "AlexNet" -layers { - name: "data" - type: DATA - data_param { - source: "ilsvrc12_val_leveldb" - mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batch_size: 50 - 
crop_size: 227 - mirror: false - } - top: "data" - top: "label" -} -layers { - name: "conv1" - type: CONVOLUTION - convolution_param { - num_output: 96 - kernel_size: 11 - stride: 4 - } - bottom: "data" - top: "conv1" -} -layers { - name: "relu1" - type: RELU - bottom: "conv1" - top: "conv1" -} -layers { - name: "norm1" - type: LRN - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } - bottom: "conv1" - top: "norm1" -} -layers { - name: "pool1" - type: POOLING - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - bottom: "norm1" - top: "pool1" -} -layers { - name: "conv2" - type: CONVOLUTION - convolution_param { - num_output: 256 - pad: 2 - kernel_size: 5 - group: 2 - } - bottom: "pool1" - top: "conv2" -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "norm2" - type: LRN - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } - bottom: "conv2" - top: "norm2" -} -layers { - name: "pool2" - type: POOLING - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - bottom: "norm2" - top: "pool2" -} -layers { - name: "conv3" - type: CONVOLUTION - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - } - bottom: "pool2" - top: "conv3" -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "conv4" - type: CONVOLUTION - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - group: 2 - } - bottom: "conv3" - top: "conv4" -} -layers { - name: "relu4" - type: RELU - bottom: "conv4" - top: "conv4" -} -layers { - name: "conv5" - type: CONVOLUTION - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - group: 2 - } - bottom: "conv4" - top: "conv5" -} -layers { - name: "relu5" - type: RELU - bottom: "conv5" - top: "conv5" -} -layers { - name: "pool5" - type: POOLING - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } - bottom: "conv5" - top: "pool5" -} -layers { - name: "fc6" - type: INNER_PRODUCT - inner_product_param { - num_output: 4096 - } - bottom: "pool5" - top: "fc6" -} -layers { - name: "relu6" - type: RELU - bottom: "fc6" - top: "fc6" -} -layers { - name: "drop6" - type: DROPOUT - dropout_param { - dropout_ratio: 0.5 - } - bottom: "fc6" - top: "fc6" -} -layers { - name: "fc7" - type: INNER_PRODUCT - inner_product_param { - num_output: 4096 - } - bottom: "fc6" - top: "fc7" -} -layers { - name: "relu7" - type: RELU - bottom: "fc7" - top: "fc7" -} -layers { - name: "drop7" - type: DROPOUT - dropout_param { - dropout_ratio: 0.5 - } - bottom: "fc7" - top: "fc7" -} -layers { - name: "fc8" - type: INNER_PRODUCT - inner_product_param { - num_output: 1000 - } - bottom: "fc7" - top: "fc8" -} -layers { - name: "accuracy" - type: ACCURACY - bottom: "fc8" - bottom: "label" - top: "accuracy" -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "fc8" - bottom: "label" - top: "loss" -} diff --git a/examples/imagenet/imagenet_solver.prototxt b/examples/imagenet/imagenet_solver.prototxt index e543ba66cad..aedec4104a6 100644 --- a/examples/imagenet/imagenet_solver.prototxt +++ b/examples/imagenet/imagenet_solver.prototxt @@ -1,5 +1,4 @@ -train_net: "imagenet_train.prototxt" -test_net: "imagenet_val.prototxt" +net: "imagenet_train_val.prototxt" test_iter: 1000 test_interval: 1000 base_lr: 0.01 diff --git a/examples/imagenet/imagenet_train.prototxt b/examples/imagenet/imagenet_train_val.prototxt similarity index 91% rename from examples/imagenet/imagenet_train.prototxt rename to examples/imagenet/imagenet_train_val.prototxt index 
519d4509be9..af28c1495e5 100644 --- a/examples/imagenet/imagenet_train.prototxt +++ b/examples/imagenet/imagenet_train_val.prototxt @@ -11,6 +11,21 @@ layers { crop_size: 227 mirror: true } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "ilsvrc12_val_leveldb" + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 50 + crop_size: 227 + mirror: false + } + include: { phase: TEST } } layers { name: "conv1" @@ -308,6 +323,14 @@ layers { } } } +layers { + name: "accuracy" + type: ACCURACY + bottom: "fc8" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} layers { name: "loss" type: SOFTMAX_LOSS diff --git a/examples/imagenet/imagenet_val.prototxt b/examples/imagenet/imagenet_val.prototxt deleted file mode 100644 index 8be5150cdd2..00000000000 --- a/examples/imagenet/imagenet_val.prototxt +++ /dev/null @@ -1,228 +0,0 @@ -name: "CaffeNet" -layers { - name: "data" - type: DATA - top: "data" - top: "label" - data_param { - source: "ilsvrc12_val_leveldb" - mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batch_size: 50 - crop_size: 227 - mirror: false - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - convolution_param { - num_output: 96 - kernel_size: 11 - stride: 4 - } -} -layers { - name: "relu1" - type: RELU - bottom: "conv1" - top: "conv1" -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm1" - type: LRN - bottom: "pool1" - top: "norm1" - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "norm1" - top: "conv2" - convolution_param { - num_output: 256 - pad: 2 - kernel_size: 5 - group: 2 - } -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm2" - type: LRN - bottom: "pool2" - top: "norm2" - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } -} -layers { - name: "conv3" - type: CONVOLUTION - bottom: "norm2" - top: "conv3" - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - } -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "conv4" - type: CONVOLUTION - bottom: "conv3" - top: "conv4" - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - group: 2 - } -} -layers { - name: "relu4" - type: RELU - bottom: "conv4" - top: "conv4" -} -layers { - name: "conv5" - type: CONVOLUTION - bottom: "conv4" - top: "conv5" - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - group: 2 - } -} -layers { - name: "relu5" - type: RELU - bottom: "conv5" - top: "conv5" -} -layers { - name: "pool5" - type: POOLING - bottom: "conv5" - top: "pool5" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "fc6" - type: INNER_PRODUCT - bottom: "pool5" - top: "fc6" - inner_product_param { - num_output: 4096 - } -} -layers { - name: "relu6" - type: RELU - bottom: "fc6" - top: "fc6" -} -layers { - name: "drop6" - type: DROPOUT - bottom: "fc6" - top: "fc6" - dropout_param { - dropout_ratio: 0.5 - } -} -layers { - name: "fc7" - type: INNER_PRODUCT - bottom: "fc6" - top: "fc7" - inner_product_param { - num_output: 4096 - } -} -layers { - name: "relu7" - type: RELU - bottom: "fc7" - 
top: "fc7" -} -layers { - name: "drop7" - type: DROPOUT - bottom: "fc7" - top: "fc7" - dropout_param { - dropout_ratio: 0.5 - } -} -layers { - name: "fc8" - type: INNER_PRODUCT - bottom: "fc7" - top: "fc8" - inner_product_param { - num_output: 1000 - } -} -layers { - name: "accuracy" - type: ACCURACY - bottom: "fc8" - bottom: "label" - top: "accuracy" -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "fc8" - bottom: "label" - top: "loss" -} \ No newline at end of file diff --git a/examples/imagenet/readme.md b/examples/imagenet/readme.md index e74e6b86996..0b5b8452188 100644 --- a/examples/imagenet/readme.md +++ b/examples/imagenet/readme.md @@ -72,12 +72,11 @@ We will also lay out a protocol buffer for running the solver. Let's make a few * The network will be trained with momentum 0.9 and a weight decay of 0.0005. * For every 10,000 iterations, we will take a snapshot of the current status. -Sound good? This is implemented in `examples/imagenet/imagenet_solver.prototxt`. Again, you will need to change the first two lines: +Sound good? This is implemented in `examples/imagenet/imagenet_solver.prototxt`. Again, you will need to change the first line: - train_net: "imagenet_train.prototxt" - test_net: "imagenet_val.prototxt" + net: "imagenet_train_val.prototxt" -to point to the actual path if you have changed them. +to point to the actual path if you have changed it. Training ImageNet ----------------- diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt index ef851e0f656..980f9382066 100644 --- a/examples/mnist/lenet_consolidated_solver.prototxt +++ b/examples/mnist/lenet_consolidated_solver.prototxt @@ -30,8 +30,19 @@ random_seed: 1701 # solver mode: CPU or GPU solver_mode: GPU -# The training protocol buffer definition -train_net_param { +# We test on both the test and train set using "stages". The TEST DATA layers +# each have a stage, either 'test-on-train-set' or 'test-on-test-set'. +# test_iter specifies how many forward passes the test should carry out. +# In the case of MNIST, we have test batch size 100 and 100 test iterations, +# covering the full 10,000 testing images. +test_iter: 100 +test_state: { stage: "test-on-test-set" } +# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K). 
+test_iter: 600 +test_state: { stage: "test-on-train-set" } + +# The net protocol buffer definition +net_param { name: "LeNet" layers { name: "mnist" @@ -43,122 +54,8 @@ train_net_param { scale: 0.00390625 batch_size: 64 } + include: { phase: TRAIN } } - layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 20 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - blobs_lr: 1 - blobs_lr: 2 - convolution_param { - num_output: 50 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool2" - top: "ip1" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 500 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "relu1" - type: RELU - bottom: "ip1" - top: "ip1" - } - layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - blobs_lr: 1 - blobs_lr: 2 - inner_product_param { - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" - } -} - -# test_iter specifies how many forward passes the test should carry out. -# In the case of MNIST, we have test batch size 100 and 100 test iterations, -# covering the full 10,000 testing images. 
-test_iter: 100 -# The testing protocol buffer definition -test_net_param { - name: "LeNet-test" layers { name: "mnist" type: DATA @@ -169,120 +66,11 @@ test_net_param { scale: 0.00390625 batch_size: 100 } - } - layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - convolution_param { - num_output: 20 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - convolution_param { - num_output: 50 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } - } - layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool2" - top: "ip1" - inner_product_param { - num_output: 500 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } + include: { + phase: TEST + stage: "test-on-test-set" } } - layers { - name: "relu1" - type: RELU - bottom: "ip1" - top: "ip1" - } - layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - inner_product_param { - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } - } - layers { - name: "accuracy" - type: ACCURACY - bottom: "ip2" - bottom: "label" - top: "accuracy" - } - layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" - top: "loss" - } -} - -# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K). -test_iter: 600 -# The protocol buffer definition to test on the train set -test_net_param { - name: "LeNet-test-on-train" layers { name: "mnist" type: DATA @@ -293,12 +81,18 @@ test_net_param { scale: 0.00390625 batch_size: 100 } + include: { + phase: TEST + stage: "test-on-train-set" + } } layers { name: "conv1" type: CONVOLUTION bottom: "data" top: "conv1" + blobs_lr: 1 + blobs_lr: 2 convolution_param { num_output: 20 kernel_size: 5 @@ -327,6 +121,8 @@ test_net_param { type: CONVOLUTION bottom: "pool1" top: "conv2" + blobs_lr: 1 + blobs_lr: 2 convolution_param { num_output: 50 kernel_size: 5 @@ -355,6 +151,8 @@ test_net_param { type: INNER_PRODUCT bottom: "pool2" top: "ip1" + blobs_lr: 1 + blobs_lr: 2 inner_product_param { num_output: 500 weight_filler { @@ -376,6 +174,8 @@ test_net_param { type: INNER_PRODUCT bottom: "ip1" top: "ip2" + blobs_lr: 1 + blobs_lr: 2 inner_product_param { num_output: 10 weight_filler { @@ -392,6 +192,7 @@ test_net_param { bottom: "ip2" bottom: "label" top: "accuracy" + include: { phase: TEST } } layers { name: "loss" diff --git a/examples/mnist/lenet_solver.prototxt b/examples/mnist/lenet_solver.prototxt index 7947f2d6a73..a3b33090472 100644 --- a/examples/mnist/lenet_solver.prototxt +++ b/examples/mnist/lenet_solver.prototxt @@ -1,7 +1,5 @@ -# The training protocol buffer definition -train_net: "lenet_train.prototxt" -# The testing protocol buffer definition -test_net: "lenet_test.prototxt" +# The train/test net protocol buffer definition +net: "lenet_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images. 
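To make the stage machinery above concrete, here is a minimal end-to-end sketch distilled from the consolidated LeNet solver (the file names and leveldb paths are hypothetical, not part of this patch). The solver declares one `test_iter`/`test_state` pair per test net, and each TEST-phase DATA layer opts into exactly one stage:

    # sketch_solver.prototxt (hypothetical file name)
    net: "sketch_net.prototxt"
    test_interval: 500
    # First test net: selects layers tagged with stage "test-on-test-set".
    test_iter: 100
    test_state: { stage: "test-on-test-set" }
    # Second test net: selects layers tagged with stage "test-on-train-set".
    test_iter: 600
    test_state: { stage: "test-on-train-set" }

    # sketch_net.prototxt (hypothetical file name)
    layers {
      name: "data"
      type: DATA
      top: "data"
      top: "label"
      data_param {
        source: "sketch-train-leveldb"
        batch_size: 64
      }
      include: { phase: TRAIN }
    }
    layers {
      name: "data"
      type: DATA
      top: "data"
      top: "label"
      data_param {
        source: "sketch-test-leveldb"
        batch_size: 100
      }
      include: {
        phase: TEST
        stage: "test-on-test-set"
      }
    }
    layers {
      name: "data"
      type: DATA
      top: "data"
      top: "label"
      data_param {
        source: "sketch-train-leveldb"
        batch_size: 100
      }
      include: {
        phase: TEST
        stage: "test-on-train-set"
      }
    }

Layers with no include/exclude rules survive filtering in every state, so the shared CONVOLUTION/POOLING/INNER_PRODUCT stack is written only once; a test-only layer such as ACCURACY simply carries `include: { phase: TEST }`, as in the consolidated example.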
diff --git a/examples/mnist/lenet_test.prototxt b/examples/mnist/lenet_test.prototxt deleted file mode 100644 index 2497f02ae86..00000000000 --- a/examples/mnist/lenet_test.prototxt +++ /dev/null @@ -1,118 +0,0 @@ -name: "LeNet-test" -layers { - name: "mnist" - type: DATA - top: "data" - top: "label" - data_param { - source: "mnist-test-leveldb" - scale: 0.00390625 - batch_size: 100 - } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - convolution_param { - num_output: 20 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "pool1" - top: "conv2" - convolution_param { - num_output: 50 - kernel_size: 5 - stride: 1 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} -layers { - name: "ip1" - type: INNER_PRODUCT - bottom: "pool2" - top: "ip1" - inner_product_param { - num_output: 500 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "relu1" - type: RELU - bottom: "ip1" - top: "ip1" -} -layers { - name: "ip2" - type: INNER_PRODUCT - bottom: "ip1" - top: "ip2" - inner_product_param { - num_output: 10 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - } - } -} -layers { - name: "accuracy" - type: ACCURACY - bottom: "ip2" - bottom: "label" - top: "accuracy" -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "ip2" - bottom: "label" - top: "loss" -} diff --git a/examples/mnist/lenet_train.prototxt b/examples/mnist/lenet_train_test.prototxt similarity index 82% rename from examples/mnist/lenet_train.prototxt rename to examples/mnist/lenet_train_test.prototxt index e8a1e74e40b..3c77452130c 100644 --- a/examples/mnist/lenet_train.prototxt +++ b/examples/mnist/lenet_train_test.prototxt @@ -9,7 +9,21 @@ layers { scale: 0.00390625 batch_size: 64 } + include: { phase: TRAIN } } +layers { + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { + source: "mnist-test-leveldb" + scale: 0.00390625 + batch_size: 100 + } + include: { phase: TEST } +} + layers { name: "conv1" type: CONVOLUTION @@ -110,9 +124,18 @@ layers { } } } +layers { + name: "accuracy" + type: ACCURACY + bottom: "ip2" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} layers { name: "loss" type: SOFTMAX_LOSS bottom: "ip2" bottom: "label" + top: "loss" } diff --git a/examples/mnist/mnist_autoencoder_train.prototxt b/examples/mnist/mnist_autoencoder.prototxt similarity index 88% rename from examples/mnist/mnist_autoencoder_train.prototxt rename to examples/mnist/mnist_autoencoder.prototxt index 90d2cff99b8..ad1e7665bf2 100644 --- a/examples/mnist/mnist_autoencoder_train.prototxt +++ b/examples/mnist/mnist_autoencoder.prototxt @@ -8,6 +8,18 @@ layers { scale: 0.0039215684 batch_size: 100 } + include: { phase: TRAIN } +} +layers { + top: "data" + name: "data" + type: DATA + data_param { + source: "mnist-test-leveldb" + scale: 0.0039215684 + batch_size: 100 + } + include: { phase: TEST } } layers { bottom: "data" @@ -232,4 +244,20 @@ layers { bottom: "flatdata" name: "loss" type: SIGMOID_CROSS_ENTROPY_LOSS + include: { phase: TRAIN } +} +layers { + bottom: "decode1" + top: 
"decode1neuron" + name: "decode1neuron" + type: SIGMOID + include: { phase: TEST } +} +layers { + bottom: "decode1neuron" + bottom: "flatdata" + name: "loss" + type: EUCLIDEAN_LOSS + top: "loss" + include: { phase: TEST } } diff --git a/examples/mnist/mnist_autoencoder_solver.prototxt b/examples/mnist/mnist_autoencoder_solver.prototxt index 06e057d53a4..ae1ddebccd2 100644 --- a/examples/mnist/mnist_autoencoder_solver.prototxt +++ b/examples/mnist/mnist_autoencoder_solver.prototxt @@ -1,5 +1,4 @@ -train_net: "mnist_autoencoder_train.prototxt" -test_net: "mnist_autoencoder_test.prototxt" +net: "mnist_autoencoder.prototxt" test_iter: 50 test_interval: 100 test_compute_loss: true diff --git a/examples/mnist/mnist_autoencoder_test.prototxt b/examples/mnist/mnist_autoencoder_test.prototxt deleted file mode 100644 index b52364c17fc..00000000000 --- a/examples/mnist/mnist_autoencoder_test.prototxt +++ /dev/null @@ -1,146 +0,0 @@ -name: "MNISTAutoencoder" -layers { - top: "data" - name: "data" - type: DATA - data_param { - source: "mnist-test-leveldb" - scale: 0.0039215684 - batch_size: 100 - } -} -layers { - bottom: "data" - top: "flatdata" - name: "flatdata" - type: FLATTEN -} -layers { - bottom: "data" - top: "encode1" - name: "encode1" - type: INNER_PRODUCT - inner_product_param { - num_output: 1000 - } -} -layers { - bottom: "encode1" - top: "encode1neuron" - name: "encode1neuron" - type: SIGMOID -} -layers { - bottom: "encode1neuron" - top: "encode2" - name: "encode2" - type: INNER_PRODUCT - inner_product_param { - num_output: 500 - } -} -layers { - bottom: "encode2" - top: "encode2neuron" - name: "encode2neuron" - type: SIGMOID -} -layers { - bottom: "encode2neuron" - top: "encode3" - name: "encode3" - type: INNER_PRODUCT - inner_product_param { - num_output: 250 - } -} -layers { - bottom: "encode3" - top: "encode3neuron" - name: "encode3neuron" - type: SIGMOID -} -layers { - bottom: "encode3neuron" - top: "encode4" - name: "encode4" - type: INNER_PRODUCT - blobs_lr: 1 - blobs_lr: 1 - weight_decay: 1 - weight_decay: 0 - inner_product_param { - num_output: 30 - } -} -layers { - bottom: "encode4" - top: "decode4" - name: "decode4" - type: INNER_PRODUCT - blobs_lr: 1 - blobs_lr: 1 - weight_decay: 1 - weight_decay: 0 - inner_product_param { - num_output: 250 - } -} -layers { - bottom: "decode4" - top: "decode4neuron" - name: "decode4neuron" - type: SIGMOID -} -layers { - bottom: "decode4neuron" - top: "decode3" - name: "decode3" - type: INNER_PRODUCT - inner_product_param { - num_output: 500 - } -} -layers { - bottom: "decode3" - top: "decode3neuron" - name: "decode3neuron" - type: SIGMOID -} -layers { - bottom: "decode3neuron" - top: "decode2" - name: "decode2" - type: INNER_PRODUCT - inner_product_param { - num_output: 1000 - } -} -layers { - bottom: "decode2" - top: "decode2neuron" - name: "decode2neuron" - type: SIGMOID -} -layers { - bottom: "decode2neuron" - top: "decode1" - name: "decode1" - type: INNER_PRODUCT - inner_product_param { - num_output: 784 - } -} -layers { - bottom: "decode1" - top: "decode1neuron" - name: "decode1neuron" - type: SIGMOID -} -layers { - bottom: "decode1neuron" - bottom: "flatdata" - name: "loss" - type: EUCLIDEAN_LOSS - top: "loss" -} diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md index 4f3f4d9ce12..65a780714ae 100644 --- a/examples/mnist/readme.md +++ b/examples/mnist/readme.md @@ -177,10 +177,8 @@ The `softmax_loss` layer implements both the softmax and the multinomial logisti Check out the comments explaining each line in the prototxt: - # 
The training protocol buffer definition - train_net: "lenet_train.prototxt" - # The testing protocol buffer definition - test_net: "lenet_test.prototxt" + # The train/test net protocol buffer definition + net: "lenet_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images.
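One pattern from the autoencoder example above is worth a closing note: because each filtered net keeps only the layers whose rules match, a single net definition can attach different loss heads for training and testing. A minimal sketch (blob and layer names here are hypothetical):

    # Training loss: sigmoid cross-entropy computed on the raw "decode"
    # activations, present only in the TRAIN-phase net.
    layers {
      bottom: "decode"
      bottom: "flatdata"
      name: "loss"
      type: SIGMOID_CROSS_ENTROPY_LOSS
      include: { phase: TRAIN }
    }
    # Test-time path: apply the sigmoid explicitly, then measure Euclidean
    # reconstruction error against the same flattened input.
    layers {
      bottom: "decode"
      top: "decodeneuron"
      name: "decodeneuron"
      type: SIGMOID
      include: { phase: TEST }
    }
    layers {
      bottom: "decodeneuron"
      bottom: "flatdata"
      name: "loss"
      type: EUCLIDEAN_LOSS
      top: "loss"
      include: { phase: TEST }
    }

The two layers named "loss" never coexist in a filtered net, so reusing the name is safe (the autoencoder diff above does exactly this), and with `test_compute_loss: true` in the solver the Euclidean loss is reported during testing.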