Commit d5f1a50

Merge pull request #789 from Yangqing/sweep
Consolidate tools.
Yangqing committed Jul 26, 2014
2 parents c521e7e + 9862d5a commit d5f1a50
Showing 16 changed files with 218 additions and 187 deletions.
2 changes: 1 addition & 1 deletion examples/cifar10/readme.md
@@ -42,7 +42,7 @@ Training the model is simple after you have written the network definition proto
cd $CAFFE_ROOT/examples/cifar10
./train_quick.sh

-`train_quick.sh` is a simple script, so have a look inside. `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument.
+`train_quick.sh` is a simple script, so have a look inside. `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `caffe.bin` with the `train` action, and the solver protobuf text file as its argument.

When you run the code, you will see a lot of messages flying by like this:

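For orientation, a minimal sketch of the consolidated invocation that the updated paragraph above refers to (file names taken from the cifar10 example; the flags are defined in tools/caffe.cpp later in this commit):

    cd $CAFFE_ROOT/examples/cifar10
    # GLOG_logtostderr=1 routes glog messages to stderr, as before.
    GLOG_logtostderr=1 ../../build/tools/caffe.bin train \
        --solver_proto_file=cifar10_quick_solver.prototxt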
16 changes: 8 additions & 8 deletions examples/cifar10/train_full.sh
@@ -2,15 +2,15 @@

TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin \
-    cifar10_full_solver.prototxt
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=cifar10_full_solver.prototxt

#reduce learning rate by factor of 10
-GLOG_logtostderr=1 $TOOLS/train_net.bin \
-    cifar10_full_solver_lr1.prototxt \
-    cifar10_full_iter_60000.solverstate
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=cifar10_full_solver_lr1.prototxt \
+    --resume_point_file=cifar10_full_iter_60000.solverstate

#reduce learning rate by factor of 10
-GLOG_logtostderr=1 $TOOLS/train_net.bin \
-    cifar10_full_solver_lr2.prototxt \
-    cifar10_full_iter_65000.solverstate
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=cifar10_full_solver_lr2.prototxt \
+    --resume_point_file=cifar10_full_iter_65000.solverstate
7 changes: 5 additions & 2 deletions examples/cifar10/train_quick.sh
@@ -2,7 +2,10 @@

TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin cifar10_quick_solver.prototxt
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=cifar10_quick_solver.prototxt

#reduce learning rate by factor of 10 after 8 epochs
-GLOG_logtostderr=1 $TOOLS/train_net.bin cifar10_quick_solver_lr1.prototxt cifar10_quick_iter_4000.solverstate
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=cifar10_quick_solver_lr1.prototxt \
+    --resume_point_file=cifar10_quick_iter_4000.solverstate
5 changes: 3 additions & 2 deletions examples/imagenet/resume_training.sh
@@ -2,7 +2,8 @@

TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin \
-    imagenet_solver.prototxt caffe_imagenet_train_10000.solverstate
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=imagenet_solver.prototxt \
+    --resume_point_file=caffe_imagenet_train_10000.solverstate

echo "Done."
3 changes: 2 additions & 1 deletion examples/imagenet/train_alexnet.sh
@@ -2,6 +2,7 @@

TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin alexnet_solver.prototxt
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=alexnet_solver.prototxt

echo "Done."
3 changes: 2 additions & 1 deletion examples/imagenet/train_imagenet.sh
@@ -2,6 +2,7 @@

TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin imagenet_solver.prototxt
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=imagenet_solver.prototxt

echo "Done."
2 changes: 1 addition & 1 deletion examples/mnist/readme.md
@@ -212,7 +212,7 @@ Training the model is simple after you have written the network definition proto
cd $CAFFE_ROOT/examples/mnist
./train_lenet.sh

-`train_lenet.sh` is a simple script, but here are a few explanations: `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `train_net.bin`, with the solver protobuf text file as its argument.
+`train_lenet.sh` is a simple script, but here are a few explanations: `GLOG_logtostderr=1` is the google logging flag that prints all the logging messages directly to stderr. The main tool for training is `caffe.bin` with action `train`, with the solver protobuf text file as its argument.

When you run the code, you will see a lot of messages flying by like this:

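The consolidated tool also covers fine-tuning, which previously required a separate finetune_net.bin (deprecated at the end of this commit). A minimal sketch using the --pretrained_net_file flag defined in tools/caffe.cpp; the .caffemodel file name here is a hypothetical stand-in:

    # --pretrained_net_file copies weights from a trained snapshot instead of
    # resuming solver state; it cannot be combined with --resume_point_file.
    GLOG_logtostderr=1 ../../build/tools/caffe.bin train \
        --solver_proto_file=lenet_solver.prototxt \
        --pretrained_net_file=lenet_pretrained.caffemodel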
3 changes: 2 additions & 1 deletion examples/mnist/train_lenet.sh
@@ -2,4 +2,5 @@

TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin lenet_solver.prototxt
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=lenet_solver.prototxt
3 changes: 2 additions & 1 deletion examples/mnist/train_lenet_consolidated.sh
@@ -2,4 +2,5 @@

TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin lenet_consolidated_solver.prototxt
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=lenet_consolidated_solver.prototxt
3 changes: 2 additions & 1 deletion examples/mnist/train_mnist_autoencoder.sh
@@ -1,4 +1,5 @@
#!/bin/bash
TOOLS=../../build/tools

-GLOG_logtostderr=1 $TOOLS/train_net.bin mnist_autoencoder_solver.prototxt
+GLOG_logtostderr=1 $TOOLS/caffe.bin train \
+    --solver_proto_file=mnist_autoencoder_solver.prototxt
6 changes: 3 additions & 3 deletions include/caffe/caffe.hpp
@@ -5,15 +5,15 @@
#ifndef CAFFE_CAFFE_HPP_
#define CAFFE_CAFFE_HPP_

#include "caffe/common.hpp"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/solver.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"

#include "caffe/proto/caffe.pb.h"

#endif // CAFFE_CAFFE_HPP_
178 changes: 178 additions & 0 deletions tools/caffe.cpp
@@ -0,0 +1,178 @@
// Copyright 2014 BVLC and contributors.
#include <gflags/gflags.h>
#include <glog/logging.h>

#include <cstring>
#include <map>
#include <string>
#include <vector>

#include "caffe/caffe.hpp"

using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using caffe::Layer;
using caffe::shared_ptr;
using caffe::Timer;
using caffe::vector;


// Used in device query
DEFINE_int32(device_id, 0,
    "[devicequery,speedtest] The device id to use.");
// Used in training
DEFINE_string(solver_proto_file, "",
    "[train] The protobuf containing the solver definition.");
DEFINE_string(net_proto_file, "",
    "[speedtest] The net proto file to use.");
DEFINE_string(resume_point_file, "",
    "[train] (optional) The snapshot from which to resume training.");
DEFINE_string(pretrained_net_file, "",
    "[train] (optional) A pretrained network to run finetune from. "
    "Cannot be set simultaneously with resume_point_file.");
DEFINE_int32(run_iterations, 50,
    "[speedtest] The number of iterations to run.");
DEFINE_bool(speedtest_with_gpu, false,
    "[speedtest] Test the model with GPU.");

// A simple registry for caffe commands.
typedef int (*BrewFunction)();
typedef std::map<caffe::string, BrewFunction> BrewMap;
BrewMap g_brew_map;

#define RegisterBrewFunction(func) \
namespace { \
class __Registerer_##func { \
 public: /* NOLINT */ \
  __Registerer_##func() { \
    g_brew_map[#func] = &func; \
  } \
}; \
__Registerer_##func g_registerer_##func; \
}
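// For illustration, a hypothetical new action "foo" (not part of this
// commit) would be added by defining a plain function and registering it:
//
//   int foo() {
//     // ... do the work, returning 0 on success ...
//     return 0;
//   }
//   RegisterBrewFunction(foo);
//
// The static __Registerer_foo instance then inserts &foo into g_brew_map at
// program startup, so "caffe.bin foo" dispatches to it via GetBrewFunction
// below.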

static BrewFunction GetBrewFunction(const caffe::string& name) {
  if (g_brew_map.count(name)) {
    return g_brew_map[name];
  } else {
    LOG(ERROR) << "Available caffe actions:";
    for (BrewMap::iterator it = g_brew_map.begin();
         it != g_brew_map.end(); ++it) {
      LOG(ERROR) << "\t" << it->first;
    }
    LOG(FATAL) << "Unknown action: " << name;
    return NULL;  // not reachable, just to suppress old compiler warnings.
  }
}

// caffe actions that can be called in the form
//     caffe.bin action
// To do so, define actions as "int action()" functions, and register them
// with RegisterBrewFunction(action);

int devicequery() {
  LOG(INFO) << "Querying device_id = " << FLAGS_device_id;
  caffe::Caffe::SetDevice(FLAGS_device_id);
  caffe::Caffe::DeviceQuery();
  return 0;
}
RegisterBrewFunction(devicequery);

int train() {
  CHECK_GT(FLAGS_solver_proto_file.size(), 0);

  caffe::SolverParameter solver_param;
  caffe::ReadProtoFromTextFileOrDie(FLAGS_solver_proto_file, &solver_param);

  LOG(INFO) << "Starting Optimization";
  caffe::SGDSolver<float> solver(solver_param);
  if (FLAGS_resume_point_file.size()) {
    LOG(INFO) << "Resuming from " << FLAGS_resume_point_file;
    solver.Solve(FLAGS_resume_point_file);
  } else if (FLAGS_pretrained_net_file.size()) {
    LOG(INFO) << "Finetuning from " << FLAGS_pretrained_net_file;
    solver.net()->CopyTrainedLayersFrom(FLAGS_pretrained_net_file);
    solver.Solve();
  } else {
    solver.Solve();
  }
  LOG(INFO) << "Optimization Done.";
  return 0;
}
RegisterBrewFunction(train);

int speedtest() {
  // Set device id and mode
  if (FLAGS_speedtest_with_gpu) {
    LOG(INFO) << "Use GPU with device id " << FLAGS_device_id;
    Caffe::SetDevice(FLAGS_device_id);
    Caffe::set_mode(Caffe::GPU);
  } else {
    LOG(INFO) << "Use CPU.";
    Caffe::set_mode(Caffe::CPU);
  }
  // Instantiate the caffe net.
  Caffe::set_phase(Caffe::TRAIN);
  Net<float> caffe_net(FLAGS_net_proto_file);

  // Do a clean forward and backward pass, so that memory allocations are done
  // and future iterations will be more stable.
  LOG(INFO) << "Performing Forward";
  // Note that for the speed benchmark, we will assume that the network does
  // not take any input blobs.
  float initial_loss;
  caffe_net.Forward(vector<Blob<float>*>(), &initial_loss);
  LOG(INFO) << "Initial loss: " << initial_loss;
  LOG(INFO) << "Performing Backward";
  caffe_net.Backward();

  const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
  vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
  vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
  const vector<vector<bool> >& bottom_need_backward =
      caffe_net.bottom_need_backward();
  LOG(INFO) << "*** Benchmark begins ***";
  LOG(INFO) << "Testing for " << FLAGS_run_iterations << " iterations.";
  Timer total_timer;
  total_timer.Start();
  Timer forward_timer;
  forward_timer.Start();
  Timer timer;
  for (int i = 0; i < layers.size(); ++i) {
    const caffe::string& layername = layers[i]->layer_param().name();
    timer.Start();
    for (int j = 0; j < FLAGS_run_iterations; ++j) {
      layers[i]->Forward(bottom_vecs[i], &top_vecs[i]);
    }
    LOG(INFO) << layername << "\tforward: " << timer.MilliSeconds() <<
        " milli seconds.";
  }
  LOG(INFO) << "Forward pass: " << forward_timer.MilliSeconds() <<
      " milli seconds.";
  Timer backward_timer;
  backward_timer.Start();
  for (int i = layers.size() - 1; i >= 0; --i) {
    const caffe::string& layername = layers[i]->layer_param().name();
    timer.Start();
    for (int j = 0; j < FLAGS_run_iterations; ++j) {
      layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
          &bottom_vecs[i]);
    }
    LOG(INFO) << layername << "\tbackward: "
        << timer.MilliSeconds() << " milli seconds.";
  }
  LOG(INFO) << "Backward pass: " << backward_timer.MilliSeconds() <<
      " milli seconds.";
  LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() <<
      " milli seconds.";
  LOG(INFO) << "*** Benchmark ends ***";
  return 0;
}
RegisterBrewFunction(speedtest);

int main(int argc, char** argv) {
  caffe::GlobalInit(&argc, &argv);
  CHECK_EQ(argc, 2);
  return GetBrewFunction(caffe::string(argv[1]))();
}
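Putting the three registered actions together gives the consolidated command line. A sketch assembled from the flags defined above; the snapshot and net proto file names are hypothetical stand-ins:

    # Query a GPU device.
    build/tools/caffe.bin devicequery --device_id=0

    # Train from scratch, or resume from a solver snapshot.
    build/tools/caffe.bin train --solver_proto_file=lenet_solver.prototxt
    build/tools/caffe.bin train --solver_proto_file=lenet_solver.prototxt \
        --resume_point_file=lenet_iter_5000.solverstate

    # Benchmark a net layer by layer, 50 iterations per layer, on the GPU.
    build/tools/caffe.bin speedtest --net_proto_file=lenet_train.prototxt \
        --run_iterations=50 --speedtest_with_gpu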
18 changes: 2 additions & 16 deletions tools/device_query.cpp
@@ -1,23 +1,9 @@
// Copyright 2014 BVLC and contributors.


#include "caffe/common.hpp"
#include "caffe/net.hpp"


-using namespace caffe;  // NOLINT(build/namespaces)

int main(int argc, char** argv) {
-  if (argc > 2) {
-    LOG(ERROR) << "device_query [device_id=0]";
-    return 1;
-  }
-  if (argc == 2) {
-    LOG(INFO) << "Querying device_id=" << argv[1];
-    Caffe::SetDevice(atoi(argv[1]));
-    Caffe::DeviceQuery();
-  } else {
-    Caffe::DeviceQuery();
-  }
+  LOG(FATAL) << "Deprecated. Use caffe.bin devicequery "
+      "[--device_id=0] instead.";
  return 0;
}
26 changes: 2 additions & 24 deletions tools/finetune_net.cpp
@@ -1,31 +1,9 @@
// Copyright 2014 BVLC and contributors.
-//
-// This is a simple script that allows one to quickly finetune a network.
-// Usage:
-//    finetune_net solver_proto_file pretrained_net

-#include <string>

#include "caffe/caffe.hpp"

-using namespace caffe;  // NOLINT(build/namespaces)

int main(int argc, char** argv) {
-  ::google::InitGoogleLogging(argv[0]);
-  if (argc != 3) {
-    LOG(ERROR) << "Usage: finetune_net solver_proto_file pretrained_net";
-    return 1;
-  }
-
-  SolverParameter solver_param;
-  ReadProtoFromTextFileOrDie(argv[1], &solver_param);
-
-  LOG(INFO) << "Starting Optimization";
-  SGDSolver<float> solver(solver_param);
-  LOG(INFO) << "Loading from " << argv[2];
-  solver.net()->CopyTrainedLayersFrom(string(argv[2]));
-  solver.Solve();
-  LOG(INFO) << "Optimization Done.";
-
+  LOG(FATAL) << "Deprecated. Use caffe.bin train --solver_proto_file=... "
+      "[--pretrained_net_file=...] instead.";
  return 0;
}