From c840119b6f6fbe80e91884deecfa46b39dea753e Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Wed, 9 Oct 2013 22:55:53 -0700 Subject: [PATCH] dropout serious bugfix. Seems to be working... --- src/caffe/layers/dropout_layer.cu | 4 +--- src/caffe/proto/caffe.proto | 5 ++--- src/caffe/test/test_protobuf.cpp | 1 - src/programs/demo_mnist.cpp | 5 ++++- src/programs/train_alexnet.cpp | 5 +++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/caffe/layers/dropout_layer.cu b/src/caffe/layers/dropout_layer.cu index 177fe2eeea9..df94f2deb24 100644 --- a/src/caffe/layers/dropout_layer.cu +++ b/src/caffe/layers/dropout_layer.cu @@ -97,9 +97,7 @@ __global__ void DropoutBackward(const int n, const Dtype* in_diff, Dtype* out_diff) { int index = threadIdx.x + blockIdx.x * blockDim.x; if (index < n) { - if (mask[index] > threshold) { - out_diff[index] = in_diff[index] * scale; - } + out_diff[index] = in_diff[index] * scale * (mask[index] > threshold); } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 0231ad94453..3961a42223f 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -23,9 +23,8 @@ message Datum { } message FillerParameter { - // The filler type. In default we will set it to Gaussian for easy - // debugging. - optional string type = 1 [default = 'gaussian']; + // The filler type. + optional string type = 1 [default = 'constant']; optional float value = 2 [default = 0]; // the value in constant filler optional float min = 3 [default = 0]; // the min value in uniform filler optional float max = 4 [default = 1]; // the max value in uniform filler diff --git a/src/caffe/test/test_protobuf.cpp b/src/caffe/test/test_protobuf.cpp index 20032687c1f..11cdcf69048 100644 --- a/src/caffe/test/test_protobuf.cpp +++ b/src/caffe/test/test_protobuf.cpp @@ -26,5 +26,4 @@ TEST_F(ProtoTest, TestSerialization) { EXPECT_TRUE(true); } - } diff --git a/src/programs/demo_mnist.cpp b/src/programs/demo_mnist.cpp index f442fe66573..37e697e9233 100644 --- a/src/programs/demo_mnist.cpp +++ b/src/programs/demo_mnist.cpp @@ -19,6 +19,7 @@ using namespace caffe; int main(int argc, char** argv) { cudaSetDevice(0); Caffe::set_mode(Caffe::GPU); + Caffe::set_phase(Caffe::TRAIN); NetParameter net_param; ReadProtoFromTextFile("data/lenet.prototxt", @@ -34,7 +35,7 @@ int main(int argc, char** argv) { SolverParameter solver_param; solver_param.set_base_lr(0.01); - solver_param.set_display(0); + solver_param.set_display(1); solver_param.set_max_iter(6000); solver_param.set_lr_policy("inv"); solver_param.set_gamma(0.0001); @@ -63,6 +64,8 @@ int main(int argc, char** argv) { Net caffe_traintest_net(traintest_net_param, bottom_vec); caffe_traintest_net.CopyTrainedLayersFrom(trained_net_param); + Caffe::set_phase(Caffe::TEST); + // Test run double train_accuracy = 0; int batch_size = traintest_net_param.layers(0).layer().batchsize(); diff --git a/src/programs/train_alexnet.cpp b/src/programs/train_alexnet.cpp index 3fc2139cc9d..99e37c3ef74 100644 --- a/src/programs/train_alexnet.cpp +++ b/src/programs/train_alexnet.cpp @@ -19,6 +19,7 @@ using namespace caffe; int main(int argc, char** argv) { cudaSetDevice(0); Caffe::set_mode(Caffe::GPU); + Caffe::set_phase(Caffe::TRAIN); NetParameter net_param; ReadProtoFromTextFile(argv[1], @@ -49,11 +50,11 @@ int main(int argc, char** argv) { SolverParameter solver_param; solver_param.set_base_lr(0.01); solver_param.set_display(1); - solver_param.set_max_iter(2); + solver_param.set_max_iter(60000); solver_param.set_lr_policy("fixed"); solver_param.set_momentum(0.9); solver_param.set_weight_decay(0.0005); - solver_param.set_snapshot(1); + solver_param.set_snapshot(5000); solver_param.set_snapshot_prefix("alexnet"); LOG(ERROR) << "Starting Optimization";