Merge with latest develop branch. Optimizer lib2 #2386

Merged: 35 commits, Jun 20, 2017

Commits (35)
62cd5c7
"failed to resolve conflict. apply to HEAD"
dzhwinter Jun 4, 2017
3158efe
"move cmake scripts too"
dzhwinter Jun 4, 2017
5b8a0c5
"optimizer remove init create with proto"
dzhwinter Jun 5, 2017
8610ba1
"remove get config proto"
dzhwinter Jun 5, 2017
b4aa0ec
"modify update interface"
dzhwinter Jun 5, 2017
26e9c4e
"add vector alias to make name clear"
dzhwinter Jun 5, 2017
5f9cd8c
"rename test file name"
dzhwinter Jun 5, 2017
b9d024e
"remove useless test file"
dzhwinter Jun 5, 2017
5ab958b
"change size_t type to avoid warning"
dzhwinter Jun 5, 2017
fd8c510
"format name with google style"
dzhwinter Jun 6, 2017
3b1294a
"add checkpoint interface: set state, get state"
dzhwinter Jun 6, 2017
81cad37
"remove comments"
dzhwinter Jun 6, 2017
beb2697
"change header guard to pragma"
dzhwinter Jun 6, 2017
5a1e678
"update macro and fix some part"
dzhwinter Jun 6, 2017
bc26df7
"polish code style and update based review comment"
dzhwinter Jun 7, 2017
b9cb0f2
"update marco"
dzhwinter Jun 7, 2017
6cbbc2e
"add comments"
dzhwinter Jun 7, 2017
f5ff283
"fix comment"
dzhwinter Jun 7, 2017
e456796
"update with comment"
dzhwinter Jun 9, 2017
33b4dee
"update serialization part"
dzhwinter Jun 9, 2017
0fc4201
"update interface"
dzhwinter Jun 9, 2017
b7e68e0
"serialization modify"
dzhwinter Jun 11, 2017
b72e8aa
"seperate serialization proto state"
dzhwinter Jun 13, 2017
1814fc2
"fix lr_policy serialization"
dzhwinter Jun 14, 2017
e148bc1
"remove unused tensor line"
dzhwinter Jun 14, 2017
a46f3fc
"fix double release tensor buffer error."
dzhwinter Jun 14, 2017
df5bc78
"fix tensor shared_ptr"
dzhwinter Jun 15, 2017
65d9e33
"modify config name"
dzhwinter Jun 19, 2017
ec65fa8
"protobuf required to optional"
dzhwinter Jun 19, 2017
baef96e
Merge branch 'develop' into optimizer_lib2
dzhwinter Jun 19, 2017
99849cf
rename Tensor.h
dzhwinter Jun 19, 2017
72b6b26
"ci formatter"
dzhwinter Jun 19, 2017
03884f0
formatter
dzhwinter Jun 19, 2017
a166e52
"formatter in docker"
dzhwinter Jun 19, 2017
33ddc89
formatter in docker
dzhwinter Jun 19, 2017
1 change: 1 addition & 0 deletions cmake/util.cmake
@@ -84,6 +84,7 @@ function(link_paddle_exe TARGET_NAME)
        paddle_parameter
        paddle_proto
        paddle_cuda
        paddle_optimizer
        ${EXTERNAL_LIBS}
        ${CMAKE_THREAD_LIBS_INIT}
        ${CMAKE_DL_LIBS}
1 change: 1 addition & 0 deletions paddle/CMakeLists.txt
@@ -8,6 +8,7 @@ add_subdirectory(gserver)
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(scripts)
add_subdirectory(optimizer)
add_subdirectory(strings)

# Do not build go directory until go cmake is working smoothly.
16 changes: 16 additions & 0 deletions paddle/optimizer/CMakeLists.txt
@@ -0,0 +1,16 @@
include_directories(${CMAKE_CURRENT_BINARY_DIR})

set(OPITMIZER_SRCS
    adadelta_optimizer.cc
    adagrad_optimizer.cc
    adam_optimizer.cc
    optimizer.cc
    parameter_optimizer.cc
    sgd_optimizer.cc
    )

add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS})
add_dependencies(paddle_optimizer gen_proto_cpp)

add_simple_unittest(serialization_test)
add_simple_unittest(parameter_optimizer_test)
55 changes: 55 additions & 0 deletions paddle/optimizer/adadelta_optimizer.cc
@@ -0,0 +1,55 @@
#include "adadelta_optimizer.h"
#include <algorithm>
#include <cmath>

namespace paddle {
namespace optimizer {

void AdadeltaOptimizer::Update(const Tensor* gradient) {
  num_sample_passed_ += 1;
  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
  Tensor& param = *parameter_;
  const Tensor& grad = *gradient;
  Tensor& accum_g = *accum_gradient_;
  Tensor& accum_d = *accum_delta_;
  Tensor& update_d = *update_delta_;
  for (size_t i = 0; i < param.size(); ++i) {
    accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i];

    update_d[i] = std::sqrt(accum_d[i] + epsilon_) /
                  std::sqrt(accum_g[i] + epsilon_) * grad[i];

    accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i];

    param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i];
  }
}

const char* AdadeltaOptimizer::SerializeState(int* state_len) {
  AdadeltaOptimizerState state;
  // TODO(zhihong) : add lr_policy serialization
  state.set_num_sample_passed(num_sample_passed_);

  TensorToProto(*parameter_, state.mutable_parameter());
  TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
  TensorToProto(*accum_delta_, state.mutable_accum_delta());
  TensorToProto(*update_delta_, state.mutable_update_delta());
  auto str = state.SerializeAsString();
  *state_len = str.size();
  return str.c_str();
}

void AdadeltaOptimizer::DeserializeState(const std::string& str) {
  AdadeltaOptimizerState state;
  state.ParseFromString(str);
  // TODO(zhihong) : add lr_policy DeserializeState
  num_sample_passed_ = state.num_sample_passed();

  ProtoToTensor(state.parameter(), parameter_);
  ProtoToTensor(state.accum_gradient(), accum_gradient_);
  ProtoToTensor(state.accum_delta(), accum_delta_);
  ProtoToTensor(state.update_delta(), update_delta_);
}

} // namespace optimizer
} // namespace paddle
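For reference, here is a minimal standalone sketch of the same Adadelta update rule on plain std::vector values, independent of the Tensor and LrPolicy types above. The loop mirrors AdadeltaOptimizer::Update; the hyperparameter values are illustrative placeholders, not anything taken from this PR.

```cpp
// Standalone sketch of one Adadelta step (mirrors the loop above).
// Hyperparameter values are placeholders for illustration only.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const double rho = 0.95, epsilon = 1e-6, decay = 0.0, lr = 1.0;
  std::vector<double> param = {0.5, -0.3}, grad = {0.1, 0.2};
  std::vector<double> accum_g(param.size(), 0.0);   // running E[g^2]
  std::vector<double> accum_d(param.size(), 0.0);   // running E[dx^2]
  std::vector<double> update_d(param.size(), 0.0);  // per-coordinate update

  for (size_t i = 0; i < param.size(); ++i) {
    accum_g[i] = rho * accum_g[i] + (1.0 - rho) * grad[i] * grad[i];
    update_d[i] = std::sqrt(accum_d[i] + epsilon) /
                  std::sqrt(accum_g[i] + epsilon) * grad[i];
    accum_d[i] = rho * accum_d[i] + (1.0 - rho) * update_d[i] * update_d[i];
    param[i] -= lr * update_d[i] + lr * decay * param[i];
  }
  for (double p : param) std::printf("%f\n", p);
  return 0;
}
```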
39 changes: 39 additions & 0 deletions paddle/optimizer/adadelta_optimizer.h
@@ -0,0 +1,39 @@
#pragma once

#include "parameter_optimizer.h"

namespace paddle {
namespace optimizer {

class AdadeltaOptimizer : public ParameterOptimizer {
 public:
  AdadeltaOptimizer(
      Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay)
      : ParameterOptimizer(parameter, lr),
        accum_gradient_(new Tensor(parameter->size())),
        accum_delta_(new Tensor(parameter->size())),
        update_delta_(new Tensor(parameter->size())),
        rho_(rho),
        epsilon_(epsilon),
        decay_(decay) {}

  ~AdadeltaOptimizer() {
    if (accum_gradient_) delete accum_gradient_;
    if (accum_delta_) delete accum_delta_;
    if (update_delta_) delete update_delta_;
  }
  void Update(const Tensor *gradient);
  const char *SerializeState(int *state_len);
  void DeserializeState(const std::string &state);

 private:
  Tensor *accum_gradient_;
  Tensor *accum_delta_;
  Tensor *update_delta_;
  double rho_;
  double epsilon_;
  double decay_;
};

} // namespace optimizer
} // namespace paddle
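The accumulator tensors above are held through raw pointers and released in the destructor (the null checks before delete are redundant, since deleting a null pointer is a no-op). As a point of comparison only, not code from this PR, the same ownership could be expressed with std::unique_ptr so no hand-written destructor is needed; a sketch, assuming only the Tensor(size_t) constructor used above:

```cpp
// Comparison sketch only (not the code in this PR): the same accumulator
// ownership expressed with std::unique_ptr, so cleanup is automatic.
#include <memory>

namespace example {

class Tensor {  // stand-in for the library's Tensor, for illustration only
 public:
  explicit Tensor(size_t size) : size_(size) {}
  size_t size() const { return size_; }

 private:
  size_t size_;
};

class AdadeltaBuffers {
 public:
  explicit AdadeltaBuffers(const Tensor &parameter)
      : accum_gradient_(new Tensor(parameter.size())),
        accum_delta_(new Tensor(parameter.size())),
        update_delta_(new Tensor(parameter.size())) {}
  // No user-defined destructor: each unique_ptr releases its buffer.

 private:
  std::unique_ptr<Tensor> accum_gradient_;
  std::unique_ptr<Tensor> accum_delta_;
  std::unique_ptr<Tensor> update_delta_;
};

}  // namespace example
```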
42 changes: 42 additions & 0 deletions paddle/optimizer/adagrad_optimizer.cc
@@ -0,0 +1,42 @@
#include <cmath>

#include "adagrad_optimizer.h"

namespace paddle {
namespace optimizer {

void AdagradOptimizer::Update(const Tensor* gradient) {
  num_sample_passed_ += 1;
  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
  Tensor& param = *parameter_;
  Tensor& accum_g = *accum_gradient_;
  const Tensor& grad = *gradient;
  for (size_t i = 0; i < param.size(); ++i) {
    accum_g[i] += grad[i] * grad[i];
    param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) +
                learning_rate * decay_ * param[i];
  }
}
const char* AdagradOptimizer::SerializeState(int* state_len) {
  AdagradOptimizerState state;
  // TODO(zhihong) : add lr_policy serialization
  state.set_num_sample_passed(num_sample_passed_);

  TensorToProto(*parameter_, state.mutable_parameter());
  TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
  auto str = state.SerializeAsString();
  *state_len = str.size();
  return str.c_str();
}

void AdagradOptimizer::DeserializeState(const std::string& str) {
  AdagradOptimizerState state;
  state.ParseFromString(str);
  // TODO(zhihong) : add lr_policy DeserializeState
  num_sample_passed_ = state.num_sample_passed();
  ProtoToTensor(state.parameter(), parameter_);
  ProtoToTensor(state.accum_gradient(), accum_gradient_);
}

} // namespace optimizer
} // namespace paddle
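The accumulated squared gradient accum_g is what makes Adagrad's effective per-coordinate step size, learning_rate / sqrt(accum_g + epsilon), shrink as updates accumulate. A minimal standalone sketch of that effect, with placeholder values rather than anything from this PR:

```cpp
// Standalone sketch: Adagrad's accumulated squared gradient shrinks the
// effective step size over iterations. Values are illustrative placeholders.
#include <cmath>
#include <cstdio>

int main() {
  const double lr = 0.1, epsilon = 1e-6;
  const double grad = 0.5;  // pretend the same gradient arrives every step
  double accum_g = 0.0;
  for (int step = 1; step <= 5; ++step) {
    accum_g += grad * grad;
    const double effective_lr = lr / std::sqrt(accum_g + epsilon);
    std::printf("step %d: effective lr = %f\n", step, effective_lr);
  }
  return 0;
}
```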
32 changes: 32 additions & 0 deletions paddle/optimizer/adagrad_optimizer.h
@@ -0,0 +1,32 @@
#pragma once

#include "parameter_optimizer.h"

namespace paddle {
namespace optimizer {

class AdagradOptimizer : public ParameterOptimizer {
 public:
  AdagradOptimizer(Tensor *parameter,
                   LrPolicy *lr,
                   double epsilon,
                   double decay)
      : ParameterOptimizer(parameter, lr),
        accum_gradient_(new Tensor(parameter->size())),
        epsilon_(epsilon),
        decay_(decay) {}
  ~AdagradOptimizer() {
    if (accum_gradient_) delete accum_gradient_;
  }
  void Update(const Tensor *gradient);
  const char *SerializeState(int *state_len);
  void DeserializeState(const std::string &state);

 private:
  Tensor *accum_gradient_;
  double epsilon_;
  double decay_;
};

} // namespace optimizer
} // namespace paddle
48 changes: 48 additions & 0 deletions paddle/optimizer/adam_optimizer.cc
@@ -0,0 +1,48 @@
#include "adam_optimizer.h"
#include <cmath>

namespace paddle {
namespace optimizer {

void AdamOptimizer::Update(const Tensor *gradient) {
  num_sample_passed_ += 1;
  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
  double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_);
  double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_);
  learning_rate *= std::sqrt(coef2) / coef1;
  Tensor &param = *parameter_;
  const Tensor &grad = *gradient;
  Tensor &m = *momentums_;
  Tensor &v = *velocitys_;
  for (size_t i = 0; i < param.size(); ++i) {
    m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i];
    v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i];
    param[i] -=
        learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]);
  }
}

const char *AdamOptimizer::SerializeState(int *state_len) {
  AdamOptimizerState state;
  // TODO(zhihong) : add lr_policy serialization
  state.set_num_sample_passed(num_sample_passed_);
  TensorToProto(*parameter_, state.mutable_parameter());
  TensorToProto(*momentums_, state.mutable_momentums());
  TensorToProto(*velocitys_, state.mutable_velocitys());
  auto str = state.SerializeAsString();
  *state_len = str.size();
  return str.c_str();
}

void AdamOptimizer::DeserializeState(const std::string &str) {
  AdamOptimizerState state;
  state.ParseFromString(str);
  // TODO(zhihong) : add lr_policy DeserializeState
  num_sample_passed_ = state.num_sample_passed();

  ProtoToTensor(state.parameter(), parameter_);
  ProtoToTensor(state.momentums(), momentums_);
  ProtoToTensor(state.velocitys(), velocitys_);
}
} // namespace optimizer
} // namespace paddle
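A minimal standalone sketch of one Adam step, with the bias correction folded into the learning rate the same way AdamOptimizer::Update does above. All hyperparameter values are illustrative placeholders, not defaults from this PR.

```cpp
// Standalone sketch of one bias-corrected Adam step (mirrors the loop above).
// Hyperparameter values are placeholders for illustration only.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const double lr = 0.001, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8, decay = 0.0;
  std::vector<double> param = {0.5, -0.3}, grad = {0.1, 0.2};
  std::vector<double> m(param.size(), 0.0), v(param.size(), 0.0);

  const int t = 1;  // number of updates so far (num_sample_passed_ above)
  const double coef1 = 1.0 - std::pow(beta1, t);
  const double coef2 = 1.0 - std::pow(beta2, t);
  const double corrected_lr = lr * std::sqrt(coef2) / coef1;

  for (size_t i = 0; i < param.size(); ++i) {
    m[i] = beta1 * m[i] + (1.0 - beta1) * grad[i];
    v[i] = beta2 * v[i] + (1.0 - beta2) * grad[i] * grad[i];
    param[i] -= corrected_lr * (m[i] / std::sqrt(v[i] + epsilon) + decay * param[i]);
  }
  for (double p : param) std::printf("%f\n", p);
  return 0;
}
```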
41 changes: 41 additions & 0 deletions paddle/optimizer/adam_optimizer.h
@@ -0,0 +1,41 @@
#pragma once

#include "parameter_optimizer.h"

namespace paddle {
namespace optimizer {

class AdamOptimizer : public ParameterOptimizer {
 public:
  AdamOptimizer(Tensor *parameter,
                LrPolicy *lr,
                double beta_1,
                double beta_2,
                double epsilon,
                double decay)
      : ParameterOptimizer(parameter, lr),
        momentums_(new Tensor(parameter->size())),
        velocitys_(new Tensor(parameter->size())),
        beta_1_(beta_1),
        beta_2_(beta_2),
        epsilon_(epsilon),
        decay_(decay) {}
  ~AdamOptimizer() {
    if (momentums_) delete momentums_;
    if (velocitys_) delete velocitys_;
  }
  void Update(const Tensor *gradient);
  const char *SerializeState(int *state_len);
  void DeserializeState(const std::string &state);

 private:
  Tensor *momentums_;
  Tensor *velocitys_;
  double beta_1_;
  double beta_2_;
  double epsilon_;
  double decay_;
};

} // namespace optimizer
} // namespace paddle
53 changes: 53 additions & 0 deletions paddle/optimizer/lr_policy.h
@@ -0,0 +1,53 @@
#pragma once

#include <algorithm>
#include "OptimizerConfig.pb.h"

namespace paddle {
namespace optimizer {

class LrPolicy {
 public:
  virtual ~LrPolicy() {}
  virtual double LearningRate(const uint64_t num_sample_passed) = 0;
  virtual const char *SerializeState(int *state_len) = 0;
  virtual void DeserializeState(const std::string &state) = 0;
};

// constant learning rate policy
class ConstLr final : public LrPolicy {
 public:
  ConstLr(double lr) : learning_rate(lr){};
  double LearningRate(const uint64_t num_sample_passed) {
    return learning_rate;
  }
  const char *SerializeState(int *state_len) { return nullptr; }
  void DeserializeState(const std::string &state) {}

 private:
  double learning_rate;
};

class LinearLr final : public LrPolicy {
 public:
  LinearLr(double lr, double lr_decay_a, double lr_decay_b)
      : learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
  double LearningRate(const uint64_t num_sample_passed) {
    return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
  }
  const char *SerializeState(int *state_len) {
    // TODO(zhihong) : add lr_policy serialization
    return nullptr;
  }
  void DeserializeState(const std::string &state) {
    // TODO(zhihong) : add lr_policy serialization
  }

 private:
  double learning_rate;
  double lr_decay_a;
  double lr_decay_b;
};

} // namespace optimizer
} // namespace paddle
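To tie the pieces together, here is a hedged usage sketch of how a caller might drive one of these optimizers with a ConstLr policy. It assumes the Tensor type used in the diffs exposes the Tensor(size_t) constructor, size(), and non-const operator[] seen above; the sizes and values are illustrative placeholders, and construction in the library itself likely goes through optimizer.cc and the OptimizerConfig proto rather than direct instantiation as done here.

```cpp
// Hedged usage sketch (not a test from this PR): driving AdagradOptimizer
// directly with a ConstLr policy. Assumes the Tensor interface used above;
// all values are placeholders, and cleanup is omitted for brevity.
#include <cstddef>
#include "adagrad_optimizer.h"
#include "lr_policy.h"

using paddle::optimizer::AdagradOptimizer;
using paddle::optimizer::ConstLr;
using paddle::optimizer::Tensor;

int main() {
  const size_t size = 4;
  Tensor *parameter = new Tensor(size);  // raw pointer, as the constructor expects
  for (size_t i = 0; i < size; ++i) (*parameter)[i] = 0.0;

  // Constructor signature from adagrad_optimizer.h: parameter, lr policy,
  // epsilon, decay.
  AdagradOptimizer optimizer(parameter, new ConstLr(0.01),
                             /*epsilon=*/1e-6, /*decay=*/0.0);

  Tensor gradient(size);
  for (size_t i = 0; i < size; ++i) gradient[i] = 0.1;
  optimizer.Update(&gradient);  // one Adagrad step over all coordinates

  int state_len = 0;
  optimizer.SerializeState(&state_len);  // protobuf-backed checkpoint state
  return 0;
}
```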