Merge pull request #2386 from dzhwinter/optimizer_lib2

Merge with the latest develop branch. Optimizer lib2.
Showing 22 changed files with 1,082 additions and 0 deletions.

CMakeLists.txt (new file, 16 lines):
include_directories(${CMAKE_CURRENT_BINARY_DIR})

set(OPTIMIZER_SRCS
    adadelta_optimizer.cc
    adagrad_optimizer.cc
    adam_optimizer.cc
    optimizer.cc
    parameter_optimizer.cc
    sgd_optimizer.cc
)

add_library(paddle_optimizer STATIC ${OPTIMIZER_SRCS})
add_dependencies(paddle_optimizer gen_proto_cpp)

add_simple_unittest(serialization_test)
add_simple_unittest(parameter_optimizer_test)

adadelta_optimizer.cc (new file, 55 lines):
#include "adadelta_optimizer.h" | ||
#include <algorithm> | ||
#include <cmath> | ||
|
||
namespace paddle { | ||
namespace optimizer { | ||
|
||
void AdadeltaOptimizer::Update(const Tensor* gradient) { | ||
num_sample_passed_ += 1; | ||
double learning_rate = lr_policy_->LearningRate(num_sample_passed_); | ||
Tensor& param = *parameter_; | ||
const Tensor& grad = *gradient; | ||
Tensor& accum_g = *accum_gradient_; | ||
Tensor& accum_d = *accum_delta_; | ||
Tensor& update_d = *update_delta_; | ||
for (size_t i = 0; i < param.size(); ++i) { | ||
accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i]; | ||
|
||
update_d[i] = std::sqrt(accum_d[i] + epsilon_) / | ||
std::sqrt(accum_g[i] + epsilon_) * grad[i]; | ||
|
||
accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i]; | ||
|
||
param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i]; | ||
} | ||
} | ||
|
||
const char* AdadeltaOptimizer::SerializeState(int* state_len) { | ||
AdadeltaOptimizerState state; | ||
// TODO(zhihong) : add lr_policy serialization | ||
state.set_num_sample_passed(num_sample_passed_); | ||
|
||
TensorToProto(*parameter_, state.mutable_parameter()); | ||
TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); | ||
TensorToProto(*accum_delta_, state.mutable_accum_delta()); | ||
TensorToProto(*update_delta_, state.mutable_update_delta()); | ||
auto str = state.SerializeAsString(); | ||
*state_len = str.size(); | ||
return str.c_str(); | ||
} | ||
|
||
void AdadeltaOptimizer::DeserializeState(const std::string& str) { | ||
AdadeltaOptimizerState state; | ||
state.ParseFromString(str); | ||
// TODO(zhihong) : add lr_policy DeserializeState | ||
num_sample_passed_ = state.num_sample_passed(); | ||
|
||
ProtoToTensor(state.parameter(), parameter_); | ||
ProtoToTensor(state.accum_gradient(), accum_gradient_); | ||
ProtoToTensor(state.accum_delta(), accum_delta_); | ||
ProtoToTensor(state.update_delta(), update_delta_); | ||
} | ||
|
||
} // namespace optimizer | ||
} // namespace paddle |
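
For reference, the loop above is the standard Adadelta recurrence (Zeiler, 2012) with the weight-decay term folded into the final step. In the code, \rho is rho_, \epsilon is epsilon_, \eta is the rate returned by lr_policy_, and \lambda is decay_:

\begin{aligned}
E[g^2]_t &= \rho\,E[g^2]_{t-1} + (1-\rho)\,g_t^2 \\
\Delta x_t &= \frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\,g_t \\
E[\Delta x^2]_t &= \rho\,E[\Delta x^2]_{t-1} + (1-\rho)\,\Delta x_t^2 \\
x_{t+1} &= x_t - \eta\,(\Delta x_t + \lambda\,x_t)
\end{aligned}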

adadelta_optimizer.h (new file, 39 lines):
#pragma once

#include "parameter_optimizer.h"

namespace paddle {
namespace optimizer {

class AdadeltaOptimizer : public ParameterOptimizer {
public:
  AdadeltaOptimizer(
      Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay)
      : ParameterOptimizer(parameter, lr),
        accum_gradient_(new Tensor(parameter->size())),
        accum_delta_(new Tensor(parameter->size())),
        update_delta_(new Tensor(parameter->size())),
        rho_(rho),
        epsilon_(epsilon),
        decay_(decay) {}

  ~AdadeltaOptimizer() {
    if (accum_gradient_) delete accum_gradient_;
    if (accum_delta_) delete accum_delta_;
    if (update_delta_) delete update_delta_;
  }
  void Update(const Tensor *gradient);
  const char *SerializeState(int *state_len);
  void DeserializeState(const std::string &state);

private:
  Tensor *accum_gradient_;
  Tensor *accum_delta_;
  Tensor *update_delta_;
  double rho_;
  double epsilon_;
  double decay_;
};

}  // namespace optimizer
}  // namespace paddle
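
A minimal, hypothetical driver for the class above. It assumes Tensor lives in paddle::optimizer and exposes Tensor(size_t), operator[], and size() (consistent with how the optimizers use it), and that ConstLr from this commit's learning-rate policy header is visible through the include; none of that wiring is shown in this diff, and the values are illustrative:

#include "adadelta_optimizer.h"

using namespace paddle::optimizer;

int main() {
  const size_t kSize = 4;
  // The optimizer keeps a pointer to the parameter tensor; ownership
  // conventions are defined by ParameterOptimizer (not shown in this diff).
  Tensor* parameter = new Tensor(kSize);
  Tensor gradient(kSize);
  for (size_t i = 0; i < kSize; ++i) {
    (*parameter)[i] = 1.0;  // illustrative values
    gradient[i] = 0.5;
  }

  // Constructor argument order: parameter, lr policy, rho, epsilon, decay.
  AdadeltaOptimizer opt(parameter, new ConstLr(0.1), 0.9, 1e-6, 0.0);
  opt.Update(&gradient);  // one in-place update of *parameter
  return 0;
}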

adagrad_optimizer.cc (new file, 42 lines):
#include <cmath>

#include "adagrad_optimizer.h"

namespace paddle {
namespace optimizer {

void AdagradOptimizer::Update(const Tensor* gradient) {
  num_sample_passed_ += 1;
  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
  Tensor& param = *parameter_;
  Tensor& accum_g = *accum_gradient_;
  const Tensor& grad = *gradient;
  for (size_t i = 0; i < param.size(); ++i) {
    accum_g[i] += grad[i] * grad[i];
    // Descend along the gradient; the subtraction keeps the step direction
    // consistent with the Adadelta and Adam updates in this library.
    param[i] -= learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) +
                learning_rate * decay_ * param[i];
  }
}
const char* AdagradOptimizer::SerializeState(int* state_len) {
  AdagradOptimizerState state;
  // TODO(zhihong) : add lr_policy serialization
  state.set_num_sample_passed(num_sample_passed_);

  TensorToProto(*parameter_, state.mutable_parameter());
  TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
  auto str = state.SerializeAsString();
  *state_len = str.size();
  // NOTE: as in AdadeltaOptimizer, str is a local and the returned pointer
  // dangles once this function returns; the caller must copy it immediately.
  return str.c_str();
}

void AdagradOptimizer::DeserializeState(const std::string& str) {
  AdagradOptimizerState state;
  state.ParseFromString(str);
  // TODO(zhihong) : add lr_policy DeserializeState
  num_sample_passed_ = state.num_sample_passed();
  ProtoToTensor(state.parameter(), parameter_);
  ProtoToTensor(state.accum_gradient(), accum_gradient_);
}

}  // namespace optimizer
}  // namespace paddle
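
The corresponding update rule, with G the running sum of squared gradients (accum_g in the code) and \lambda the decay:

\begin{aligned}
G_t &= G_{t-1} + g_t^2 \\
x_{t+1} &= x_t - \eta\left(\frac{g_t}{\sqrt{G_t + \epsilon}} + \lambda\,x_t\right)
\end{aligned}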

adagrad_optimizer.h (new file, 32 lines):
#pragma once

#include "parameter_optimizer.h"

namespace paddle {
namespace optimizer {

class AdagradOptimizer : public ParameterOptimizer {
public:
  AdagradOptimizer(Tensor *parameter,
                   LrPolicy *lr,
                   double epsilon,
                   double decay)
      : ParameterOptimizer(parameter, lr),
        accum_gradient_(new Tensor(parameter->size())),
        epsilon_(epsilon),
        decay_(decay) {}
  ~AdagradOptimizer() {
    if (accum_gradient_) delete accum_gradient_;
  }
  void Update(const Tensor *gradient);
  const char *SerializeState(int *state_len);
  void DeserializeState(const std::string &state);

private:
  Tensor *accum_gradient_;
  double epsilon_;
  double decay_;
};

}  // namespace optimizer
}  // namespace paddle

adam_optimizer.cc (new file, 48 lines):
#include "adam_optimizer.h" | ||
#include <cmath> | ||
|
||
namespace paddle { | ||
namespace optimizer { | ||
|
||
void AdamOptimizer::Update(const Tensor *gradient) { | ||
num_sample_passed_ += 1; | ||
double learning_rate = lr_policy_->LearningRate(num_sample_passed_); | ||
double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_); | ||
double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_); | ||
learning_rate *= std::sqrt(coef2) / coef1; | ||
Tensor ¶m = *parameter_; | ||
const Tensor &grad = *gradient; | ||
Tensor &m = *momentums_; | ||
Tensor &v = *velocitys_; | ||
for (size_t i = 0; i < param.size(); ++i) { | ||
m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i]; | ||
v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i]; | ||
param[i] -= | ||
learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]); | ||
} | ||
} | ||
|
||
const char *AdamOptimizer::SerializeState(int *state_len) { | ||
AdamOptimizerState state; | ||
// TODO(zhihong) : add lr_policy serialization | ||
state.set_num_sample_passed(num_sample_passed_); | ||
TensorToProto(*parameter_, state.mutable_parameter()); | ||
TensorToProto(*momentums_, state.mutable_momentums()); | ||
TensorToProto(*velocitys_, state.mutable_velocitys()); | ||
auto str = state.SerializeAsString(); | ||
*state_len = str.size(); | ||
return str.c_str(); | ||
} | ||
|
||
void AdamOptimizer::DeserializeState(const std::string &str) { | ||
AdamOptimizerState state; | ||
state.ParseFromString(str); | ||
// TODO(zhihong) : add lr_policy DeserializeState | ||
num_sample_passed_ = state.num_sample_passed(); | ||
|
||
ProtoToTensor(state.parameter(), parameter_); | ||
ProtoToTensor(state.momentums(), momentums_); | ||
ProtoToTensor(state.velocitys(), velocitys_); | ||
} | ||
} // namespace optimizer | ||
} // namespace paddle |
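
Written out, the step above is Adam with both bias corrections folded into the step size; note that, as a common variant, \epsilon sits inside the square root rather than outside it:

\begin{aligned}
m_t &= \beta_1\,m_{t-1} + (1-\beta_1)\,g_t \\
v_t &= \beta_2\,v_{t-1} + (1-\beta_2)\,g_t^2 \\
\hat\eta_t &= \eta\,\frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t} \\
x_{t+1} &= x_t - \hat\eta_t\left(\frac{m_t}{\sqrt{v_t + \epsilon}} + \lambda\,x_t\right)
\end{aligned}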

adam_optimizer.h (new file, 41 lines):
#pragma once

#include "parameter_optimizer.h"

namespace paddle {
namespace optimizer {

class AdamOptimizer : public ParameterOptimizer {
public:
  AdamOptimizer(Tensor *parameter,
                LrPolicy *lr,
                double beta_1,
                double beta_2,
                double epsilon,
                double decay)
      : ParameterOptimizer(parameter, lr),
        momentums_(new Tensor(parameter->size())),
        velocitys_(new Tensor(parameter->size())),
        beta_1_(beta_1),
        beta_2_(beta_2),
        epsilon_(epsilon),
        decay_(decay) {}
  ~AdamOptimizer() {
    if (momentums_) delete momentums_;
    if (velocitys_) delete velocitys_;
  }
  void Update(const Tensor *gradient);
  const char *SerializeState(int *state_len);
  void DeserializeState(const std::string &state);

private:
  Tensor *momentums_;
  Tensor *velocitys_;
  double beta_1_;
  double beta_2_;
  double epsilon_;
  double decay_;
};

}  // namespace optimizer
}  // namespace paddle

Learning-rate policy header (new file, 53 lines):
#pragma once

#include <algorithm>
#include "OptimizerConfig.pb.h"

namespace paddle {
namespace optimizer {

class LrPolicy {
public:
  virtual ~LrPolicy() {}
  virtual double LearningRate(const uint64_t num_sample_passed) = 0;
  virtual const char *SerializeState(int *state_len) = 0;
  virtual void DeserializeState(const std::string &state) = 0;
};

// constant learning rate policy
class ConstLr final : public LrPolicy {
public:
  ConstLr(double lr) : learning_rate(lr) {}
  double LearningRate(const uint64_t num_sample_passed) {
    return learning_rate;
  }
  const char *SerializeState(int *state_len) {
    *state_len = 0;  // no state to serialize
    return nullptr;
  }
  void DeserializeState(const std::string &state) {}

private:
  double learning_rate;
};

// linearly decaying learning rate policy, clipped from below at lr_decay_b
class LinearLr final : public LrPolicy {
public:
  LinearLr(double lr, double lr_decay_a, double lr_decay_b)
      : learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
  double LearningRate(const uint64_t num_sample_passed) {
    return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
  }
  const char *SerializeState(int *state_len) {
    // TODO(zhihong) : add lr_policy serialization
    *state_len = 0;
    return nullptr;
  }
  void DeserializeState(const std::string &state) {
    // TODO(zhihong) : add lr_policy serialization
  }

private:
  double learning_rate;
  double lr_decay_a;
  double lr_decay_b;
};

}  // namespace optimizer
}  // namespace paddle
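
A quick sketch of how the two policies behave. The include name lr_policy.h is assumed (the filename is not visible on this page) and the schedule constants are illustrative:

#include <cstdio>
#include "lr_policy.h"  // assumed filename for the header above

int main() {
  // Constant policy: the rate ignores training progress entirely.
  paddle::optimizer::ConstLr const_lr(0.1);
  std::printf("%f\n", const_lr.LearningRate(1000));     // 0.100000

  // Linear policy: start at 0.1, decay 1e-5 per sample, floor at 0.01.
  paddle::optimizer::LinearLr linear_lr(0.1, 1e-5, 0.01);
  std::printf("%f\n", linear_lr.LearningRate(0));       // 0.100000
  std::printf("%f\n", linear_lr.LearningRate(5000));    // 0.050000
  std::printf("%f\n", linear_lr.LearningRate(100000));  // clipped to 0.010000
  return 0;
}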