Skip to content

Commit

Permalink
directly normalize accumulated gradients
Browse files Browse the repository at this point in the history
`SGDSolver::Normalize()` normalizes accumulated gradients by scaling
inversely to the accumulation as `1 / iter_size`.

This fixes accumulation for AdaGrad and is more obvious than fooling
with rates and decays in 55585f5.
  • Loading branch information
shelhamer committed May 28, 2015
1 parent 92ab737 commit 0e7a078
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
1 change: 1 addition & 0 deletions include/caffe/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class SGDSolver : public Solver<Dtype> {
void PreSolve();
Dtype GetLearningRate();
virtual void ApplyUpdate();
virtual void Normalize(int param_id);
virtual void Regularize(int param_id);
virtual void ComputeUpdateValue(int param_id, Dtype rate);
virtual void ClipGradients();
Expand Down
32 changes: 29 additions & 3 deletions src/caffe/solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,21 +487,47 @@ void SGDSolver<Dtype>::ApplyUpdate() {
}
ClipGradients();
for (int param_id = 0; param_id < this->net_->params().size(); ++param_id) {
Normalize(param_id);
Regularize(param_id);
ComputeUpdateValue(param_id, rate / this->param_.iter_size());
ComputeUpdateValue(param_id, rate);
}
this->net_->Update();
}

template <typename Dtype>
void SGDSolver<Dtype>::Normalize(int param_id) {
if (this->param_.iter_size() == 1) { return; }
// Scale gradient to counterbalance accumulation.
const vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
const Dtype accum_normalization = Dtype(1.) / this->param_.iter_size();
switch (Caffe::mode()) {
case Caffe::CPU: {
caffe_scal(net_params[param_id]->count(), accum_normalization,
net_params[param_id]->mutable_cpu_diff());
break;
}
case Caffe::GPU: {
#ifndef CPU_ONLY
caffe_gpu_scal(net_params[param_id]->count(), accum_normalization,
net_params[param_id]->mutable_gpu_diff());
#else
NO_GPU;
#endif
break;
}
default:
LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
}
}

template <typename Dtype>
void SGDSolver<Dtype>::Regularize(int param_id) {
const vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
const vector<float>& net_params_weight_decay =
this->net_->params_weight_decay();
Dtype weight_decay = this->param_.weight_decay();
string regularization_type = this->param_.regularization_type();
Dtype local_decay = weight_decay * net_params_weight_decay[param_id]
* this->param_.iter_size();
Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
switch (Caffe::mode()) {
case Caffe::CPU: {
if (local_decay) {
Expand Down

0 comments on commit 0e7a078

Please sign in to comment.