[cherry pick to 2.0-beta] update optimizer (#26711) (#26943)
* update optimizer (#26711)

* update doc

* update doc

* fix optimizer sample code

* add default value for adamw weight_decay

* fix adamw

* change LearningRateDecay to _LRScheduler

* fix adamw;notest

* fix load;notest

* remove file

* bug fix

* fix code style

* bug fix

* add ut

* adamw support weight_decay=0

* fix ut

* fix set_lr doc

* fix doc

* change parameters place

* fix sample code
MRXLT authored Sep 4, 2020
1 parent 5f239a1 commit cbb0f59
Showing 11 changed files with 239 additions and 169 deletions.
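
Before the per-file diffs, a minimal end-to-end sketch (not taken from the commit itself) of the 2.0-style dygraph optimizer API these changes target. It assumes a paddle 2.0-beta install with the step()/clear_grad() interface of the new paddle.optimizer classes, and reuses the fluid helpers that also appear in the diffs below; the shapes and the float learning rate are arbitrary, and per the updated docstrings an _LRScheduler instance is accepted for learning_rate as well.

import numpy as np
import paddle
import paddle.fluid as fluid

paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
# parameters= is required in dygraph mode, as the updated docstrings note.
adam = paddle.optimizer.Adam(learning_rate=0.001,
                             parameters=linear.parameters())

x = fluid.dygraph.to_variable(np.random.rand(4, 10).astype('float32'))
loss = fluid.layers.reduce_mean(linear(x))
loss.backward()
adam.step()        # apply one parameter update
adam.clear_grad()  # reset gradients before the next iteration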
1 change: 1 addition & 0 deletions python/paddle/fluid/dygraph/checkpoint.py
@@ -207,6 +207,7 @@ def load_dygraph(model_path, keep_name_table=False):
# NOTE: `jit.save` doesn't save optimizer state
else:
# Load state dict by `save_dygraph` save format
para_dict = {}
if os.path.exists(params_file_path):
with open(params_file_path, 'rb') as f:
para_dict = pickle.load(f) if six.PY2 else pickle.load(
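
The single added line above gives para_dict a safe default, so a missing params file no longer leaves the name undefined further down in load_dygraph. A standalone sketch of the same guarded-load pattern (the function and argument names here are hypothetical, not Paddle API):

import os
import pickle

def load_params(params_file_path):
    # Default to an empty dict so a missing params file yields {} rather than
    # an undefined para_dict (the situation the added line fixes).
    para_dict = {}
    if os.path.exists(params_file_path):
        with open(params_file_path, 'rb') as f:
            para_dict = pickle.load(f)
    return para_dict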
13 changes: 13 additions & 0 deletions python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -504,6 +504,13 @@ def test_adam_op_with_set_lr(self):
shape=[1], value=lr, dtype='float32')
adam.set_lr(lr_var)

def test_adam_op_invalid_input(self):
paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
with self.assertRaises(ValueError):
adam = paddle.optimizer.Adam(
0.1, beta1=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.Adam(
0.1, beta2=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.Adam(
0.1, epsilon=-1, parameters=linear.parameters())


if __name__ == "__main__":
unittest.main()
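
The new test above checks that out-of-range hyper-parameters fail fast at construction time. An equivalent interactive sketch, assuming paddle 2.0-beta:

import paddle

paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
try:
    paddle.optimizer.Adam(0.1, beta1=-1, parameters=linear.parameters())
except ValueError as err:
    # The constructor now rejects beta1 outside [0, 1) before any op runs.
    print(err)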
16 changes: 16 additions & 0 deletions python/paddle/fluid/tests/unittests/test_adamax_op.py
@@ -184,5 +184,21 @@ def adamax_step(inputs, attributes):
return param_out, moment_out, inf_norm_out


class TestAdamaxOpV2(unittest.TestCase):
def test_adamax_op_invalid_input(self):
import paddle
paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
with self.assertRaises(ValueError):
adam = paddle.optimizer.Adamax(
0.1, beta1=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.Adamax(
0.1, beta2=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.Adamax(
0.1, epsilon=-1, parameters=linear.parameters())


if __name__ == "__main__":
unittest.main()
13 changes: 13 additions & 0 deletions python/paddle/fluid/tests/unittests/test_adamw_op.py
@@ -76,6 +76,19 @@ def test_adamw_op(self):
rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss])
assert rets[0] is not None

def test_adamw_op_invalid_input(self):
paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
with self.assertRaises(ValueError):
adam = paddle.optimizer.AdamW(
0.1, beta1=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.AdamW(
0.1, beta2=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.AdamW(
0.1, epsilon=-1, parameters=linear.parameters())


if __name__ == "__main__":
unittest.main()
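
Per the commit messages ("add default value for adamw weight_decay", "adamw support weight_decay=0"), AdamW can now be constructed without an explicit weight_decay and also with weight_decay set to 0.0. A sketch, assuming paddle 2.0-beta (the actual default value is not shown in this diff):

import paddle

paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
# Relies on the new built-in default for weight_decay.
opt_default = paddle.optimizer.AdamW(learning_rate=0.1,
                                     parameters=linear.parameters())
# Explicitly disables decoupled weight decay, which is now also accepted.
opt_no_decay = paddle.optimizer.AdamW(learning_rate=0.1,
                                      weight_decay=0.0,
                                      parameters=linear.parameters())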
@@ -401,9 +401,7 @@ def test_lr_decay_natural_exp(self):
a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

linear = fluid.dygraph.nn.Linear(10, 10)

a = fluid.dygraph.to_variable(a)

b = linear(a)

loss = fluid.layers.reduce_mean(b)
13 changes: 13 additions & 0 deletions python/paddle/fluid/tests/unittests/test_rmsprop_op.py
@@ -276,6 +276,19 @@ def test_raise_error(self):
learning_rate=0.1,
momentum=None)

def test_rmsprop_op_invalid_input(self):
paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
with self.assertRaises(ValueError):
adam = paddle.optimizer.RMSProp(
0.1, epsilon=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.RMSProp(
0.1, momentum=-1, parameters=linear.parameters())
with self.assertRaises(ValueError):
adam = paddle.optimizer.RMSProp(
0.1, rho=-1, parameters=linear.parameters())


if __name__ == "__main__":
unittest.main()
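
For RMSProp the guarded arguments are epsilon, momentum, and rho. A sketch of an in-range construction, assuming paddle 2.0-beta; the values are illustrative, and per the test above negative values raise ValueError:

import paddle

paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
rmsprop = paddle.optimizer.RMSProp(learning_rate=0.1,
                                   rho=0.95,
                                   epsilon=1e-6,
                                   momentum=0.0,
                                   parameters=linear.parameters())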
12 changes: 9 additions & 3 deletions python/paddle/optimizer/adam.py
@@ -45,8 +45,8 @@ class Adam(Optimizer):
Related paper: `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_
Args:
learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value or a LearningRateDecay. The default value is 0.001.
learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``.
It can be a float value or a _LRScheduler. The default value is 0.001.
beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates.
It should be a float number or a Tensor with shape [1] and data type as float32.
The default value is 0.9.
@@ -55,7 +55,7 @@ class Adam(Optimizer):
The default value is 0.999.
epsilon (float, optional): A small float value for numerical stability.
The default value is 1e-08.
parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
@@ -143,6 +143,12 @@ def __init__(self,
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
if not 0 <= beta1 < 1:
raise ValueError("Invalid value of beta1, expect beta1 in [0,1).")
if not 0 <= beta2 < 1:
raise ValueError("Invalid value of beta2, expect beta2 in [0,1).")
if not 0 <= epsilon:
raise ValueError("Invalid value of epsilon, expect epsilon >= 0.")
super(Adam, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
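
The docstring updates above sit next to the "fix set_lr doc" change from the commit messages. A sketch of adjusting the rate after construction when a float learning_rate was used, assuming paddle 2.0-beta:

import paddle

paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
adam = paddle.optimizer.Adam(learning_rate=0.1,
                             parameters=linear.parameters())
# Override the current learning rate in place; the test_adam_op.py diff above
# also passes a shape-[1] float32 Tensor instead of a Python float.
adam.set_lr(0.01)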
12 changes: 9 additions & 3 deletions python/paddle/optimizer/adamax.py
@@ -47,15 +47,15 @@ class Adamax(Optimizer):
it is added here for numerical stability to prevent the division by 0 error.
Args:
learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value or a LearningRateDecay. The default value is 0.001.
learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``.
It can be a float value or a _LRScheduler. The default value is 0.001.
beta1 (float, optional): The exponential decay rate for the 1st moment estimates.
The default value is 0.9.
beta2 (float, optional): The exponential decay rate for the 2nd moment estimates.
The default value is 0.999.
epsilon (float, optional): A small float value for numerical stability.
The default value is 1e-08.
parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
@@ -118,6 +118,12 @@ def __init__(self,
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
if not 0 <= beta1 < 1:
raise ValueError("Invalid value of beta1, expect beta1 in [0,1).")
if not 0 <= beta2 < 1:
raise ValueError("Invalid value of beta2, expect beta2 in [0,1).")
if not 0 <= epsilon:
raise ValueError("Invalid value of epsilon, expect epsilon >= 0.")
super(Adamax, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
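
adamax.py gains the same range checks as adam.py. A sketch of an in-range Adamax construction, assuming paddle 2.0-beta; the hyper-parameter values are illustrative:

import paddle

paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
adamax = paddle.optimizer.Adamax(learning_rate=0.002,
                                 beta1=0.9,     # must lie in [0, 1)
                                 beta2=0.999,   # must lie in [0, 1)
                                 epsilon=1e-8,  # must be >= 0
                                 parameters=linear.parameters())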