From 0d90a8aa7fa2819d046ed7cf28adb27c0b080acc Mon Sep 17 00:00:00 2001 From: zhouwei25 Date: Mon, 28 Sep 2020 11:56:30 +0000 Subject: [PATCH] fix doc and unittest of 2.0 lr_scheduler --- python/paddle/__init__.py | 7 - .../unittests/test_imperative_optimizer_v2.py | 123 +++++++++--------- python/paddle/framework/__init__.py | 13 -- python/paddle/nn/__init__.py | 7 - python/paddle/nn/functional/__init__.py | 8 -- python/paddle/nn/functional/learning_rate.py | 29 ----- python/paddle/nn/layer/__init__.py | 7 - python/paddle/nn/layer/learning_rate.py | 25 ---- python/paddle/optimizer/lr_scheduler.py | 118 ++++++++--------- 9 files changed, 113 insertions(+), 224 deletions(-) delete mode 100644 python/paddle/nn/functional/learning_rate.py delete mode 100644 python/paddle/nn/layer/learning_rate.py diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index e707de8e06864..c1846095da048 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -237,13 +237,6 @@ from .framework import SaveLoadConfig #DEFINE_ALIAS from .framework import DataParallel #DEFINE_ALIAS -from .framework import NoamDecay #DEFINE_ALIAS -from .framework import PiecewiseDecay #DEFINE_ALIAS -from .framework import NaturalExpDecay #DEFINE_ALIAS -from .framework import ExponentialDecay #DEFINE_ALIAS -from .framework import InverseTimeDecay #DEFINE_ALIAS -from .framework import PolynomialDecay #DEFINE_ALIAS -from .framework import CosineDecay #DEFINE_ALIAS from .framework import set_default_dtype #DEFINE_ALIAS from .framework import get_default_dtype #DEFINE_ALIAS diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index 887e50f07c55c..2bc6b0f4ed05b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -23,7 +23,7 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.optimizer import SGDOptimizer, Adam, MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer, LambOptimizer +from paddle.fluid.optimizer import MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer, LambOptimizer from paddle.fluid.optimizer import ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph.base import to_variable @@ -126,6 +126,9 @@ def _check_mlp(self, place=None): avg_loss.backward() optimizer.minimize(avg_loss) + if isinstance(optimizer._learning_rate, + paddle.optimizer._LRScheduler): + optimizer._learning_rate.step() mlp.clear_gradients() dy_param_value = {} for param in mlp.parameters(): @@ -181,6 +184,9 @@ def _check_mlp(self, place=None): feed={"pixel": static_x_data, "label": y_data}, fetch_list=fetch_list) + if isinstance(optimizer._learning_rate, + paddle.optimizer._LRScheduler): + optimizer._learning_rate.step() static_param_value = {} static_out = out[0] @@ -196,106 +202,93 @@ def _check_mlp(self, place=None): self.assertTrue(np.allclose(value, dy_param_value[key])) -class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): +class TestImperativeOptimizerPiecewiseLR(TestImperativeOptimizerBase): def 
get_optimizer_dygraph(self, parameter_list):
         bd = [3, 6, 9]
-        optimizer = SGDOptimizer(
-            learning_rate=paddle.optimizer.PiecewiseLR(
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.PiecewiseLR(
                 boundaries=bd,
                 values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
-            parameter_list=parameter_list)
+            parameters=parameter_list)
         return optimizer
 
     def get_optimizer(self):
         bd = [3, 6, 9]
-        optimizer = SGDOptimizer(learning_rate=paddle.optimizer.PiecewiseLR(
-            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.PiecewiseLR(
+                boundaries=bd,
+                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
         return optimizer
 
     def test_sgd(self):
         self._check_mlp()
 
 
-class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
+class TestImperativeOptimizerNaturalExpLR(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
-        optimizer = SGDOptimizer(
-            learning_rate=fluid.layers.natural_exp_decay(
-                learning_rate=0.1,
-                decay_steps=10000,
-                decay_rate=0.5,
-                staircase=True),
-            parameter_list=parameter_list)
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.NaturalExpLR(
+                learning_rate=0.1, gamma=0.5), parameters=parameter_list)
         return optimizer
 
     def get_optimizer(self):
-        optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
-            learning_rate=0.1,
-            decay_steps=10000,
-            decay_rate=0.5,
-            staircase=True))
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.NaturalExpLR(
+                learning_rate=0.1, gamma=0.5))
         return optimizer
 
     def test_sgd(self):
         self._check_mlp()
 
 
-class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
+class TestImperativeOptimizerExponentialLR(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
-        optimizer = SGDOptimizer(
-            learning_rate=fluid.layers.exponential_decay(
-                learning_rate=0.1,
-                decay_steps=10000,
-                decay_rate=0.5,
-                staircase=True),
-            parameter_list=parameter_list)
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.ExponentialLR(
+                learning_rate=0.1, gamma=0.5),
+            parameters=parameter_list)
         return optimizer
 
     def get_optimizer(self):
-        optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
-            learning_rate=0.1,
-            decay_steps=10000,
-            decay_rate=0.5,
-            staircase=True))
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.ExponentialLR(
+                learning_rate=0.1, gamma=0.5))
         return optimizer
 
     def test_sgd(self):
         self._check_mlp()
 
 
-class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
+class TestImperativeOptimizerInverseTimeLR(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
-        optimizer = Adam(
-            learning_rate=fluid.layers.inverse_time_decay(
-                learning_rate=0.1,
-                decay_steps=10000,
-                decay_rate=0.5,
-                staircase=True),
-            parameter_list=parameter_list)
+        optimizer = paddle.optimizer.Adam(
+            learning_rate=paddle.optimizer.lr_scheduler.InverseTimeLR(
+                learning_rate=0.1, gamma=0.5),
+            parameters=parameter_list)
         return optimizer
 
     def get_optimizer(self):
-        optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
-            learning_rate=0.1,
-            decay_steps=10000,
-            decay_rate=0.5,
-            staircase=True))
+        optimizer = paddle.optimizer.Adam(
+            learning_rate=paddle.optimizer.lr_scheduler.InverseTimeLR(
+                learning_rate=0.1, gamma=0.5))
         return optimizer
 
     def test_adam(self):
         self._check_mlp()
 
 
-class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
+class TestImperativeOptimizerPolynomialLR(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
-        optimizer = SGDOptimizer(
-            learning_rate=fluid.layers.polynomial_decay(
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.PolynomialLR(
                 learning_rate=0.1, decay_steps=5, cycle=self.cycle),
-            parameter_list=parameter_list)
+            parameters=parameter_list)
         return optimizer
 
     def get_optimizer(self):
-        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
-            learning_rate=0.1, decay_steps=5, cycle=self.cycle))
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.PolynomialLR(
+                learning_rate=0.1, decay_steps=5, cycle=self.cycle))
         return optimizer
 
     def test_sgd_cycle(self):
@@ -307,17 +300,18 @@ def test_sgd(self):
         self._check_mlp()
 
 
-class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
+class TestImperativeOptimizerCosineLR(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
-        optimizer = SGDOptimizer(
-            learning_rate=fluid.layers.cosine_decay(
-                learning_rate=0.1, step_each_epoch=10000, epochs=120),
-            parameter_list=parameter_list)
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.CosineAnnealingLR(
+                learning_rate=0.1, T_max=120),
+            parameters=parameter_list)
         return optimizer
 
     def get_optimizer(self):
-        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
-            learning_rate=0.1, step_each_epoch=10000, epochs=120))
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.CosineAnnealingLR(
+                learning_rate=0.1, T_max=120))
         return optimizer
 
     def test_sgd(self):
@@ -326,15 +320,16 @@ def test_sgd(self):
         self._check_mlp()
 
 
 class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
     def get_optimizer_dygraph(self, parameter_list):
-        optimizer = SGDOptimizer(
-            learning_rate=fluid.layers.noam_decay(
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.NoamLR(
                 d_model=512, warmup_steps=8000),
-            parameter_list=parameter_list)
+            parameters=parameter_list)
         return optimizer
 
     def get_optimizer(self):
-        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
-            d_model=512, warmup_steps=8000))
+        optimizer = paddle.optimizer.SGD(
+            learning_rate=paddle.optimizer.lr_scheduler.NoamLR(
+                d_model=512, warmup_steps=8000))
         return optimizer
 
     def test_sgd(self):
diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py
index 2ce442add2e02..910802ac28645 100644
--- a/python/paddle/framework/__init__.py
+++ b/python/paddle/framework/__init__.py
@@ -24,11 +24,6 @@
     'no_grad', 'DataParallel'
 ]
 
-__all__ += [
-    'NoamDecay', 'PiecewiseDecay', 'NaturalExpDecay', 'ExponentialDecay',
-    'InverseTimeDecay', 'PolynomialDecay', 'CosineDecay'
-]
-
 from . 
import random from .random import manual_seed from .framework import get_default_dtype @@ -52,11 +47,3 @@ from .io import load from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS - -from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS -from ..fluid.dygraph.learning_rate_scheduler import PiecewiseDecay #DEFINE_ALIAS -from ..fluid.dygraph.learning_rate_scheduler import NaturalExpDecay #DEFINE_ALIAS -from ..fluid.dygraph.learning_rate_scheduler import ExponentialDecay #DEFINE_ALIAS -from ..fluid.dygraph.learning_rate_scheduler import InverseTimeDecay #DEFINE_ALIAS -from ..fluid.dygraph.learning_rate_scheduler import PolynomialDecay #DEFINE_ALIAS -from ..fluid.dygraph.learning_rate_scheduler import CosineDecay #DEFINE_ALIAS diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 79583f344f0c1..0820adfb12417 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -120,13 +120,6 @@ # from .layer.conv import TreeConv #DEFINE_ALIAS # from .layer.conv import Conv1D #DEFINE_ALIAS from .layer.extension import RowConv #DEFINE_ALIAS -# from .layer.learning_rate import CosineDecay #DEFINE_ALIAS -# from .layer.learning_rate import ExponentialDecay #DEFINE_ALIAS -# from .layer.learning_rate import InverseTimeDecay #DEFINE_ALIAS -# from .layer.learning_rate import NaturalExpDecay #DEFINE_ALIAS -# from .layer.learning_rate import NoamDecay #DEFINE_ALIAS -# from .layer.learning_rate import PiecewiseDecay #DEFINE_ALIAS -# from .layer.learning_rate import PolynomialDecay #DEFINE_ALIAS from .layer.common import Linear # from .layer.loss import NCELoss #DEFINE_ALIAS from .layer.loss import BCEWithLogitsLoss #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 163c249ab3745..a90dee4d326a3 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -97,14 +97,6 @@ from .extension import temporal_shift #DEFINE_ALIAS from .extension import warpctc #DEFINE_ALIAS from .extension import diag_embed #DEFINE_ALIAS -from .learning_rate import cosine_decay #DEFINE_ALIAS -from .learning_rate import exponential_decay #DEFINE_ALIAS -from .learning_rate import inverse_time_decay #DEFINE_ALIAS -from .learning_rate import natural_exp_decay #DEFINE_ALIAS -from .learning_rate import noam_decay #DEFINE_ALIAS -from .learning_rate import piecewise_decay #DEFINE_ALIAS -from .learning_rate import polynomial_decay #DEFINE_ALIAS -from .learning_rate import linear_lr_warmup #DEFINE_ALIAS # from .lod import sequence_concat #DEFINE_ALIAS # from .lod import sequence_conv #DEFINE_ALIAS # from .lod import sequence_enumerate #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/learning_rate.py b/python/paddle/nn/functional/learning_rate.py deleted file mode 100644 index 83837fc5d46ac..0000000000000 --- a/python/paddle/nn/functional/learning_rate.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# TODO: define learning rate decay -from ...fluid.layers import cosine_decay #DEFINE_ALIAS -from ...fluid.layers import exponential_decay #DEFINE_ALIAS -from ...fluid.layers import inverse_time_decay #DEFINE_ALIAS -from ...fluid.layers import natural_exp_decay #DEFINE_ALIAS -from ...fluid.layers import noam_decay #DEFINE_ALIAS -from ...fluid.layers import piecewise_decay #DEFINE_ALIAS -from ...fluid.layers import polynomial_decay #DEFINE_ALIAS -from ...fluid.layers import linear_lr_warmup #DEFINE_ALIAS - -__all__ = [ - 'cosine_decay', 'exponential_decay', 'inverse_time_decay', - 'natural_exp_decay', 'noam_decay', 'piecewise_decay', 'polynomial_decay', - 'linear_lr_warmup' -] diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 760af09f1f2f5..8b5dff9921e6f 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -87,13 +87,6 @@ # from .conv import TreeConv #DEFINE_ALIAS # from .conv import Conv1D #DEFINE_ALIAS from .extension import RowConv #DEFINE_ALIAS -# from .learning_rate import CosineDecay #DEFINE_ALIAS -# from .learning_rate import ExponentialDecay #DEFINE_ALIAS -# from .learning_rate import InverseTimeDecay #DEFINE_ALIAS -# from .learning_rate import NaturalExpDecay #DEFINE_ALIAS -# from .learning_rate import NoamDecay #DEFINE_ALIAS -# from .learning_rate import PiecewiseDecay #DEFINE_ALIAS -# from .learning_rate import PolynomialDecay #DEFINE_ALIAS # from .loss import NCELoss #DEFINE_ALIAS from .loss import BCEWithLogitsLoss #DEFINE_ALIAS from .loss import CrossEntropyLoss #DEFINE_ALIAS diff --git a/python/paddle/nn/layer/learning_rate.py b/python/paddle/nn/layer/learning_rate.py deleted file mode 100644 index e91f755cb0615..0000000000000 --- a/python/paddle/nn/layer/learning_rate.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# TODO: define learning rate decay - -__all__ = [ - # 'CosineDecay', - # 'ExponentialDecay', - # 'InverseTimeDecay', - # 'NaturalExpDecay', - # 'NoamDecay', - # 'PiecewiseDecay', - # 'PolynomialDecay' -] diff --git a/python/paddle/optimizer/lr_scheduler.py b/python/paddle/optimizer/lr_scheduler.py index 61391704061bd..7f808f8542db3 100644 --- a/python/paddle/optimizer/lr_scheduler.py +++ b/python/paddle/optimizer/lr_scheduler.py @@ -164,7 +164,7 @@ class NoamLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -176,11 +176,11 @@ class NoamLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -262,7 +262,7 @@ class PiecewiseLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -274,11 +274,11 @@ class PiecewiseLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -328,7 +328,7 @@ class NaturalExpLR(_LRScheduler): .. math:: - new\_learning\_rate = learning\_rate * e^{- gama * epoch} + new\_learning\_rate = learning\_rate * e^{- gamma * epoch} Args: learning_rate (float): The initial learning rate. It is a python float number. 
@@ -346,7 +346,7 @@ class NaturalExpLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -358,11 +358,11 @@ class NaturalExpLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -425,7 +425,7 @@ class InverseTimeLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -437,11 +437,11 @@ class InverseTimeLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -521,7 +521,7 @@ class PolynomialLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -533,11 +533,11 @@ class PolynomialLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -616,7 +616,7 @@ class LinearLrWarmup(_LRScheduler): lr = learning_rate - where lr is float or any subclass of ``_LRScheduler`` . + where ``learning_rate`` is float or any subclass of ``_LRScheduler`` . Args: learning_rate (float|_LRScheduler): The learning rate after warm-up. It is a python float number or any subclass of ``_LRScheduler`` . 
@@ -636,7 +636,7 @@ class LinearLrWarmup(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -649,11 +649,11 @@ class LinearLrWarmup(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -678,7 +678,7 @@ class LinearLrWarmup(_LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() """ def __init__(self, @@ -742,7 +742,7 @@ class ExponentialLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -754,11 +754,11 @@ class ExponentialLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -795,7 +795,7 @@ def get_lr(self): class MultiStepLR(_LRScheduler): """ - Update the learning rate by ``gama`` once ``epoch`` reaches one of the milestones. + Update the learning rate by ``gamma`` once ``epoch`` reaches one of the milestones. The algorithm can be described as the code below. 
@@ -830,7 +830,7 @@ class MultiStepLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -842,11 +842,11 @@ class MultiStepLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -939,7 +939,7 @@ class StepLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -951,11 +951,11 @@ class StepLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -1035,7 +1035,7 @@ class LambdaLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -1047,11 +1047,11 @@ class LambdaLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -1135,7 +1135,7 @@ class ReduceLROnPlateau(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode + # train on default dynamic graph mode paddle.disable_static() x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) @@ -1147,11 +1147,11 @@ class ReduceLROnPlateau(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step(loss) - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program() @@ -1258,7 +1258,7 @@ def step(self, metrics, epoch=None): else: self.last_epoch = epoch - # loss must be 1-D Tensor with shape [1] + # loss must be float, numpy.ndarray or 1-D Tensor with shape [1] if isinstance(metrics, (Tensor, numpy.ndarray)): assert len(metrics.shape) == 1 and metrics.shape[0] == 1, "the metrics.shape " \ "should be (1L,), but the current metrics.shape is {}. Maybe that " \ @@ -1309,16 +1309,7 @@ class CosineAnnealingLR(_LRScheduler): Set the learning rate using a cosine annealing schedule, where :math:`\eta_{max}` is set to the initial learning_rate. 
:math:`T_{cur}` is the number of epochs since the last restart in - SGDR: - - \begin{aligned} - \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 - + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right), - & T_{cur} \neq (2k+1)T_{max}; \\ - \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min}) - \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right), - & T_{cur} = (2k+1)T_{max}. - \end{aligned} + SGDR. The algorithm can be described as following. @@ -1352,8 +1343,7 @@ class CosineAnnealingLR(_LRScheduler): import paddle import numpy as np - # train on default dygraph mode - paddle.disable_static() + # train on default dynamic graph mode x = np.random.uniform(-1, 1, [10, 10]).astype("float32") linear = paddle.nn.Linear(10, 10) scheduler = paddle.optimizer.lr_scheduler.CosineAnnealingLR(learning_rate=0.5, T_max=10, verbose=True) @@ -1364,11 +1354,11 @@ class CosineAnnealingLR(_LRScheduler): out = linear(x) loss = paddle.reduce_mean(out) loss.backward() - sgd.minimize(loss) - linear.clear_gradients() + sgd.step() + sgd.clear_gradients() scheduler.step() - # train on static mode + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() start_prog = paddle.static.Program()
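For reference, the NaturalExpLR schedule exercised by the updated tests above decays as new_lr = learning_rate * e^(-gamma * epoch). A minimal sanity-check sketch, assuming the scheduler starts at epoch 0 and that get_lr() reports the rate for its current epoch:

import math

import paddle

scheduler = paddle.optimizer.lr_scheduler.NaturalExpLR(learning_rate=0.1, gamma=0.5)
for epoch in range(5):
    # The two columns should agree if get_lr() reflects the current epoch.
    print(epoch, scheduler.get_lr(), 0.1 * math.exp(-0.5 * epoch))
    scheduler.step()  # advance the schedule by one epoch

This is the same step()-once-per-epoch pattern that the reworked docstring examples above rely on.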