diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index 525c3360f5eff..287ddc9621964 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -119,6 +119,8 @@ class ErrorClipByValue(BaseErrorClipAttr):
         .. code-block:: python
 
             import paddle.fluid as fluid
+            import paddle
+            paddle.enable_static()
             BATCH_SIZE = 128
             CLIP_MAX = 2e-6
             CLIP_MIN = -1e-6
@@ -132,11 +134,12 @@ class ErrorClipByValue(BaseErrorClipAttr):
                 input=hidden2, size=10, act='softmax')
             label = fluid.layers.data(name='y', shape=[1], dtype='int64')
             cost = fluid.layers.cross_entropy(input=predict, label=label)
-            avg_cost = fluid.layers.mean(cost)
+            avg_cost = paddle.mean(cost)
             prog_clip = prog.clone()
             prog_clip.block(0).var(hidden1.name)._set_error_clip(
                 fluid.clip.ErrorClipByValue(
                     max=CLIP_MAX, min=CLIP_MIN)
+            )
     """
 
     def __init__(self, max, min=None):
diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py
index fffc3cd5a6e3f..bd170b348ae62 100644
--- a/python/paddle/fluid/contrib/layers/nn.py
+++ b/python/paddle/fluid/contrib/layers/nn.py
@@ -1997,7 +1997,7 @@ def build_program(main_program, startup_program):
                 fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn)
                 prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax')
                 loss = fluid.layers.cross_entropy(input=prediction, label=y)
-                loss = fluid.layers.mean(loss)
+                loss = paddle.mean(loss)
                 sgd = fluid.optimizer.SGD(learning_rate=0.001)
                 sgd = fluid.contrib.mixed_precision.decorate(
                     sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0)
diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
index 55e1dcacdcb62..eddbf68fe1a34 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -27,10 +27,10 @@
 
 from ....framework import Program, program_guard, default_startup_program
 from ....data import data
-from ....layers import mean
 from ....executor import scope_guard
 from ....framework import _get_paddle_place
 from . import utils
+import paddle
 
 __all__ = [
     'QuantizationTransformPass',
@@ -927,7 +927,7 @@ def _insert_func(self, graph, func, var_node, op):
                 out_node = func(in_node)
         graph.out_node_mapping_table[out_node.name] = var_node.name()
         # loss shape must be 1 when minimize
-        loss = mean(out_node)
+        loss = paddle.mean(out_node)
         if not graph._for_test:
             assert (
                 self._optimizer
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index e470d2f13f177..d39d9d89c1228 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -86,7 +86,6 @@
     'elementwise_mul',
     'clip',
     'clip_by_norm',
-    'mean',
     'mul',
     'merge_selected_rows',
     'get_tensor_from_selected_rows',
@@ -3368,47 +3367,6 @@ def clip_by_norm(x, max_norm, name=None):
     return out
 
 
-@deprecated(since="2.0.0", update_to="paddle.mean")
-@templatedoc()
-def mean(x, name=None):
-    """
-    ${comment}
-
-    Args:
-        x(${x_type}): ${x_comment}
-        name(basestring|None): Name of the output.
-
-    Returns:
-        out(${out_type}): ${out_comment}
-
-    Examples:
-        .. code-block:: python
-
-            import paddle
-            import paddle.fluid as fluid
-            paddle.enable_static()
-
-            input = fluid.layers.data(
-                name='data', shape=[2, 3], dtype='float32')
-            mean = paddle.mean(input)
-    """
-
-    if _in_legacy_dygraph():
-        return _legacy_C_ops.mean(x)
-    if in_dygraph_mode():
-        return _C_ops.mean_all(x)
-
-    helper = LayerHelper("mean", **locals())
-    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'mean')
-    out = helper.create_variable_for_type_inference(dtype=x.dtype)
-
-    helper.append_op(
-        type="mean", inputs={"X": x}, attrs={}, outputs={"Out": out}
-    )
-
-    return out
-
-
 @templatedoc()
 def merge_selected_rows(x, name=None):
     """
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index ac5d70707903a..e7d02132ab9ab 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1452,7 +1452,7 @@ class SGDOptimizer(Optimizer):
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
             cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
-            avg_cost = fluid.layers.mean(cost)
+            avg_cost = paddle.mean(cost)
 
             sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
             sgd_optimizer.minimize(avg_cost)
@@ -1654,7 +1654,7 @@ class MomentumOptimizer(Optimizer):
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
             cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
-            avg_cost = fluid.layers.mean(cost)
+            avg_cost = paddle.mean(cost)
 
             moment_optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
             moment_optimizer.minimize(avg_cost)
@@ -2232,7 +2232,7 @@ class AdamOptimizer(Optimizer):
             y = fluid.data(name='y', shape=[None, 1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
             cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
-            avg_cost = fluid.layers.mean(cost)
+            avg_cost = paddle.mean(cost)
 
             adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
             adam_optimizer.minimize(avg_cost)
@@ -2261,7 +2261,7 @@ class AdamOptimizer(Optimizer):
             y = fluid.data(name='y', shape=[None, 1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
             cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
-            avg_cost = fluid.layers.mean(cost)
+            avg_cost = paddle.mean(cost)
 
             # define beta decay variable
             def get_decayed_betas(beta1_init, beta2_init, decay_steps, decay_rate, epsilon_init):
@@ -2641,6 +2641,8 @@ class AdamaxOptimizer(Optimizer):
 
             import paddle.fluid as fluid
             import numpy
+            import paddle
+            paddle.enable_static()
 
             # First create the Executor.
             place = fluid.CPUPlace() # fluid.CUDAPlace(0)
@@ -2651,7 +2653,7 @@ class AdamaxOptimizer(Optimizer):
             with fluid.program_guard(train_program, startup_program):
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
                 hidden = fluid.layers.fc(input=data, size=10)
-                loss = fluid.layers.mean(hidden)
+                loss = paddle.mean(hidden)
                 adam = fluid.optimizer.AdamaxOptimizer(learning_rate=0.2)
                 adam.minimize(loss)
 
@@ -2816,6 +2818,8 @@ class DpsgdOptimizer(Optimizer):
 
             import paddle.fluid as fluid
             import numpy
+            import paddle
+            paddle.enable_static()
 
             # First create the Executor.
             place = fluid.CPUPlace() # fluid.CUDAPlace(0)
@@ -2826,7 +2830,7 @@ class DpsgdOptimizer(Optimizer):
             with fluid.program_guard(train_program, startup_program):
                 data = fluid.layers.data(name='X', shape=[1], dtype='float32')
                 hidden = fluid.layers.fc(input=data, size=10)
-                loss = fluid.layers.mean(hidden)
+                loss = paddle.mean(hidden)
                 optimizer = fluid.optimizer.Dpsgd(learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0)
                 optimizer.minimize(loss)
 
@@ -3291,7 +3295,7 @@ class RMSPropOptimizer(Optimizer):
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
             cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
-            avg_cost = fluid.layers.mean(cost)
+            avg_cost = paddle.mean(cost)
 
             rms_optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
             rms_optimizer.minimize(avg_cost)
@@ -3510,7 +3514,7 @@ class FtrlOptimizer(Optimizer):
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
             cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
-            avg_cost = fluid.layers.mean(cost)
+            avg_cost = paddle.mean(cost)
 
             ftrl_optimizer = fluid.optimizer.Ftrl(learning_rate=0.1)
             ftrl_optimizer.minimize(avg_cost)
@@ -3679,11 +3683,13 @@ class LambOptimizer(AdamOptimizer):
     Examples:
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
+            paddle.enable_static()
 
             data = fluid.data(name='x', shape=[-1, 5], dtype='float32')
             hidden = fluid.layers.fc(input=data, size=10)
-            cost = fluid.layers.mean(hidden)
+            cost = paddle.mean(hidden)
 
             def exclude_fn(param):
                 return param.name.endswith('.b_0')
@@ -3885,8 +3891,10 @@ class ModelAverage(Optimizer):
 
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
             import numpy
+            paddle.enable_static()
 
             # First create the Executor.
             place = fluid.CPUPlace() # fluid.CUDAPlace(0)
@@ -3898,7 +3906,7 @@ class ModelAverage(Optimizer):
             # build net
             data = fluid.data(name='X', shape=[None, 1], dtype='float32')
             hidden = fluid.layers.fc(input=data, size=10)
-            loss = fluid.layers.mean(hidden)
+            loss = paddle.mean(hidden)
             optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
             optimizer.minimize(loss)
 
@@ -4064,6 +4072,8 @@ def apply(self, executor, need_restore=True):
 
                 import paddle.fluid as fluid
                 import numpy
+                import paddle
+                paddle.enable_static()
 
                 # First create the Executor.
                 place = fluid.CPUPlace() # fluid.CUDAPlace(0)
@@ -4075,7 +4085,7 @@ def apply(self, executor, need_restore=True):
                 # build net
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
                 hidden = fluid.layers.fc(input=data, size=10)
-                loss = fluid.layers.mean(hidden)
+                loss = paddle.mean(hidden)
                 optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
                 optimizer.minimize(loss)
 
@@ -4118,6 +4128,8 @@ def restore(self, executor):
 
                 import paddle.fluid as fluid
                 import numpy
+                import paddle
+                paddle.enable_static()
 
                 # First create the Executor.
                 place = fluid.CPUPlace() # fluid.CUDAPlace(0)
@@ -4129,7 +4141,7 @@ def restore(self, executor):
                 # build net
                 data = fluid.data(name='X', shape=[None, 1], dtype='float32')
                 hidden = fluid.layers.fc(input=data, size=10)
-                loss = fluid.layers.mean(hidden)
+                loss = paddle.mean(hidden)
                 optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
                 optimizer.minimize(loss)
 
diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index 1152f0bbd86b6..17afccde8a229 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -68,6 +68,8 @@ class L2DecayRegularizer(WeightDecayRegularizer):
 
             # Example1: set Regularizer in optimizer
             import paddle.fluid as fluid
+            import paddle
+            paddle.enable_static()
 
             main_prog = fluid.Program()
             startup_prog = fluid.Program()
@@ -77,7 +79,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
                 hidden = fluid.layers.fc(input=data, size=128, act='relu')
                 prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
                 loss = fluid.layers.cross_entropy(input=prediction, label=label)
-                avg_loss = fluid.layers.mean(loss)
+                avg_loss = paddle.mean(loss)
             optimizer = fluid.optimizer.Adagrad(
                 learning_rate=1e-4,
                 regularization=fluid.regularizer.L2Decay(
@@ -87,6 +89,8 @@ class L2DecayRegularizer(WeightDecayRegularizer):
 
             # Example2: set Regularizer both in ParamAttr and optimizer
             import paddle.fluid as fluid
+            import paddle
+            paddle.enable_static()
 
             l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
             l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
@@ -97,7 +101,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
             hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
             hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)  # fc_1.w_0(L1), fc_1.b_0
             predict = fluid.layers.fc(hidden2, 32)  # fc_3.w_0, fc_3.b_0
-            avg_loss = fluid.layers.mean(predict)
+            avg_loss = paddle.mean(predict)
 
             # set L2 regularization in optimizer
             optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2)
@@ -181,7 +185,8 @@ class L1DecayRegularizer(WeightDecayRegularizer):
 
             # Example1: set Regularizer in optimizer
             import paddle.fluid as fluid
-
+            import paddle
+            paddle.enable_static()
             main_prog = fluid.Program()
             startup_prog = fluid.Program()
             with fluid.program_guard(main_prog, startup_prog):
@@ -190,7 +195,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
                 hidden = fluid.layers.fc(input=data, size=128, act='relu')
                 prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
                 loss = fluid.layers.cross_entropy(input=prediction, label=label)
-                avg_loss = fluid.layers.mean(loss)
+                avg_loss = paddle.mean(loss)
             optimizer = fluid.optimizer.Adagrad(
                 learning_rate=1e-4,
                 regularization=fluid.regularizer.L1DecayRegularizer(
@@ -200,7 +205,8 @@ class L1DecayRegularizer(WeightDecayRegularizer):
 
             # Example2: set Regularizer both in ParamAttr and optimizer
             import paddle.fluid as fluid
-
+            import paddle
+            paddle.enable_static()
             l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
             l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
             x = fluid.layers.uniform_random([3,4])
@@ -210,7 +216,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
             hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
             hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)  # fc_1.w_0(L1), fc_1.b_0
             predict = fluid.layers.fc(hidden2, 32)  # fc_3.w_0, fc_3.b_0
-            avg_loss = fluid.layers.mean(predict)
+            avg_loss = paddle.mean(predict)
 
             # set L2 regularization in optimizer
             optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2)
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py
index 687c8d06ad9a8..2bec49877c54d 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py
@@ -66,7 +66,7 @@ def get_loss(cos_q_pt, cos_q_nt):
                 ),
                 loss_op2,
             )
-            avg_cost = fluid.layers.mean(loss_op3)
+            avg_cost = paddle.mean(loss_op3)
             return avg_cost
 
         is_distributed = False
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index ebb249903b228..437e91f3f4c05 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -288,12 +288,12 @@ class DistributeTranspiler:
 
             paddle.enable_static()
 
-            x = fluid.data(name='x', shape=[13], dtype='float32')
+            x = fluid.data(name='x', shape=[1,13], dtype='float32')
             y = fluid.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
 
             cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
-            avg_loss = fluid.layers.mean(cost)
+            avg_loss = paddle.mean(cost)
 
             sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
             sgd_optimizer.minimize(avg_loss)
diff --git a/tools/infrt/fake_models/multi_fc.py b/tools/infrt/fake_models/multi_fc.py
index 29b941d5437ca..7f2e4b5aeae27 100644
--- a/tools/infrt/fake_models/multi_fc.py
+++ b/tools/infrt/fake_models/multi_fc.py
@@ -42,7 +42,7 @@
 )
 
 cost = fluid.layers.square_error_cost(fc_out, label)
-avg_cost = fluid.layers.mean(cost)
+avg_cost = paddle.mean(cost)
 
 optimizer = fluid.optimizer.SGD(learning_rate=0.001)
 optimizer.minimize(avg_cost)
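
For reviewers, a minimal before/after sketch of the migration this patch performs. The toy network below is illustrative (names and shapes are not taken from the patch) and assumes a Paddle 2.x build where paddle.fluid is still importable; with default arguments, paddle.mean reduces over all elements, matching the removed fluid.layers.mean.

    # Hypothetical static-graph example of the fluid.layers.mean -> paddle.mean swap.
    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()

    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.data(name='x', shape=[None, 13], dtype='float32')
        y_predict = fluid.layers.fc(input=x, size=1, act=None)
        # Before this patch: avg = fluid.layers.mean(y_predict)
        # After this patch, only the public API remains; it is a drop-in replacement:
        avg = paddle.mean(y_predict)  # mean over all elements, single-element result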