From 2a4200363c0d64b91bb9e92cc5cd781315bc9e4c Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 11 Apr 2023 11:09:57 +0800 Subject: [PATCH 01/59] support auto generate for op merged_momentum optimizer (#52708) * fix error in generator/type_mapping.py * support auto generate for op merged_momentum optimizer --- .../fluid/operators/generator/type_mapping.py | 2 +- .../optimizers/merged_momentum_op.cc | 111 ------------------ paddle/phi/api/yaml/legacy_ops.yaml | 11 -- paddle/phi/api/yaml/op_compat.yaml | 6 + paddle/phi/api/yaml/ops.yaml | 11 ++ paddle/phi/ops/compat/merged_momentum_sig.cc | 40 ------- 6 files changed, 18 insertions(+), 163 deletions(-) delete mode 100644 paddle/fluid/operators/optimizers/merged_momentum_op.cc delete mode 100644 paddle/phi/ops/compat/merged_momentum_sig.cc diff --git a/paddle/fluid/operators/generator/type_mapping.py b/paddle/fluid/operators/generator/type_mapping.py index 8aec1bcc49a5e..e6b59b7823abe 100644 --- a/paddle/fluid/operators/generator/type_mapping.py +++ b/paddle/fluid/operators/generator/type_mapping.py @@ -76,7 +76,7 @@ 'int64_t[]': 'std::vector', 'float[]': 'std::vector', 'double[]': 'std::vector', - 'str[]': 'std::vector<', + 'str[]': 'std::vector', } output_type_map = {'Tensor': 'Tensor', 'Tensor[]': 'std::vector'} diff --git a/paddle/fluid/operators/optimizers/merged_momentum_op.cc b/paddle/fluid/operators/optimizers/merged_momentum_op.cc deleted file mode 100644 index 17d31e35fdec2..0000000000000 --- a/paddle/fluid/operators/optimizers/merged_momentum_op.cc +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/multiary.h" - -namespace paddle { -namespace operators { - -class MergedMomentumOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - auto param_dtype = - framework::OperatorWithKernel::IndicateVarDataType(ctx, "Param"); - return phi::KernelKey(param_dtype, ctx.GetPlace()); - } -}; - -class MergedMomentumOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("Param", - "(Tensor, default Tensor) " - "Input parameter that has to be updated") - .AsDuplicable(); - AddInput("Grad", - "(Tensor, default Tensor) " - "Input gradient of the parameter") - .AsDuplicable(); - AddInput("Velocity", - "(Tensor, default Tensor) " - "Input velocity (corresponding to the parameter) " - "that has to be updated") - .AsDuplicable(); - AddInput("LearningRate", - "(Tensor, default Tensor) " - "Input learning rate") - .AsDuplicable(); - AddInput("MasterParam", "FP32 master weight for AMP.") - .AsDispensable() - .AsDuplicable(); - AddOutput("ParamOut", - "(Tensor) This output is updated parameter. " - "It shared memory with Input(Param).") - .AsDuplicable(); - AddOutput("VelocityOut", - "(Tensor) This output is updated velocity. " - "It shared memory with Input(Velocity).") - .AsDuplicable(); - AddOutput("MasterParamOut", - "The updated FP32 master weight for AMP. " - "It shared memory with Input(MasterParam).") - .AsDispensable() - .AsDuplicable(); - AddAttr("mu", "(float) Momentum coefficient"); - AddAttr("use_nesterov", - "(bool, default false) " - "Use Nesterov Momentum or not.") - .SetDefault(false); - AddAttr>( - "regularization_method", - "(string) regularization_method, right now only " - "support l2decay or none") - .SetDefault({}); - AddAttr>("regularization_coeff", - "(float) regularization_coeff") - .SetDefault({}); - AddAttr("multi_precision", - "(bool, default false) " - "Whether to use multi-precision during weight updating.") - .SetDefault(false); - AddAttr( - "rescale_grad", - "(float, default 1.0) Multiply the gradient with `rescale_grad`" - "before updating. 
Often choose to be `1.0/batch_size`.") - .SetDefault(1.0f); - AddComment(R"DOC(Merged Momentum Optimizer.)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -DECLARE_INFER_SHAPE_FUNCTOR(merged_momentum, - MergedMomentumInferShapeFunctor, - PD_INFER_META(phi::MergedMomentumInferMeta)); - -REGISTER_OP_WITHOUT_GRADIENT(merged_momentum, - ops::MergedMomentumOp, - ops::MergedMomentumOpMaker, - MergedMomentumInferShapeFunctor); diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 53ae099e762ea..e44bbe7e6dd5b 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -884,17 +884,6 @@ data_type : param inplace : (param -> param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1_pow -> beta1_pow_out), (beta2_pow -> beta2_pow_out), (master_param -> master_param_out) -- op : merged_momentum_ - args : (Tensor[] param, Tensor[] grad, Tensor[] velocity, Tensor[] learning_rate, Tensor[] master_param, float mu, bool use_nesterov = false, str[] regularization_method = {}, float[] regularization_coeff = {}, bool multi_precision = false, float rescale_grad = 1.0f) - output : Tensor[](param_out){param.size()}, Tensor[](velocity_out){param.size()}, Tensor[](master_param_out){param.size()} - infer_meta : - func : MergedMomentumInferMeta - optional: master_param - kernel : - func : merged_momentum - data_type : param - inplace : (param -> param_out), (velocity -> velocity_out), (master_param -> master_param_out) - - op : min args : (Tensor x, IntArray axis={}, bool keepdim=false) output : Tensor(out) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index f807a3d748ba1..98a00e6f5a9c0 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -1421,6 +1421,12 @@ outputs : out : Out +- op : merged_momentum_ + inputs : + {param : Param, grad : Grad, velocity : Velocity, learning_rate : LearningRate, master_param : MasterParam} + outputs : + {param_out : ParamOut, velocity_out : VelocityOut, master_param_out : MasterParamOut} + - op : meshgrid backward : meshgrid_grad inputs : diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 3afbf00c049e6..31f4a114b7142 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1190,6 +1190,17 @@ kernel : func : merge_selected_rows {selected_rows -> selected_rows} +- op : merged_momentum_ + args : (Tensor[] param, Tensor[] grad, Tensor[] velocity, Tensor[] learning_rate, Tensor[] master_param, float mu, bool use_nesterov = false, str[] regularization_method = {}, float[] regularization_coeff = {}, bool multi_precision = false, float rescale_grad = 1.0f) + output : Tensor[](param_out){param.size()}, Tensor[](velocity_out){param.size()}, Tensor[](master_param_out){param.size()} + infer_meta : + func : MergedMomentumInferMeta + kernel : + func : merged_momentum + data_type : param + optional: master_param, master_param_out + inplace : (param -> param_out), (velocity -> velocity_out), (master_param -> master_param_out) + - op : meshgrid args : (Tensor[] inputs) output : Tensor[]{inputs.size()} diff --git a/paddle/phi/ops/compat/merged_momentum_sig.cc b/paddle/phi/ops/compat/merged_momentum_sig.cc deleted file mode 100644 index 3444d5e2d3097..0000000000000 --- a/paddle/phi/ops/compat/merged_momentum_sig.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature MergedMomentumOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature( - "merged_momentum", - {"Param", "Grad", "Velocity", "LearningRate", "MasterParam"}, - {"mu", - "use_nesterov", - "regularization_method", - "regularization_coeff", - "multi_precision", - "rescale_grad"}, - { - "ParamOut", - "VelocityOut", - "MasterParamOut", - }); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(merged_momentum, - phi::MergedMomentumOpArgumentMapping); From 3e66845f017033669bccca6a385cde64937544bb Mon Sep 17 00:00:00 2001 From: WangZhen <23097963+0x45f@users.noreply.github.com> Date: Tue, 11 Apr 2023 11:16:53 +0800 Subject: [PATCH 02/59] [Dy2St]Add backend for to_static API (#52596) * Add backend for to_static API --- .../fluid/tests/unittests/test_input_spec.py | 2 +- python/paddle/jit/api.py | 24 ++++++- .../paddle/jit/dy2static/partial_program.py | 13 ++-- .../jit/dy2static/program_translator.py | 68 +++++++++++-------- python/paddle/jit/dy2static/utils.py | 21 +++++- test/dygraph_to_static/test_cinn_prim.py | 15 ++++ .../test_partial_program_hook.py | 2 +- 7 files changed, 105 insertions(+), 40 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_input_spec.py b/python/paddle/fluid/tests/unittests/test_input_spec.py index dad821438afb8..2bdce8b4b58c5 100644 --- a/python/paddle/fluid/tests/unittests/test_input_spec.py +++ b/python/paddle/fluid/tests/unittests/test_input_spec.py @@ -349,7 +349,7 @@ def test_run(self): ) x = paddle.randn([2, 10]) out = net(x) - np.testing.assert_equal(out.shape, [2, 5]) + np.testing.assert_equal(net.forward._input_spec, None) if __name__ == '__main__': diff --git a/python/paddle/jit/api.py b/python/paddle/jit/api.py index bc07609a111ee..bde75f6ad73a0 100644 --- a/python/paddle/jit/api.py +++ b/python/paddle/jit/api.py @@ -218,8 +218,23 @@ def ignore_module(modules: list[Any]): add_ignore_module(modules) +def _check_and_set_backend(backend, build_strategy): + if backend not in ['CINN', None]: + raise ValueError( + "The backend of to_static should be 'CINN' or None, but received {}.".format( + backend + ) + ) + if backend == 'CINN': + build_strategy.build_cinn_pass = True + + def to_static( - function=None, input_spec=None, build_strategy=None, property=False + function=None, + input_spec=None, + build_strategy=None, + backend=None, + **kwargs, ): """ Converts imperative dygraph APIs into declarative function APIs. Decorator @@ -228,7 +243,6 @@ def to_static( Tensor(s) to do imperative training, inference, or other operations. If the decorated function calls other imperative function, the called one will be converted into declarative function as well. - Args: function (callable): callable imperative function. 
input_spec(list[InputSpec]|tuple[InputSpec]): list/tuple of InputSpec to specific the shape/dtype/name @@ -238,7 +252,8 @@ def to_static( in the computational graph and memory optimization during the execution of the computational graph. For more information about build_strategy, please refer to :code:`paddle.static.BuildStrategy`. The default is None. - property(bool, Optional): whether the fucntion is python property. The default is False. + backend(str, Optional): Specifies compilation backend, which can be `CINN` or None. When backend is `CINN`, CINN compiler will be used to speed up training and inference. + kwargs: Support keys including `property`, set `property` to True if the fucntion is python property. Returns: @@ -263,6 +278,7 @@ def func(x): print(x_v) # [[2. 2.]] """ + property = kwargs.get("property", False) def decorated(python_func): """ @@ -279,6 +295,7 @@ def decorated(python_func): input_spec=input_spec, build_strategy=build_strategy, property=property, + backend=backend, ), ) @@ -291,6 +308,7 @@ def decorated(python_func): type(build_strategy).__name__ ) ) + _check_and_set_backend(backend, build_strategy) # for usage: `to_static(foo, ...)` if function is not None: diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 9538bb9300742..7a6afc82b1bf0 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -27,7 +27,12 @@ from paddle.optimizer.lr import LRScheduler from . import logging_utils -from .utils import RETURN_NO_VALUE_MAGIC_NUM, _out_grad_names, _param_grad_names +from .utils import ( + RETURN_NO_VALUE_MAGIC_NUM, + _out_grad_names, + _param_grad_names, + backend_guard, +) __all__ = [] @@ -197,6 +202,7 @@ def __init__( # program_id -> list(scope) self._scope_cache = {} self._hooker = None + self._backend = kwargs.get('backend', None) def __call__(self, inputs): """ @@ -636,10 +642,9 @@ def _append_backward_desc(self, main_program): start_idx = len(program.block(0).ops) + len(self._outputs.tolist()) if targets: - # TODO(CZ): later when use cinn, set_prim_all_enabled and check_and_set_prim_all_enabled will be set at else branch. 
- core.check_and_set_prim_all_enabled() start_idx = len(program.block(0).ops) + len(self._outputs.tolist()) - backward.gradients(targets=targets, inputs=[]) + with backend_guard(self._backend): + backward.gradients(targets=targets, inputs=[]) if self._hooker: program, start_idx = self._hooker.after_append_backward( diff --git a/python/paddle/jit/dy2static/program_translator.py b/python/paddle/jit/dy2static/program_translator.py index 3777af8879d9a..a8be1abb2a10f 100644 --- a/python/paddle/jit/dy2static/program_translator.py +++ b/python/paddle/jit/dy2static/program_translator.py @@ -48,6 +48,7 @@ NO_SHAPE_VAR_TYPE, ast_to_func, ast_to_source_code, + backend_guard, func_to_source_code, input_specs_compatible, is_paddle_func, @@ -334,7 +335,7 @@ def __init__(self, function, input_spec=None, **kwargs): self._class_instance = None if input_spec is not None and prim_or_cinn_is_enabled( - kwargs.get("build_strategy", None) + kwargs.get("build_strategy", None), kwargs.get("backend", None) ): from paddle.static import InputSpec @@ -1184,11 +1185,9 @@ def __init__(self): def _build_once(self, cache_key): # TODO(Aurelius84): Need a gloabl FLAGS to enable/disable to_prim enable_prim = cache_key.kwargs['build_strategy'].build_cinn_pass - # TODO(CZ): later when use cinn, set_prim_all_enabled and check_and_set_prim_all_enabled will be set at else branch. # NOTE(xiongkun): Need a global FLAGS to enable/disable fallback enable_fallback = enable_prim - core.check_and_set_prim_all_enabled() try: concrete_program = ConcreteProgram.from_func_spec( func_spec=cache_key.function_spec, @@ -1216,7 +1215,8 @@ def _build_once(self, cache_key): else: raise - if prim_or_cinn_is_enabled(cache_key.kwargs['build_strategy']): + backend = cache_key.kwargs['backend'] + if prim_or_cinn_is_enabled(cache_key.kwargs['build_strategy'], backend): for var in concrete_program.main_program.list_vars(): if var.type not in NO_SHAPE_VAR_TYPE and -1 in var.shape: warnings.warn( @@ -1228,10 +1228,11 @@ def _build_once(self, cache_key): partial_program = partial_program_from( concrete_program, cache_key.class_instance is not None ) - if core._is_fwd_prim_enabled(): - partial_program.set_hooker( - PrimHooker(concrete_program.main_program) - ) + with backend_guard(backend): + if core._is_fwd_prim_enabled(): + partial_program.set_hooker( + PrimHooker(concrete_program.main_program, backend) + ) return concrete_program, partial_program def __getitem__(self, item): @@ -1291,39 +1292,46 @@ def clear(self): class PrimHooker(PartialProgramLayerHook): - def __init__(self, original_program): + def __init__(self, original_program, backend): if len(original_program.blocks) > 1: raise ValueError( 'The primitive mode only support one block currently.' 
) + self.backend = backend self.custom_vjps = set() - if core._is_all_prim_enabled(): - self.custom_vjps = { - op.type - for op in original_program.block(0).ops - if core.has_comp_grad_op_maker(op.type) - } + with backend_guard(self.backend): + if core._is_all_prim_enabled(): + self.custom_vjps = { + op.type + for op in original_program.block(0).ops + if core.has_comp_grad_op_maker(op.type) + } def before_append_backward(self, forward_program): - if core._is_fwd_prim_enabled(): - _to_prim(forward_program.blocks, blacklist=self.custom_vjps) - return forward_program + with backend_guard(self.backend): + if core._is_fwd_prim_enabled(): + _to_prim(forward_program.blocks, blacklist=self.custom_vjps) + return forward_program def after_append_backward(self, whole_program, backward_start_idx): - backward_length = len(whole_program.block(0).ops) - backward_start_idx - if core._is_fwd_prim_enabled() and len(self.custom_vjps) != 0: - # only process backward part of block - _to_prim(whole_program.blocks, backward_length=backward_length) - new_start_index = len(whole_program.block(0).ops) - backward_length - if backward_length > 0: - # only process forward part of block - _to_prim(whole_program.blocks, start_idx=new_start_index) - return whole_program, new_start_index + with backend_guard(self.backend): + backward_length = ( + len(whole_program.block(0).ops) - backward_start_idx + ) + if core._is_fwd_prim_enabled() and len(self.custom_vjps) != 0: + # only process backward part of block + _to_prim(whole_program.blocks, backward_length=backward_length) + new_start_index = len(whole_program.block(0).ops) - backward_length + if backward_length > 0: + # only process forward part of block + _to_prim(whole_program.blocks, start_idx=new_start_index) + return whole_program, new_start_index def after_infer(self, infer_program): - if core._is_fwd_prim_enabled(): - _to_prim(infer_program.block(0)) - return infer_program + with backend_guard(self.backend): + if core._is_fwd_prim_enabled(): + _to_prim(infer_program.block(0)) + return infer_program class ProgramTranslator: diff --git a/python/paddle/jit/dy2static/utils.py b/python/paddle/jit/dy2static/utils.py index 3608b8d0641a5..28c8c739f2efc 100644 --- a/python/paddle/jit/dy2static/utils.py +++ b/python/paddle/jit/dy2static/utils.py @@ -35,6 +35,7 @@ from paddle.fluid import core, unique_name from paddle.fluid.data_feeder import convert_dtype from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.wrapped_decorator import signature_safe_contextmanager from paddle.utils import gast from .ast_utils import ast_to_source_code @@ -1498,7 +1499,10 @@ def _out_grad_names(program_desc, fwd_end_op_index, out_size): return names -def prim_or_cinn_is_enabled(build_strategy): +def prim_or_cinn_is_enabled(build_strategy, backend): + if backend == 'CINN': + return True + if build_strategy is not None and build_strategy.build_cinn_pass: return True @@ -1534,3 +1538,18 @@ def name_judge(): return True else: return False + + +@signature_safe_contextmanager +def backend_guard(backend): + core.check_and_set_prim_all_enabled() + orign_fwd = core._is_fwd_prim_enabled() + orign_bwd = core._is_bwd_prim_enabled() + + if backend == 'CINN': + core._set_prim_all_enabled(True) + try: + yield + finally: + core._set_prim_forward_enabled(orign_fwd) + core._set_prim_backward_enabled(orign_bwd) diff --git a/test/dygraph_to_static/test_cinn_prim.py b/test/dygraph_to_static/test_cinn_prim.py index 6ace7696c383a..c5527e85238b6 100644 --- a/test/dygraph_to_static/test_cinn_prim.py +++ 
b/test/dygraph_to_static/test_cinn_prim.py @@ -163,5 +163,20 @@ def test_cinn_prim(self): ) +class TestBackend(unittest.TestCase): + def test_backend(self): + x = paddle.randn([2, 4]) + out1 = self.forward(x, 'CINN') + out2 = self.forward(x, None) + np.testing.assert_allclose(out1, out2, rtol=1e-6) + + def forward(self, x, beckend=None): + paddle.seed(2022) + net = PrimeNet() + net = paddle.jit.to_static(net, backend=beckend) + out = net(x) + return out + + if __name__ == '__main__': unittest.main() diff --git a/test/dygraph_to_static/test_partial_program_hook.py b/test/dygraph_to_static/test_partial_program_hook.py index 896dde419bf20..b9a64d3d0993a 100644 --- a/test/dygraph_to_static/test_partial_program_hook.py +++ b/test/dygraph_to_static/test_partial_program_hook.py @@ -44,7 +44,7 @@ def f(): f ).get_concrete_program() self._hook = program_translator.PrimHooker( - concrete_program.main_program + concrete_program.main_program, None ) self._forward = partial_program.forward_program self._whole = partial_program._train_program From 17fec4e9aae895c513d9b07af55adf56059c5e42 Mon Sep 17 00:00:00 2001 From: cyberslack_lee Date: Tue, 11 Apr 2023 11:19:51 +0800 Subject: [PATCH 03/59] =?UTF-8?q?=E3=80=90Hackathon4=20No58=E3=80=91empty?= =?UTF-8?q?=5Flike=20fp16&bf16=20API=20test=20(#52668)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/unittests/test_empty_like_op.py | 90 +++++++++++++++++-- python/paddle/tensor/attribute.py | 1 + python/paddle/tensor/creation.py | 20 ++++- 3 files changed, 102 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_empty_like_op.py b/python/paddle/fluid/tests/unittests/test_empty_like_op.py index 8ccaabd7c2cf0..164275b1a7d83 100644 --- a/python/paddle/fluid/tests/unittests/test_empty_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_empty_like_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np +from eager_op_test import convert_uint16_to_float import paddle from paddle.fluid import core @@ -38,7 +39,7 @@ def __check_out__(self, out): f'shape should be {self.dst_shape}, but get {shape}', ) - if data_type in ['float32', 'float64', 'int32', 'int64']: + if data_type in ['float16', 'float32', 'float64', 'int32', 'int64']: max_value = np.nanmax(out) min_value = np.nanmin(out) always_non_full_zero = max_value >= min_value @@ -47,6 +48,16 @@ def __check_out__(self, out): always_full_zero or always_non_full_zero, 'always_full_zero or always_non_full_zero.', ) + elif data_type in ['uint16']: + uout = convert_uint16_to_float(out) + max_value = np.nanmax(uout) + min_value = np.nanmin(uout) + always_non_full_zero = max_value >= min_value + always_full_zero = max_value == 0.0 and min_value == 0.0 + self.assertTrue( + always_full_zero or always_non_full_zero, + 'always_full_zero or always_non_full_zero.', + ) elif data_type in ['bool']: total_num = out.size true_num = np.sum(out) @@ -154,16 +165,13 @@ def setUp(self): def test_static_graph(self): paddle.enable_static() - - dtype = 'float32' - train_program = Program() startup_program = Program() with program_guard(train_program, startup_program): - x = np.random.random(self.x_shape).astype(dtype) + x = np.random.random(self.x_shape).astype(self.dtype) data_x = paddle.static.data( - 'x', shape=self.data_x_shape, dtype=dtype + 'x', shape=self.data_x_shape, dtype=self.dtype ) out = paddle.empty_like(data_x) @@ -176,7 +184,7 @@ def test_static_graph(self): exe = paddle.static.Executor(place) res = exe.run(train_program, 
feed={'x': x}, fetch_list=[out]) - self.dst_dtype = dtype + self.dst_dtype = self.dtype self.dst_shape = x.shape self.__check_out__(res[0]) @@ -185,12 +193,80 @@ def test_static_graph(self): def init_config(self): self.x_shape = (200, 3) self.data_x_shape = [200, 3] + self.dtype = 'float32' class TestEmptyLikeAPI_Static2(TestEmptyLikeAPI_Static): def init_config(self): self.x_shape = (3, 200, 3) self.data_x_shape = [-1, 200, 3] + self.dtype = 'float32' + + +class TestEmptyLikeAPI_StaticForFP16Op(TestEmptyLikeAPICommon): + def setUp(self): + self.init_config() + + def init_config(self): + self.x_shape = (200, 3) + self.data_x_shape = [200, 3] + self.dtype = 'float16' + + def test_static_graph(self): + paddle.enable_static() + if paddle.fluid.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + x = np.random.random([200, 3]).astype(self.dtype) + data_x = paddle.static.data( + name="x", shape=[200, 3], dtype=self.dtype + ) + out = paddle.empty_like(data_x) + exe = paddle.static.Executor(place) + res = exe.run( + paddle.static.default_main_program(), + feed={'x': x}, + fetch_list=[out], + ) + + self.dst_dtype = self.dtype + self.dst_shape = x.shape + self.__check_out__(res[0]) + + +class TestEmptyLikeAPI_StaticForBF16Op(TestEmptyLikeAPICommon): + def setUp(self): + self.init_config() + + def init_config(self): + self.x_shape = (200, 3) + self.data_x_shape = [200, 3] + self.dtype = 'uint16' + + def test_static_graph(self): + paddle.enable_static() + if paddle.fluid.core.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + with paddle.static.program_guard( + paddle.static.Program(), paddle.static.Program() + ): + x = np.random.random([200, 3]).astype(np.uint16) + data_x = paddle.static.data( + name="x", shape=[200, 3], dtype=np.uint16 + ) + out = paddle.empty_like(data_x) + exe = paddle.static.Executor(place) + res = exe.run( + paddle.static.default_main_program(), + feed={'x': x}, + fetch_list=[out], + ) + + self.dst_dtype = self.dtype + self.dst_shape = x.shape + self.__check_out__(res[0]) class TestEmptyError(unittest.TestCase): diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index 7a859d64d0c51..63af833747b1b 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -120,6 +120,7 @@ def shape(input): 'int64', 'complex64', 'complex128', + 'uint16', ], 'shape', ) diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 602fa7186ec84..99d9ad594c119 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -1954,13 +1954,29 @@ def empty_like(x, dtype=None, name=None): check_variable_and_dtype( x, 'x', - ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'], + [ + 'bool', + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + 'uint16', + ], 'empty_like', ) check_dtype( dtype, 'dtype', - ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'], + [ + 'bool', + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + 'uint16', + ], 'empty_like', ) out = helper.create_variable_for_type_inference(dtype=dtype) From 230325906483b3e3b473f5177ede1a0de2132415 Mon Sep 17 00:00:00 2001 From: wangzhen38 <41941775+wangzhen38@users.noreply.github.com> Date: Tue, 11 Apr 2023 11:20:35 +0800 Subject: [PATCH 04/59] [BUG Fixs] adadelta lr support (#49732) --- .../fluid/operators/optimizers/adadelta_op.cc | 1 + paddle/fluid/pybind/eager_generator.h | 7 ++- 
paddle/phi/api/yaml/legacy_ops.yaml | 2 +- paddle/phi/infermeta/multiary.cc | 6 +++ paddle/phi/infermeta/multiary.h | 1 + paddle/phi/kernels/adadelta_kernel.h | 1 + .../phi/kernels/impl/adadelta_kernel_impl.h | 47 ++++++++++++------- paddle/phi/kernels/xpu/adadelta_kernel.cc | 1 + paddle/phi/ops/compat/adadelta_sig.cc | 20 ++++---- python/paddle/fluid/optimizer.py | 2 + .../fluid/tests/unittests/test_adadelta_op.py | 9 +++- python/paddle/optimizer/adadelta.py | 2 + 12 files changed, 69 insertions(+), 30 deletions(-) diff --git a/paddle/fluid/operators/optimizers/adadelta_op.cc b/paddle/fluid/operators/optimizers/adadelta_op.cc index 2df8ff971cef1..cb2c374d017fd 100644 --- a/paddle/fluid/operators/optimizers/adadelta_op.cc +++ b/paddle/fluid/operators/optimizers/adadelta_op.cc @@ -39,6 +39,7 @@ class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("AvgSquaredGrad", "(Tensor) Input average of squared gradient"); AddInput("AvgSquaredUpdate", "(Tensor) Input average of squared parameter updates"); + AddInput("LearningRate", "(Tensor) Learning rate"); AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable(); AddOutput("ParamOut", "(Tensor) Output parameter"); diff --git a/paddle/fluid/pybind/eager_generator.h b/paddle/fluid/pybind/eager_generator.h index 2eb7934c911c5..03b8690569c22 100644 --- a/paddle/fluid/pybind/eager_generator.h +++ b/paddle/fluid/pybind/eager_generator.h @@ -220,7 +220,12 @@ std::map> op_ins_map = { {"sgd", {"Param", "LearningRate", "Grad", "MasterParam"}}, {"adagrad", {"Param", "Grad", "Moment", "LearningRate", "MasterParam"}}, {"adadelta", - {"Param", "Grad", "AvgSquaredGrad", "AvgSquaredUpdate", "MasterParam"}}, + {"Param", + "Grad", + "AvgSquaredGrad", + "AvgSquaredUpdate", + "LearningRate", + "MasterParam"}}, {"graph_khop_sampler", {"Row", "Eids", "Col_Ptr", "X"}}, {"nce", {"Input", diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index e44bbe7e6dd5b..2d0aadcf5362c 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -11,7 +11,7 @@ backward : abs_grad - op : adadelta_ - args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, Tensor master_param, float rho, float epsilon, bool multi_precision) + args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, Tensor learning_rate, Tensor master_param, float rho, float epsilon, bool multi_precision) output : Tensor(param_out), Tensor(moment_out), Tensor(inf_norm_out), Tensor(master_param_out) infer_meta : func : AdadeltaInferMeta diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index ea93a5874932e..7364f85e75155 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -40,6 +40,7 @@ void AdadeltaInferMeta(const MetaTensor& param, const MetaTensor& grad, const MetaTensor& avg_squared_grad, const MetaTensor& avg_squared_update, + const MetaTensor& learning_rate, const MetaTensor& master_param, float rho, float epsilon, @@ -48,6 +49,11 @@ void AdadeltaInferMeta(const MetaTensor& param, MetaTensor* avg_squared_grad_out, MetaTensor* avg_squared_update_out, MetaTensor* master_param_out) { + auto lr_dims = learning_rate.dims(); + PADDLE_ENFORCE_EQ( + phi::product(lr_dims), + 1, + phi::errors::InvalidArgument("LearningRate should have one element")); auto param_dims = param.dims(); PADDLE_ENFORCE_EQ( param_dims, diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 
cf6ca3c2a9fb6..178910e3620c9 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -43,6 +43,7 @@ void AdadeltaInferMeta(const MetaTensor& param, const MetaTensor& grad, const MetaTensor& avg_squared_grad, const MetaTensor& avg_squared_update, + const MetaTensor& learning_rate, const MetaTensor& master_param, float rho, float epsilon, diff --git a/paddle/phi/kernels/adadelta_kernel.h b/paddle/phi/kernels/adadelta_kernel.h index 15c07b3e6f967..16f4e6ca26980 100644 --- a/paddle/phi/kernels/adadelta_kernel.h +++ b/paddle/phi/kernels/adadelta_kernel.h @@ -24,6 +24,7 @@ void AdadeltaKernel(const Context& dev_ctx, const DenseTensor& grad, const DenseTensor& avg_squared_grad, const DenseTensor& avg_squared_update, + const DenseTensor& learning_rate, const paddle::optional& master_param, float rho, float epsilon, diff --git a/paddle/phi/kernels/impl/adadelta_kernel_impl.h b/paddle/phi/kernels/impl/adadelta_kernel_impl.h index b0c0a072acd55..c432c72d832c6 100644 --- a/paddle/phi/kernels/impl/adadelta_kernel_impl.h +++ b/paddle/phi/kernels/impl/adadelta_kernel_impl.h @@ -13,11 +13,14 @@ // limitations under the License. #pragma once +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/kernels/adadelta_kernel.h" #include "paddle/phi/kernels/funcs/eigen/common.h" -#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace phi { @@ -27,6 +30,7 @@ void AdadeltaKernel(const Context& dev_ctx, const DenseTensor& grad, const DenseTensor& avg_squared_grad, const DenseTensor& avg_squared_update, + const DenseTensor& learning_rate, const paddle::optional& master_param, float rho, float epsilon, @@ -56,29 +60,36 @@ void AdadeltaKernel(const Context& dev_ctx, auto eigen_avg_squared_update_out = EigenVector::Flatten(*avg_squared_update_out); auto& place = *dev_ctx.eigen_device(); - auto eigen_grad_cast = eigen_grad.template cast(); - eigen_avg_squared_grad_out.device(place) = rho_ * eigen_avg_squared_grad + (1 - rho_) * eigen_grad_cast.square(); - auto update = -((eigen_avg_squared_update + epsilon_) / - (eigen_avg_squared_grad_out + epsilon_)) - .sqrt() * - eigen_grad_cast; - eigen_avg_squared_update_out.device(place) = - rho_ * eigen_avg_squared_update + (1 - rho_) * update.square(); - - if (multi_precision) { - auto eigen_master_param_out = - EigenVector::Flatten(*master_param_outs); - auto eigen_master_param = EigenVector::Flatten(*master_param); - - eigen_master_param_out.device(place) = eigen_master_param + update; + auto update = + -(((eigen_avg_squared_update + epsilon_).sqrt()) / + ((eigen_avg_squared_grad_out + epsilon_).sqrt()) * eigen_grad_cast); + Eigen::DSizes m_dsize(avg_squared_update_out->numel()); + if (paddle::platform::is_cpu_place(dev_ctx.GetPlace())) { + auto* lr = learning_rate.data(); eigen_param_out.device(place) = - (eigen_param.template cast() + update).template cast(); + eigen_param + lr[0] * update.template cast(); } else { - eigen_param_out.device(place) = eigen_param + update.template cast(); + auto lr = EigenVector::Flatten(learning_rate); + if (multi_precision) { + auto eigen_master_param_out = + EigenVector::Flatten(*master_param_outs); + auto eigen_master_param = EigenVector::Flatten(*master_param); + + eigen_master_param_out.device(place) = + eigen_master_param + lr.broadcast(m_dsize) * update; + 
eigen_param_out.device(place) = (eigen_param.template cast() + + lr.broadcast(m_dsize) * update) + .template cast(); + } else { + eigen_param_out.device(place) = + eigen_param + (lr.broadcast(m_dsize) * update).template cast(); + } } + eigen_avg_squared_update_out.device(place) = + rho_ * eigen_avg_squared_update + (1 - rho_) * update.square(); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/adadelta_kernel.cc b/paddle/phi/kernels/xpu/adadelta_kernel.cc index e02a5aeabad2e..b87ec1afbdc36 100644 --- a/paddle/phi/kernels/xpu/adadelta_kernel.cc +++ b/paddle/phi/kernels/xpu/adadelta_kernel.cc @@ -25,6 +25,7 @@ void AdadeltaKernel(const Context& dev_ctx, const DenseTensor& grad, const DenseTensor& avg_squared_grad, const DenseTensor& avg_squared_update, + const DenseTensor& learning_rate, const paddle::optional& master_param, float rho, float epsilon, diff --git a/paddle/phi/ops/compat/adadelta_sig.cc b/paddle/phi/ops/compat/adadelta_sig.cc index fd285e7e5d0e5..da7e4229a0d22 100644 --- a/paddle/phi/ops/compat/adadelta_sig.cc +++ b/paddle/phi/ops/compat/adadelta_sig.cc @@ -18,14 +18,18 @@ namespace phi { KernelSignature AdadeltaOpArgumentMapping(const ArgumentMappingContext& ctx) { if (ctx.IsDenseTensorInput("Grad")) { - return KernelSignature( - "adadelta", - {"Param", "Grad", "AvgSquaredGrad", "AvgSquaredUpdate", "MasterParam"}, - {"rho", "epsilon", "multi_precision"}, - {"ParamOut", - "AvgSquaredGradOut", - "AvgSquaredUpdateOut", - "MasterParamOut"}); + return KernelSignature("adadelta", + {"Param", + "Grad", + "AvgSquaredGrad", + "AvgSquaredUpdate", + "LearningRate", + "MasterParam"}, + {"rho", "epsilon", "multi_precision"}, + {"ParamOut", + "AvgSquaredGradOut", + "AvgSquaredUpdateOut", + "MasterParamOut"}); } return KernelSignature("unregistered", {}, {}, {}); diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 6ed9e674689ee..db483b151e4eb 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -3215,6 +3215,7 @@ def _append_optimize_op(self, block, param_and_grad): param_and_grad[1], avg_squared_grad_acc, avg_squared_update_acc, + self._create_param_lr(param_and_grad), master_weight, self._rho, self._epsilon, @@ -3227,6 +3228,7 @@ def _append_optimize_op(self, block, param_and_grad): "Grad": param_and_grad[1], "AvgSquaredGrad": avg_squared_grad_acc, "AvgSquaredUpdate": avg_squared_update_acc, + "LearningRate": self._create_param_lr(param_and_grad), } outputs = { "ParamOut": param_and_grad[0], diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py index 11db47b2475b9..f3eca8fec9cc7 100644 --- a/python/paddle/fluid/tests/unittests/test_adadelta_op.py +++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py @@ -26,6 +26,7 @@ def adadelta_wrapper( Grad, AvgSquaredGrad, AvgSquaredUpdate, + LearningRate, master_weight=None, rho=0.95, epsilon=1e-6, @@ -35,12 +36,13 @@ def adadelta_wrapper( Grad, AvgSquaredGrad, AvgSquaredUpdate, + LearningRate, None, rho, epsilon, False, ) - return Param, AvgSquaredGrad, AvgSquaredUpdate + return Param, AvgSquaredGrad, AvgSquaredUpdate, LearningRate class TestAdadeltaOp1(OpTest): @@ -58,11 +60,13 @@ def setUp(self): rho = 0.95 epsilon = 1e-6 + learning_rate = 1.0 self.inputs = { 'Param': param, 'Grad': grad, 'AvgSquaredGrad': avg_squared_grad, 'AvgSquaredUpdate': avg_squared_update, + 'LearningRate': np.array([learning_rate]).astype("float32"), } self.attrs = {'rho': rho, 'epsilon': epsilon} @@ -113,12 +117,13 @@ 
def setUp(self): epsilon = 1e-6 self.attrs = {'rho': rho, 'epsilon': epsilon} - + learning_rate = 1.0 self.inputs = { 'Param': param, 'Grad': grad, 'AvgSquaredGrad': avg_squared_grad, 'AvgSquaredUpdate': avg_squared_update, + 'LearningRate': np.array([learning_rate]).astype("float32"), } avg_squared_grad_out = rho * avg_squared_grad + (1 - rho) * np.square( diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index 1cdb61f698e6b..c760c535da022 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -197,6 +197,7 @@ def _append_optimize_op(self, block, param_and_grad): param_and_grad[1], avg_squared_grad_acc, avg_squared_update_acc, + self._create_param_lr(param_and_grad), master_weight, self._rho, self._epsilon, @@ -213,6 +214,7 @@ def _append_optimize_op(self, block, param_and_grad): "Grad": param_and_grad[1], "AvgSquaredGrad": avg_squared_grad_acc, "AvgSquaredUpdate": avg_squared_update_acc, + "LearningRate": self._create_param_lr(param_and_grad), } outputs = { "ParamOut": param_and_grad[0], From f80a0fe9d81513957020b5bfd82cb4249101f0d3 Mon Sep 17 00:00:00 2001 From: risemeup1 <62429225+risemeup1@users.noreply.github.com> Date: Tue, 11 Apr 2023 12:01:31 +0800 Subject: [PATCH 05/59] fix_mac_m1_error (#52720) --- .../fleet/base/distributed_strategy.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 194e4bd667555..0f09440e4337c 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -24,12 +24,6 @@ from paddle.fluid.framework import _global_flags from paddle.fluid.wrapped_decorator import wrap_decorator -protobuf_version = google.protobuf.__version__ -if protobuf_version >= "4.21.0": - from google._upb import _message -else: - from google.protobuf.pyext import _message - __all__ = [] non_auto_func_called = True @@ -2512,10 +2506,19 @@ def __repr__(self): self.strategy, f.name + "_configs" ) config_fields = my_configs.DESCRIPTOR.fields + protobuf_version = google.protobuf.__version__ + if protobuf_version >= "4.21.0": + RepeatedScalarContainer = ( + google._upb._message.RepeatedScalarContainer + ) + else: + RepeatedScalarContainer = ( + google.protobuf.pyext._message.RepeatedScalarContainer + ) for ff in config_fields: if isinstance( getattr(my_configs, ff.name), - _message.RepeatedScalarContainer, + RepeatedScalarContainer, ): values = getattr(my_configs, ff.name) for i, v in enumerate(values): From 6b74cf76cbaf521cd34633a572acb6abbbd124d8 Mon Sep 17 00:00:00 2001 From: wuhuachaocoding <77733235+wuhuachaocoding@users.noreply.github.com> Date: Tue, 11 Apr 2023 12:11:47 +0800 Subject: [PATCH 06/59] mp sync params & grads & opt states. 
(#51428) --- .../framework/distributed_strategy.proto | 8 + .../fleet/base/distributed_strategy.py | 6 + .../hybrid_parallel_optimizer.py | 81 +++++++++- .../fleet/hybrid_parallel_mp_model.py | 144 ++++++++++++++++++ 4 files changed, 238 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto index b9055d38d38c5..de2e38c2f1165 100755 --- a/paddle/fluid/framework/distributed_strategy.proto +++ b/paddle/fluid/framework/distributed_strategy.proto @@ -50,11 +50,19 @@ message ShardingConfig { optional bool enable_tuning = 15 [ default = false ]; // incubate for auto parallel } +// for dygraph +message MpConfig { + optional bool sync_param= 1 [ default = false ]; + optional bool sync_grad= 2 [ default = false ]; + optional bool sync_moment= 3 [ default = false ]; +} + message HybridConfig { optional int32 dp_degree = 1 [ default = -1 ]; optional int32 mp_degree = 2 [ default = 1 ]; optional int32 pp_degree = 3 [ default = 1 ]; optional int32 sharding_degree = 4 [ default = 1 ]; + optional MpConfig mp_configs = 5; } message AMPConfig { diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 0f09440e4337c..86292a2d90e79 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -1696,6 +1696,12 @@ def hybrid_configs(self, configs): check_configs_key( self.strategy.hybrid_configs, hybrid_config, "hybrid_configs" ) + + if "mp_configs" in configs: + assign_configs_value( + self.strategy.hybrid_configs.mp_configs, configs["mp_configs"] + ) + configs.pop("mp_configs") assign_configs_value(self.strategy.hybrid_configs, configs) @property diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index 98604b8db3d8c..acd34f1b1d5b8 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ import paddle from paddle import framework from paddle.autograd import no_grad +from paddle.distributed import fleet from paddle.framework import core from paddle.nn import ClipGradByGlobalNorm, clip @@ -292,6 +294,83 @@ def __init__(self, optimizer, hcg, strategy): self._inner_opt._grad_clip, hcg ) + def _filter_fn(self, param): + p_name = param.name + tar_param = ["embedding", "layer_norm", ".b_"] + if param.is_distributed is False: + for tar in tar_param: + if tar in p_name: + return True + return False + + def _step(self, parameters_list): + mp_group = self._hcg.get_model_parallel_group() + src_rank = self._hcg.get_model_parallel_group_src_rank() + params = None + mp_configs = None + + if mp_group.nranks > 1: + mp_configs = fleet.fleet._user_defined_strategy.hybrid_configs[ + "mp_configs" + ] + + if mp_configs and ( + mp_configs.sync_param + or mp_configs.sync_grad + or mp_configs.sync_moment + ): + params = sorted( + [p for p in parameters_list if self._filter_fn(p)], + key=lambda p: p.name, + ) + + if mp_group.nranks > 1 and mp_configs and mp_configs.sync_grad: + for p in params: + if p.grad is None: + continue + paddle.distributed.broadcast( + p.grad, src=src_rank, group=mp_group, sync_op=True + ) + + self._inner_opt.step() + + if mp_group.nranks > 1 and mp_configs and mp_configs.sync_param: + for p in params: + paddle.distributed.broadcast( + p, src=src_rank, group=mp_group, sync_op=True + ) + + if mp_group.nranks > 1 and mp_configs and mp_configs.sync_moment: + for p in params: + # support opt state of adam and adamw to broadcast now. + if isinstance( + self._inner_opt, + (paddle.optimizer.Adam, paddle.optimizer.AdamW), + ): + if ( + self._inner_opt._multi_precision + and p.name in self._master_weights + ): + paddle.distributed.broadcast( + self._inner_opt._master_weights[p.name], + src=src_rank, + group=mp_group, + sync_op=True, + ) + + moment1 = self._inner_opt._get_accumulator( + self._inner_opt._moment1_acc_str, p + ) + moment2 = self._inner_opt._get_accumulator( + self._inner_opt._moment2_acc_str, p + ) + paddle.distributed.broadcast( + moment1, src=src_rank, group=mp_group, sync_op=True + ) + paddle.distributed.broadcast( + moment2, src=src_rank, group=mp_group, sync_op=True + ) + @no_grad() @framework.dygraph_only def step(self): @@ -302,7 +381,7 @@ def step(self): if self._dp_enable: fused_allreduce_gradients(list(parameters_list), self._hcg) - self._inner_opt.step() + self._step(parameters_list) @no_grad() def minimize( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_mp_model.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_mp_model.py index dec1eb949ddb8..26e740bfa6b79 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_mp_model.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_mp_model.py @@ -181,6 +181,150 @@ def forward(self, x): return x +class TestDistMPSyncTraning(unittest.TestCase): + def setUp(self): + strategy = fleet.DistributedStrategy() + self.model_parallel_size = 2 + self.data_parallel_size = 1 + strategy.hybrid_configs = { + "dp_degree": self.data_parallel_size, + "mp_degree": self.model_parallel_size, + "pp_degree": 1, + "mp_configs": { + "sync_param": False, + "sync_grad": False, + "sync_moment": False, + }, + } + fleet.init(is_collective=True, strategy=strategy) + + def build_model_optimizer_train( + self, + batchs, + fp16=False, + mp_sync_param=False, + mp_sync_grad=False, + mp_sync_moment=False, + ): + hcg = 
fleet.get_hybrid_communicate_group() + word_size = hcg.get_model_parallel_world_size() + mp_id = hcg.get_model_parallel_rank() + dp_id = hcg.get_data_parallel_rank() + rank_id = dist.get_rank() + paddle.seed(2023) + np.random.seed(2023) + random.seed(2023) + set_random_seed(1024, dp_id, rank_id) + + np_fc1 = np.random.random_sample((hidden_size, inner_size)) + np_fc2 = np.random.random_sample((inner_size, hidden_size)) + + model = SimpleMPNet( + vocab_size, + hidden_size, + inner_size, + output_size, + np_fc1, + np_fc2, + mp_id, + ) + optimizer = paddle.optimizer.AdamW( + learning_rate=0.1, parameters=model.parameters() + ) + + strategy = fleet.fleet._user_defined_strategy + strategy.hybrid_configs = { + "dp_degree": self.data_parallel_size, + "mp_degree": self.model_parallel_size, + "pp_degree": 1, + "mp_configs": { + "sync_param": mp_sync_param, + "sync_grad": mp_sync_grad, + "sync_moment": mp_sync_moment, + }, + } + + model = fleet.distributed_model(model) + optimizer = fleet.distributed_optimizer(optimizer) + return self.train_batch(batchs, model, optimizer, fp16) + + def train_batch(self, batchs, model, optimizer, fp16=False): + losses = [] + if fp16: + scaler = paddle.amp.GradScaler(init_loss_scaling=1024) + scaler = fleet.distributed_scaler(scaler) + for batch in batchs: + with paddle.amp.auto_cast(enable=fp16, level='O1'): + output = model(batch) + loss = output.mean() + losses.append(loss.numpy()) + if fp16: + scaled = scaler.scale(loss) + scaled.backward() + scaler.step(optimizer) + scaler.update() + else: + loss.backward() + optimizer.step() + optimizer.clear_grad() + return losses + + def mp_sync_base( + self, mp_sync_param=False, mp_sync_grad=False, mp_sync_moment=False + ): + batchs = [] + for _ in range(5): + np_data = np.random.randint( + 0, + vocab_size, + ( + batch_size, + seq_length, + ), + ) + batchs.append(paddle.to_tensor(np_data)) + + losses = self.build_model_optimizer_train(batchs) + losses_sync = self.build_model_optimizer_train( + batchs, + mp_sync_param=mp_sync_param, + mp_sync_grad=mp_sync_grad, + mp_sync_moment=mp_sync_moment, + ) + + for i in range(len(losses)): + np.testing.assert_allclose(losses[i], losses_sync[i], rtol=1e-6) + + # test fp16 + losses_fp16 = self.build_model_optimizer_train(batchs, fp16=True) + losses_sync_fp16 = self.build_model_optimizer_train( + batchs, + fp16=True, + mp_sync_param=mp_sync_param, + mp_sync_grad=mp_sync_grad, + mp_sync_moment=mp_sync_moment, + ) + + for i in range(len(losses_fp16)): + np.testing.assert_allclose( + losses_fp16[i], losses_sync_fp16[i], rtol=1e-6 + ) + + def test_mp_sync_param(self): + self.mp_sync_base(mp_sync_param=True) + + def test_mp_sync_grad(self): + self.mp_sync_base(mp_sync_grad=True) + + def test_mp_sync_moment(self): + self.mp_sync_base(mp_sync_moment=True) + + def test_mp_sync_all(self): + self.mp_sync_base( + mp_sync_param=True, mp_sync_grad=True, mp_sync_moment=True + ) + + class TestDistMPTraning(unittest.TestCase): def setUp(self): strategy = fleet.DistributedStrategy() From aaf873b2859f8e70c8ed5be830674be211b2df8d Mon Sep 17 00:00:00 2001 From: WJJ1995 Date: Tue, 11 Apr 2023 13:17:40 +0800 Subject: [PATCH 07/59] [AMP OP&Test]Add fp16/bf16 support isnan/isfinite/isinf op (#52259) * add bfp16 test for isfinite * fixed for ci * deal with comments * fixed test * skip test in cpu * deal with comments * fixed for ci * fixed testcase * fixed for ci * fixed for testcase --- paddle/fluid/framework/data_type.h | 13 ++-- paddle/fluid/operators/isfinite_op.cu | 12 ++-- 
paddle/phi/kernels/cpu/isfinite_kernel.cc | 3 + paddle/phi/kernels/funcs/isfinite_functor.h | 21 ++++++ paddle/phi/kernels/gpu/isfinite_kernel.cu | 3 + .../fluid/tests/unittests/test_isfinite_op.py | 68 ++++++++++++++++++- python/paddle/tensor/math.py | 33 ++++++++- 7 files changed, 139 insertions(+), 14 deletions(-) mode change 100644 => 100755 paddle/fluid/operators/isfinite_op.cu mode change 100644 => 100755 python/paddle/fluid/tests/unittests/test_isfinite_op.py diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h index a05f2858c0df3..7e002c8154147 100644 --- a/paddle/fluid/framework/data_type.h +++ b/paddle/fluid/framework/data_type.h @@ -83,12 +83,13 @@ struct DataTypeTrait { _ForEachDataTypeHelper_( \ callback, ::paddle::platform::complex, COMPLEX128); -#define _ForEachDataTypeNormal_(callback) \ - _ForEachDataTypeHelper_(callback, float, FP32); \ - _ForEachDataTypeHelper_(callback, double, FP64); \ - _ForEachDataTypeHelper_(callback, int, INT32); \ - _ForEachDataTypeHelper_(callback, int64_t, INT64); \ - _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16); +#define _ForEachDataTypeNormal_(callback) \ + _ForEachDataTypeHelper_(callback, float, FP32); \ + _ForEachDataTypeHelper_(callback, double, FP64); \ + _ForEachDataTypeHelper_(callback, int, INT32); \ + _ForEachDataTypeHelper_(callback, int64_t, INT64); \ + _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16); \ + _ForEachDataTypeHelper_(callback, ::paddle::platform::bfloat16, BF16); // For the use of thrust, as index-type elements can be only integers. #define _ForEachDataTypeTiny_(callback) \ diff --git a/paddle/fluid/operators/isfinite_op.cu b/paddle/fluid/operators/isfinite_op.cu old mode 100644 new mode 100755 index d8e18f58fa9f2..80a65cbda916b --- a/paddle/fluid/operators/isfinite_op.cu +++ b/paddle/fluid/operators/isfinite_op.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/operators/isfinite_op.h" +#include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/float16.h" namespace ops = paddle::operators; @@ -22,18 +23,21 @@ REGISTER_OP_CUDA_KERNEL( ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel, - ops::OverflowKernel); + ops::OverflowKernel, + ops::OverflowKernel); REGISTER_OP_CUDA_KERNEL( isnan, ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel, - ops::OverflowKernel); + ops::OverflowKernel, + ops::OverflowKernel); REGISTER_OP_CUDA_KERNEL( isfinite, ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel, - ops::OverflowKernel); + ops::OverflowKernel, + ops::OverflowKernel); diff --git a/paddle/phi/kernels/cpu/isfinite_kernel.cc b/paddle/phi/kernels/cpu/isfinite_kernel.cc index 85d125794871d..c9f69c5f7e4f5 100644 --- a/paddle/phi/kernels/cpu/isfinite_kernel.cc +++ b/paddle/phi/kernels/cpu/isfinite_kernel.cc @@ -25,6 +25,7 @@ PD_REGISTER_KERNEL(isinf, float, double, phi::dtype::float16, + phi::dtype::bfloat16, int, int64_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); @@ -37,6 +38,7 @@ PD_REGISTER_KERNEL(isnan, float, double, phi::dtype::float16, + phi::dtype::bfloat16, int, int64_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); @@ -49,6 +51,7 @@ PD_REGISTER_KERNEL(isfinite, float, double, phi::dtype::float16, + phi::dtype::bfloat16, int, int64_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); diff --git a/paddle/phi/kernels/funcs/isfinite_functor.h b/paddle/phi/kernels/funcs/isfinite_functor.h index 1dc4fd57b4857..795b8f275c87e 100644 --- a/paddle/phi/kernels/funcs/isfinite_functor.h +++ b/paddle/phi/kernels/funcs/isfinite_functor.h @@ -45,6 +45,13 @@ struct IsNanFunctor { } }; +template <> +struct IsNanFunctor { + HOSTDEVICE bool operator()(const phi::dtype::bfloat16& a) const { + return phi::dtype::isnan(a); + } +}; + template struct IsInfFunctor { HOSTDEVICE bool operator()(const T& a) const { @@ -69,6 +76,13 @@ struct IsInfFunctor { } }; +template <> +struct IsInfFunctor { + HOSTDEVICE bool operator()(const phi::dtype::bfloat16& a) const { + return phi::dtype::isinf(a); + } +}; + template struct IsFiniteFunctor { HOSTDEVICE bool operator()(const T& a) const { @@ -94,5 +108,12 @@ struct IsFiniteFunctor { } }; +template <> +struct IsFiniteFunctor { + HOSTDEVICE bool operator()(const phi::dtype::bfloat16& a) const { + return phi::dtype::isfinite(a); + } +}; + } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/gpu/isfinite_kernel.cu b/paddle/phi/kernels/gpu/isfinite_kernel.cu index e8c2fa022ec7a..9bde1d7a5bd38 100644 --- a/paddle/phi/kernels/gpu/isfinite_kernel.cu +++ b/paddle/phi/kernels/gpu/isfinite_kernel.cu @@ -25,6 +25,7 @@ PD_REGISTER_KERNEL(isinf, float, double, phi::dtype::float16, + phi::dtype::bfloat16, int, int64_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); @@ -37,6 +38,7 @@ PD_REGISTER_KERNEL(isnan, float, double, phi::dtype::float16, + phi::dtype::bfloat16, int, int64_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); @@ -49,6 +51,7 @@ PD_REGISTER_KERNEL(isfinite, float, double, phi::dtype::float16, + phi::dtype::bfloat16, int, int64_t) { kernel->OutputAt(0).SetDataType(phi::DataType::BOOL); diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_op.py old mode 100644 new mode 100755 index 6599f66140c22..efda5d502c6a6 --- a/python/paddle/fluid/tests/unittests/test_isfinite_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_op.py 
@@ -15,7 +15,7 @@ import unittest import numpy as np -from eager_op_test import OpTest +from eager_op_test import OpTest, convert_float_to_uint16 from paddle.fluid import core @@ -48,6 +48,28 @@ def init_dtype(self): self.dtype = np.float16 +# BFP16 isinf Test +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestInfBF16(OpTest): + def setUp(self): + self.op_type = "isinf" + self.dtype = np.uint16 + x = np.random.uniform(0.1, 1, [11, 17]).astype(np.float32) + x[0] = np.inf + x[-1] = np.inf + + out = np.array(True) + self.inputs = {'X': convert_float_to_uint16(x)} + self.outputs = {'Out': out} + + def test_output(self): + self.check_output_with_place(core.CUDAPlace(0)) + + class TestNAN(OpTest): def setUp(self): self.op_type = "isnan" @@ -76,6 +98,28 @@ def init_dtype(self): self.dtype = np.float16 +# BFP16 isnan Test +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestNANBF16(OpTest): + def setUp(self): + self.op_type = "isnan" + self.dtype = np.uint16 + x = np.random.uniform(0.1, 1, [11, 17]).astype(np.float32) + x[0] = np.nan + x[-1] = np.nan + + out = np.array(True) + self.inputs = {'X': convert_float_to_uint16(x)} + self.outputs = {'Out': out} + + def test_output(self): + self.check_output_with_place(core.CUDAPlace(0)) + + class TestIsfinite(OpTest): def setUp(self): self.op_type = "isfinite" @@ -105,5 +149,27 @@ def init_dtype(self): self.dtype = np.float16 +# BFP16 isfinite Test +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestIsfiniteBF16(OpTest): + def setUp(self): + self.op_type = "isfinite" + self.dtype = np.uint16 + x = np.random.uniform(0.1, 1, [11, 17]).astype(np.float32) + x[0] = np.inf + x[-1] = np.nan + + out = np.array(False) + self.inputs = {'X': convert_float_to_uint16(x)} + self.outputs = {'Out': out} + + def test_output(self): + self.check_output_with_place(core.CUDAPlace(0)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index ba7efb7956f77..1e969be880401 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -3466,7 +3466,14 @@ def isfinite(x, name=None): check_variable_and_dtype( x, 'x', - ['float16', 'float32', 'float64', 'int32', 'int64'], + [ + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + 'uint16', + ], 'isfinite', ) out = helper.create_variable_for_type_inference('bool') @@ -3502,7 +3509,17 @@ def isinf(x, name=None): else: helper = LayerHelper("isinf_v2", **locals()) check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], 'isinf' + x, + 'x', + [ + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + 'uint16', + ], + 'isinf', ) out = helper.create_variable_for_type_inference(dtype='bool') helper.append_op(type="isinf_v2", inputs={"X": x}, outputs={"Out": out}) @@ -3535,7 +3552,17 @@ def isnan(x, name=None): else: helper = LayerHelper("isnan_v2", **locals()) check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], 'isnan' + x, + 'x', + [ + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + 'uint16', + ], + 'isnan', ) out = 
helper.create_variable_for_type_inference(dtype='bool') helper.append_op(type="isnan_v2", inputs={"X": x}, outputs={"Out": out}) From dee7d78d53a7a84b660df93b617d7b8ca2d53ec0 Mon Sep 17 00:00:00 2001 From: YuhangLi <104877312+piDack@users.noreply.github.com> Date: Tue, 11 Apr 2023 13:19:35 +0800 Subject: [PATCH 08/59] [AMP OP&Test]stack & unstack ops fp16 bf16 support (#50999) * stack fp16 & bf16 support * unstack fp16 support * unstack bf16 support * append stack fp16 ut * add unstack * recover unstack cpu kernel * fix some issue for unstack ut * delete unuse var * add check_place * fix inference err --- .../fluid/tests/unittests/test_stack_op.py | 41 ++++++++ .../fluid/tests/unittests/test_unstack_op.py | 98 ++++++++++++++++++- 2 files changed, 138 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index d2411dda4b95a..b6a19615a6eda 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -105,6 +105,47 @@ def initParameters(self): self.enable_cinn = False +class TestStackFP16Op(TestStackOpBase): + def initParameters(self): + self.dtype = np.float16 + + +class TestStackFP16Op1(TestStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.num_inputs = 8 + + +class TestStackFP16Op2(TestStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.num_inputs = 10 + + +class TestStackFP16Op3(TestStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = -1 + + +class TestStackFP16Op4(TestStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = -4 + + +class TestStackFP16Op5(TestStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = 1 + + +class TestStackFP16Op6(TestStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = 3 + + class TestStackBF16Op(OpTest): def initDefaultParameters(self): self.num_inputs = 4 diff --git a/python/paddle/fluid/tests/unittests/test_unstack_op.py b/python/paddle/fluid/tests/unittests/test_unstack_op.py index 34c6950d7f1d8..9e20a78011c9d 100755 --- a/python/paddle/fluid/tests/unittests/test_unstack_op.py +++ b/python/paddle/fluid/tests/unittests/test_unstack_op.py @@ -15,9 +15,11 @@ import unittest import numpy as np -from eager_op_test import OpTest +from eager_op_test import OpTest, convert_float_to_uint16 import paddle +from paddle import fluid +from paddle.fluid import core class TestUnStackOpBase(OpTest): @@ -64,6 +66,35 @@ def test_check_grad(self): self.check_grad(['X'], self.get_y_names()) +class TestUnStackFP16Op(TestUnStackOpBase): + def initParameters(self): + self.dtype = np.float16 + + +class TestStackFP16Op3(TestUnStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = -1 + + +class TestStackFP16Op4(TestUnStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = -3 + + +class TestStackFP16Op5(TestUnStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = 1 + + +class TestStackFP16Op6(TestUnStackOpBase): + def initParameters(self): + self.dtype = np.float16 + self.axis = 2 + + class TestStackOp3(TestUnStackOpBase): def initParameters(self): self.axis = -1 @@ -84,6 +115,71 @@ def initParameters(self): self.axis = 2 +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA and do not support 
bfloat16", +) +class TestUnStackBF16Op(OpTest): + def initDefaultParameters(self): + self.input_dim = (5, 6, 7) + self.axis = 0 + self.dtype = np.uint16 + + def initParameters(self): + pass + + def get_y_names(self): + y_names = [] + for i in range(self.input_dim[self.axis]): + y_names.append(f'y{i}') + return y_names + + def setUp(self): + self.initDefaultParameters() + self.initParameters() + self.op_type = 'unstack' + self.python_api = paddle.unstack + self.x = np.random.random(size=self.input_dim).astype(np.float32) + outs = np.split(self.x, self.input_dim[self.axis], self.axis) + new_shape = list(self.input_dim) + del new_shape[self.axis] + y_names = self.get_y_names() + tmp = [] + tmp_names = [] + for i in range(self.input_dim[self.axis]): + tmp.append( + ( + y_names[i], + np.reshape(convert_float_to_uint16(outs[i]), new_shape), + ) + ) + tmp_names.append(y_names[i]) + + self.x = convert_float_to_uint16(self.x) + self.python_out_sig = tmp_names + self.inputs = {'X': self.x} + self.outputs = {'Y': tmp} + self.attrs = {'axis': self.axis, 'num': self.input_dim[self.axis]} + + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place) + + def test_check_grad(self): + with fluid.dygraph.guard(): + x = paddle.to_tensor(self.inputs['X']) + x.stop_gradient = False + y = paddle.unstack( + x, axis=self.attrs['axis'], num=self.attrs['num'] + ) + dx = paddle.grad(y, x)[0].numpy() + dx_expected = convert_float_to_uint16( + np.ones(self.input_dim, np.float32) + ) + np.testing.assert_array_equal(dx, dx_expected) + + class TestUnstackZeroInputOp(unittest.TestCase): def unstack_zero_input_static(self): From f352c23ed51c050f7273bd0b1ba54edd23f1be68 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 11 Apr 2023 13:31:19 +0800 Subject: [PATCH 09/59] [CustomOP Unittest] Polish unit test of custom operator, kCPU->CPU (#52725) * [CustomOP Unittest] Polish unit test of custom operator, kCPU->CPU * AllocationType::CPU -> is_cpu() --- test/custom_op/attr_test_op.cc | 8 +++--- test/custom_op/context_pool_test_op.cc | 3 +-- test/custom_op/custom_concat_op.cc | 3 +-- test/custom_op/custom_conj_op.cc | 3 +-- test/custom_op/custom_inplace.cc | 31 ++++++++++++------------ test/custom_op/custom_optional.cc | 18 ++++++++------ test/custom_op/custom_relu_op.cc | 16 ++++++------ test/custom_op/custom_relu_op_xpu.cc | 2 +- test/custom_op/custom_simple_slice_op.cc | 3 +-- test/custom_op/custom_tanh_op.cc | 3 +-- test/custom_op/dispatch_test_op.cc | 12 ++++----- test/custom_op/multi_out_test_op.cc | 6 ++--- 12 files changed, 53 insertions(+), 55 deletions(-) diff --git a/test/custom_op/attr_test_op.cc b/test/custom_op/attr_test_op.cc index 14cb0aa7c716d..819d5e0ea3a2d 100644 --- a/test/custom_op/attr_test_op.cc +++ b/test/custom_op/attr_test_op.cc @@ -132,7 +132,7 @@ std::vector AttrTestForward( std::vector float_vec_attr, std::vector int64_vec_attr, std::vector str_vec_attr) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_FLOATING_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -173,7 +173,7 @@ std::vector AttrTestBackward( int int_attr, const std::vector& float_vec_attr, const std::vector& str_vec_attr) { - auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape()); + auto grad_x = paddle::empty_like(grad_out); PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] { assign_cpu_kernel( @@ -198,7 +198,7 @@ std::vector ConstAttrTestForward( const std::vector& float_vec_attr, const 
std::vector& int64_vec_attr, const std::vector& str_vec_attr) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_FLOATING_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -239,7 +239,7 @@ std::vector ConstAttrTestBackward( const int& int_attr, const std::vector& float_vec_attr, const std::vector& str_vec_attr) { - auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape()); + auto grad_x = paddle::empty_like(grad_out); PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] { assign_cpu_kernel( diff --git a/test/custom_op/context_pool_test_op.cc b/test/custom_op/context_pool_test_op.cc index 1687bdccc9227..72b28064f0a3f 100644 --- a/test/custom_op/context_pool_test_op.cc +++ b/test/custom_op/context_pool_test_op.cc @@ -17,8 +17,7 @@ #include "paddle/extension.h" #include "paddle/phi/backends/context_pool.h" -#define CHECK_INPUT(x) \ - PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") +#define CHECK_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") std::vector ContextPoolTest(const paddle::Tensor& x) { // 1. test cpu context diff --git a/test/custom_op/custom_concat_op.cc b/test/custom_op/custom_concat_op.cc index 80f76e2df54fe..e34fffff7b2bb 100644 --- a/test/custom_op/custom_concat_op.cc +++ b/test/custom_op/custom_concat_op.cc @@ -17,8 +17,7 @@ #include "concat_and_split.h" // NOLINT #include "paddle/extension.h" -#define CHECK_INPUT(x) \ - PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") +#define CHECK_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") int64_t ComputeAxis(int64_t axis, int64_t rank) { PD_CHECK(axis >= -rank && axis < rank, diff --git a/test/custom_op/custom_conj_op.cc b/test/custom_op/custom_conj_op.cc index 56938552420e7..0f76f715c427f 100644 --- a/test/custom_op/custom_conj_op.cc +++ b/test/custom_op/custom_conj_op.cc @@ -18,8 +18,7 @@ #include "paddle/extension.h" -#define CHECK_INPUT(x) \ - PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") +#define CHECK_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") template using EnableComplex = typename std::enable_if< diff --git a/test/custom_op/custom_inplace.cc b/test/custom_op/custom_inplace.cc index fbbe10b513ece..f7db7922bf3f7 100644 --- a/test/custom_op/custom_inplace.cc +++ b/test/custom_op/custom_inplace.cc @@ -18,6 +18,8 @@ #include "paddle/extension.h" +#define CHECK_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") + template void add_data_pointer(const data_t* x_data, data_t* out_data, int64_t numel) { for (size_t i = 0; i < numel; ++i) { @@ -52,7 +54,7 @@ void relu_backward_kernel(const data_t* out_data, } void AddForward(paddle::Tensor& x, const paddle::Tensor& y) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); PD_DISPATCH_FLOATING_TYPES( x.type(), "AddForward", ([&] { @@ -63,8 +65,8 @@ void AddForward(paddle::Tensor& x, const paddle::Tensor& y) { // NOLINT std::vector AddBackward(const paddle::Tensor& x, const paddle::Tensor& y, paddle::Tensor& out_grad) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); - PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor."); + CHECK_INPUT(x); + CHECK_INPUT(y); paddle::Tensor y_grad = paddle::empty(x.shape(), x.dtype(), x.place()); @@ -92,7 +94,7 @@ PD_BUILD_GRAD_OP(custom_add) // out[i] = x[i] + y void AddVectorForward(std::vector& x, // NOLINT const 
paddle::Tensor& y) { - PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor."); + CHECK_INPUT(y); PD_DISPATCH_FLOATING_TYPES(y.type(), "AddVectorForward", ([&] { for (size_t i = 0; i < x.size(); ++i) { @@ -109,9 +111,8 @@ std::vector AddVectorBackward( const std::vector& x, const paddle::Tensor& y, std::vector& out_grad) { // NOLINT - PD_CHECK(x[0].place() == paddle::PlaceType::kCPU, - "x[0] must be a CPU Tensor."); - PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor."); + CHECK_INPUT(x[0]); + CHECK_INPUT(y); PD_CHECK(x.size() == out_grad.size(), "x must have the same size as out_grad."); @@ -145,8 +146,8 @@ void MultiInplaceForward(paddle::Tensor& x, // NOLINT const paddle::Tensor& y, paddle::Tensor& a, // NOLINT const paddle::Tensor& b) { - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); - PD_CHECK(a.place() == paddle::PlaceType::kCPU, "a must be a CPU Tensor."); + CHECK_INPUT(x); + CHECK_INPUT(a); PD_DISPATCH_FLOATING_TYPES( x.type(), "MultiInplaceForward", ([&] { @@ -162,10 +163,10 @@ std::vector MultiInplaceBackward( const paddle::Tensor& a, const paddle::Tensor& b, paddle::Tensor& outab_grad) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); - PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor."); - PD_CHECK(a.place() == paddle::PlaceType::kCPU, "a must be a CPU Tensor."); - PD_CHECK(b.place() == paddle::PlaceType::kCPU, "b must be a CPU Tensor."); + CHECK_INPUT(x); + CHECK_INPUT(y); + CHECK_INPUT(a); + CHECK_INPUT(b); paddle::Tensor y_grad = paddle::empty(x.shape(), x.dtype(), x.place()); paddle::Tensor b_grad = paddle::empty(a.shape(), a.dtype(), a.place()); @@ -200,7 +201,7 @@ PD_BUILD_GRAD_OP(custom_multi_inplace) .SetKernelFn(PD_KERNEL(MultiInplaceBackward)); void ReluForwardInplace(paddle::Tensor& x) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); PD_DISPATCH_FLOATING_TYPES(x.type(), "ReluForward", ([&] { relu_forward_kernel(x.data(), @@ -211,7 +212,7 @@ void ReluForwardInplace(paddle::Tensor& x) { // NOLINT void ReluBackwardInplace(const paddle::Tensor& x, const paddle::Tensor& out, paddle::Tensor& grad_out) { // NOLINT - PD_CHECK(out.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(out); PD_DISPATCH_FLOATING_TYPES( grad_out.type(), "ReluBackward", ([&] { diff --git a/test/custom_op/custom_optional.cc b/test/custom_op/custom_optional.cc index 0e28ce84d5a35..9d247f4a27694 100644 --- a/test/custom_op/custom_optional.cc +++ b/test/custom_op/custom_optional.cc @@ -18,6 +18,8 @@ #include "paddle/extension.h" +#define CHECK_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") + template void add_one_pointer(const data_t* x_data, data_t* out_data, int64_t numel) { for (size_t i = 0; i < numel; ++i) { @@ -45,7 +47,7 @@ if (y) { std::vector AddForward( const paddle::Tensor& x, const paddle::optional& y) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor out = paddle::empty(x.shape(), x.dtype(), x.place()); if (y) { @@ -85,7 +87,7 @@ std::vector AddBackward( const paddle::Tensor& x, const paddle::optional& y, const paddle::Tensor& out_grad) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor x_grad = paddle::zeros(x.shape(), x.dtype(), x.place()); if (y) { @@ -118,7 +120,7 @@ if (y) { std::vector AddVectorForward( const 
paddle::Tensor& x, const paddle::optional>& y) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor out = paddle::zeros(x.shape(), x.dtype(), x.place()); PD_DISPATCH_FLOATING_TYPES( @@ -167,7 +169,7 @@ std::vector AddVectorBackward( const paddle::Tensor& x, const paddle::optional>& y, const paddle::Tensor& out_grad) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor x_grad = paddle::zeros(x.shape(), x.dtype(), x.place()); @@ -208,7 +210,7 @@ if (y) { std::vector AddOptionalInplaceForward( const paddle::Tensor& x, paddle::optional& y) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor outX = paddle::zeros(x.shape(), x.dtype(), x.place()); PD_DISPATCH_FLOATING_TYPES( @@ -252,7 +254,7 @@ std::vector AddOptionalInplaceBackward( const paddle::optional& y, const paddle::Tensor& outx_grad, paddle::optional& outy_grad) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor x_grad = paddle::zeros(x.shape(), x.dtype(), x.place()); @@ -313,7 +315,7 @@ if (y) { std::vector AddOptionalInplaceVectorForward( const paddle::Tensor& x, paddle::optional>& y) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor outX = paddle::zeros(x.shape(), x.dtype(), x.place()); PD_DISPATCH_FLOATING_TYPES( @@ -359,7 +361,7 @@ std::vector AddOptionalInplaceVectorBackward( const paddle::optional>& y, const paddle::Tensor& outx_grad, paddle::optional>& outy_grad) { // NOLINT - PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor."); + CHECK_INPUT(x); paddle::Tensor x_grad = paddle::zeros(x.shape(), x.dtype(), x.place()); diff --git a/test/custom_op/custom_relu_op.cc b/test/custom_op/custom_relu_op.cc index 7575887318ce3..5627bb28b921f 100644 --- a/test/custom_op/custom_relu_op.cc +++ b/test/custom_op/custom_relu_op.cc @@ -128,9 +128,9 @@ std::vector ReluBackward(const paddle::Tensor& x, std::vector ReluDoubleBackward(const paddle::Tensor& out, const paddle::Tensor& ddx) { - if (out.place() == paddle::PlaceType::kCPU) { + if (out.is_cpu()) { return relu_cpu_double_backward(out, ddx); - } else if (out.place() == paddle::PlaceType::kGPU) { + } else if (out.is_gpu()) { return relu_cuda_double_backward(out, ddx); } else { PD_THROW("Not implemented."); @@ -179,9 +179,9 @@ std::vector relu_cuda_backward_without_x( std::vector ReluBackwardWithoutX( const paddle::Tensor& out, const paddle::Tensor& grad_out) { - if (out.place() == paddle::PlaceType::kCPU) { + if (out.is_cpu()) { return relu_cpu_backward_without_x(out, grad_out); - } else if (out.place() == paddle::PlaceType::kGPU) { + } else if (out.is_gpu()) { return relu_cuda_backward_without_x(out, grad_out); } else { PD_THROW("Not implemented."); @@ -235,9 +235,9 @@ void relu_cuda_backward_out(const paddle::Tensor& x, paddle::Tensor* grad_x); void ReluForwardOut(const paddle::Tensor& x, paddle::Tensor* out) { - if (x.place() == paddle::PlaceType::kCPU) { + if (x.is_cpu()) { return relu_cpu_forward_out(x, out); - } else if (x.place() == paddle::PlaceType::kGPU) { + } else if (x.is_gpu()) { return relu_cuda_forward_out(x, out); } else { PD_THROW("Not implemented."); @@ -248,9 +248,9 @@ void ReluBackwardOut(const paddle::Tensor& x, const paddle::Tensor& out, const paddle::Tensor& grad_out, paddle::Tensor* grad_x) { - if 
(x.place() == paddle::PlaceType::kCPU) { + if (x.is_cpu()) { return relu_cpu_backward_out(x, out, grad_out, grad_x); - } else if (x.place() == paddle::PlaceType::kGPU) { + } else if (x.is_gpu()) { return relu_cuda_backward_out(x, out, grad_out, grad_x); } else { PD_THROW("Not implemented."); diff --git a/test/custom_op/custom_relu_op_xpu.cc b/test/custom_op/custom_relu_op_xpu.cc index c38f8b877da2c..ee717785ad848 100644 --- a/test/custom_op/custom_relu_op_xpu.cc +++ b/test/custom_op/custom_relu_op_xpu.cc @@ -161,7 +161,7 @@ std::vector ReluBackward(const paddle::Tensor& x, std::vector ReluDoubleBackward(const paddle::Tensor& out, const paddle::Tensor& ddx) { - if (out.place() == paddle::PlaceType::kCPU) { + if (out.is_cpu()) { return relu_cpu_double_backward(out, ddx); } else if (out.place().GetType() == phi::AllocationType::XPU) { return relu_xpu_double_backward(out, ddx); diff --git a/test/custom_op/custom_simple_slice_op.cc b/test/custom_op/custom_simple_slice_op.cc index 783e0cd96fdd9..21bd1b8ada27d 100644 --- a/test/custom_op/custom_simple_slice_op.cc +++ b/test/custom_op/custom_simple_slice_op.cc @@ -17,8 +17,7 @@ #include "paddle/extension.h" -#define CHECK_INPUT(x) \ - PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") +#define CHECK_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") std::vector SimpleSliceFunction(const paddle::Tensor& x, int64_t begin_index, diff --git a/test/custom_op/custom_tanh_op.cc b/test/custom_op/custom_tanh_op.cc index 399eb5b6366d7..a7a61b9528352 100644 --- a/test/custom_op/custom_tanh_op.cc +++ b/test/custom_op/custom_tanh_op.cc @@ -18,8 +18,7 @@ #include "paddle/extension.h" -#define CHECK_CPU_INPUT(x) \ - PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") +#define CHECK_CPU_INPUT(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") template void tanh_cpu_forward_kernel(const data_t* x_data, diff --git a/test/custom_op/dispatch_test_op.cc b/test/custom_op/dispatch_test_op.cc index 0f7d323b5451e..39e1a24fe2327 100644 --- a/test/custom_op/dispatch_test_op.cc +++ b/test/custom_op/dispatch_test_op.cc @@ -27,7 +27,7 @@ void assign_cpu_kernel(const data_t* x_data, } std::vector DispatchTestInterger(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_INTEGRAL_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -45,7 +45,7 @@ PD_BUILD_OP(dispatch_test_integer) std::vector DispatchTestFloatAndInteger( const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -62,7 +62,7 @@ PD_BUILD_OP(dispatch_test_float_and_integer) .SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger)); std::vector DispatchTestComplex(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_COMPLEX_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -80,7 +80,7 @@ PD_BUILD_OP(dispatch_test_complex) std::vector DispatchTestFloatAndComplex( const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -98,7 +98,7 @@ PD_BUILD_OP(dispatch_test_float_and_complex) std::vector DispatchTestFloatAndIntegerAndComplex( const paddle::Tensor& x) { - auto out = 
paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -115,7 +115,7 @@ PD_BUILD_OP(dispatch_test_float_and_integer_and_complex) .SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex)); std::vector DispatchTestFloatAndHalf(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_FLOATING_AND_HALF_TYPES( x.type(), "assign_cpu_kernel", ([&] { diff --git a/test/custom_op/multi_out_test_op.cc b/test/custom_op/multi_out_test_op.cc index d9e0526e4206e..7007058cbb93e 100644 --- a/test/custom_op/multi_out_test_op.cc +++ b/test/custom_op/multi_out_test_op.cc @@ -34,7 +34,7 @@ void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) { } std::vector MultiOutCPU(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto out = paddle::empty_like(x); PD_DISPATCH_FLOATING_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -43,13 +43,13 @@ std::vector MultiOutCPU(const paddle::Tensor& x) { })); // fake multi output: Fake_float64 with float64 dtype - auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto fake_float64 = paddle::empty_like(x); fill_constant_cpu_kernel( fake_float64.mutable_data(x.place()), x.size(), 0.); // fake multi output: ZFake_int32 with int32 dtype - auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); + auto zfake_int32 = paddle::empty_like(x); fill_constant_cpu_kernel( zfake_int32.mutable_data(x.place()), x.size(), 1); From 9b88eef1a1b6ecb5a3c305d57b98e6c0d87bb318 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 11 Apr 2023 13:31:29 +0800 Subject: [PATCH 10/59] [Polish CustomOP] Polish python codes, delete useless variable (#52728) --- python/paddle/utils/cpp_extension/extension_utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index e78cc85f73ca0..8ff70ca4c0e6f 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -1041,7 +1041,6 @@ def _gen_output_content( ): # ' ' * tab space * tab number indent = ' ' * 4 * 2 - inplace_idx = {v: k for k, v in inplace_reverse_idx.items()} dynamic_content = f""" {indent}res = [] {indent}start_idx = 0""" @@ -1134,7 +1133,6 @@ def _custom_api_content(op_name): attrs_map, inplace_reverse_idx, ) - lower_in_list = [p.split("@")[0].lower() for p in in_names] API_TEMPLATE = textwrap.dedent( """ import paddle.fluid.core as core @@ -1161,11 +1159,6 @@ def {op_name}({params_list}): api_content = API_TEMPLATE.format( op_name=op_name, params_list=params_list, - ins_map=ins_map, - attrs_map=attrs_map, - # "[x, y, z]"" - in_names="[" + ",".join(lower_in_list) + "]", - attr_names="[" + ",".join(attr_names) + "]", outs_list=outs_list, dynamic_content=dynamic_content, static_content=static_content, From 439551bd6edb9191b09da101203c55fc211298fd Mon Sep 17 00:00:00 2001 From: Kai Xing Date: Tue, 11 Apr 2023 14:05:15 +0800 Subject: [PATCH 11/59] [Test MV] fft (#52634) * [Test MV] fft * Update test_spectral_op.py --- python/paddle/fluid/tests/unittests/CMakeLists.txt | 4 ---- test/CMakeLists.txt | 4 +++- .../paddle/fluid/tests/unittests => test}/fft/CMakeLists.txt | 0 {python/paddle/fluid/tests/unittests => test}/fft/__init__.py | 0 .../fluid/tests/unittests => 
test}/fft/spectral_op_np.py | 0 {python/paddle/fluid/tests/unittests => test}/fft/test_fft.py | 0 .../unittests => test}/fft/test_fft_with_static_graph.py | 0 .../fluid/tests/unittests => test}/fft/test_spectral_op.py | 2 +- 8 files changed, 4 insertions(+), 6 deletions(-) rename {python/paddle/fluid/tests/unittests => test}/fft/CMakeLists.txt (100%) rename {python/paddle/fluid/tests/unittests => test}/fft/__init__.py (100%) rename {python/paddle/fluid/tests/unittests => test}/fft/spectral_op_np.py (100%) rename {python/paddle/fluid/tests/unittests => test}/fft/test_fft.py (100%) rename {python/paddle/fluid/tests/unittests => test}/fft/test_fft_with_static_graph.py (100%) rename {python/paddle/fluid/tests/unittests => test}/fft/test_spectral_op.py (99%) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 6f461538a7c8d..491cae679975f 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -774,10 +774,6 @@ add_subdirectory(sequence) add_subdirectory(rnn) add_subdirectory(distribution) -if(NOT WIN32 OR NOT WITH_GPU) - add_subdirectory(fft) -endif() - if(WITH_XPU) add_subdirectory(xpu) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d9d9cb5504f1c..215771713ecc2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -107,7 +107,9 @@ if(WITH_TESTING) # add_subdirectory(distributed_passes) # add_subdirectory(distribution) add_subdirectory(dygraph_to_static) - # add_subdirectory(fft) + if(NOT WIN32 OR NOT WITH_GPU) + add_subdirectory(fft) + endif() # add_subdirectory(fleet) if(WITH_IPU) add_subdirectory(ipu) diff --git a/python/paddle/fluid/tests/unittests/fft/CMakeLists.txt b/test/fft/CMakeLists.txt similarity index 100% rename from python/paddle/fluid/tests/unittests/fft/CMakeLists.txt rename to test/fft/CMakeLists.txt diff --git a/python/paddle/fluid/tests/unittests/fft/__init__.py b/test/fft/__init__.py similarity index 100% rename from python/paddle/fluid/tests/unittests/fft/__init__.py rename to test/fft/__init__.py diff --git a/python/paddle/fluid/tests/unittests/fft/spectral_op_np.py b/test/fft/spectral_op_np.py similarity index 100% rename from python/paddle/fluid/tests/unittests/fft/spectral_op_np.py rename to test/fft/spectral_op_np.py diff --git a/python/paddle/fluid/tests/unittests/fft/test_fft.py b/test/fft/test_fft.py similarity index 100% rename from python/paddle/fluid/tests/unittests/fft/test_fft.py rename to test/fft/test_fft.py diff --git a/python/paddle/fluid/tests/unittests/fft/test_fft_with_static_graph.py b/test/fft/test_fft_with_static_graph.py similarity index 100% rename from python/paddle/fluid/tests/unittests/fft/test_fft_with_static_graph.py rename to test/fft/test_fft_with_static_graph.py diff --git a/python/paddle/fluid/tests/unittests/fft/test_spectral_op.py b/test/fft/test_spectral_op.py similarity index 99% rename from python/paddle/fluid/tests/unittests/fft/test_spectral_op.py rename to test/fft/test_spectral_op.py index 6b8ab6cc2ff04..075d68b68ed47 100644 --- a/python/paddle/fluid/tests/unittests/fft/test_spectral_op.py +++ b/test/fft/test_spectral_op.py @@ -29,7 +29,7 @@ from paddle import _C_ops sys.path.append("../") -from eager_op_test import OpTest +from paddle.fluid.tests.unittests.eager_op_test import OpTest paddle.enable_static() From 6366cffe07644d32c0d542e0374ef206d86fdb8d Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Tue, 11 Apr 2023 14:06:16 +0800 Subject: [PATCH 12/59] fix check 
nan bug (#52729) --- .../auto_code_generator/generator/eager_gen.py | 3 +++ paddle/fluid/eager/nan_inf_utils.cc | 13 +++++++++++++ paddle/fluid/eager/nan_inf_utils.h | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 278fbf127036b..e22355d88d329 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -276,6 +276,8 @@ class {} : public egr::GradNodeBase {{ // Before log info {} // Forward API Call +{} + // Check NaN and Inf if needed {} // Get Outputs {} @@ -1675,6 +1677,7 @@ def GenerateForwardDefinitionAndDeclaration(self, is_inplaced): forward_api_name, before_log_str, forward_call_str, + check_nan_inf_str, get_outputs_str, forward_api_name, check_inplace_str, diff --git a/paddle/fluid/eager/nan_inf_utils.cc b/paddle/fluid/eager/nan_inf_utils.cc index 17cf8825d5c15..6eae40fca36cf 100644 --- a/paddle/fluid/eager/nan_inf_utils.cc +++ b/paddle/fluid/eager/nan_inf_utils.cc @@ -122,6 +122,11 @@ void CheckTensorHasNanOrInf(const std::string& api_name, const Tensor& tensor) { } } +void CheckTensorHasNanOrInf(const std::string& api_name, + const paddle::optional& tensor) { + CheckTensorHasNanOrInf(api_name, tensor.get()); +} + void CheckTensorHasNanOrInf(const std::string& api_name, const TupleOfTwoTensors& tensors) { CheckTensorHasNanOrInf(api_name, std::get<0>(tensors)); @@ -169,6 +174,14 @@ void CheckTensorHasNanOrInf(const std::string& api_name, } } +void CheckTensorHasNanOrInf( + const std::string& api_name, + const paddle::optional>& tensors) { + if (tensors) { + CheckTensorHasNanOrInf(api_name, tensors.get()); + } +} + void CheckTensorHasNanOrInf( const std::string& api_name, const paddle::small_vector, diff --git a/paddle/fluid/eager/nan_inf_utils.h b/paddle/fluid/eager/nan_inf_utils.h index cb19fd2f9d794..8d7ed7ffb76b2 100644 --- a/paddle/fluid/eager/nan_inf_utils.h +++ b/paddle/fluid/eager/nan_inf_utils.h @@ -20,6 +20,7 @@ #include "paddle/fluid/eager/type_defs.h" #include "paddle/phi/api/include/tensor.h" +#include "paddle/utils/optional.h" #include "paddle/utils/small_vector.h" namespace egr { @@ -36,6 +37,9 @@ using TupleOfTensorAndVector = void CheckTensorHasNanOrInf(const std::string& api_name, const Tensor& tensor); +void CheckTensorHasNanOrInf(const std::string& api_name, + const paddle::optional& tensor); + void CheckTensorHasNanOrInf(const std::string& api_name, const TupleOfTwoTensors& tensors); @@ -54,6 +58,10 @@ void CheckTensorHasNanOrInf(const std::string& api_name, void CheckTensorHasNanOrInf(const std::string& api_name, const std::vector& tensors); +void CheckTensorHasNanOrInf( + const std::string& api_name, + const paddle::optional>& tensors); + void CheckTensorHasNanOrInf(const std::string& api_name, const TupleOfTensorAndVector& tensors); From 29ab75b638e151c424c711b3b1b2543fda3a0e88 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 11 Apr 2023 14:10:49 +0800 Subject: [PATCH 13/59] move test_*tokenizer to /test/tokenizer (#52658) --- python/paddle/fluid/tests/unittests/CMakeLists.txt | 2 -- test/CMakeLists.txt | 2 +- test/tokenizer/CMakeLists.txt | 12 ++++++++++++ .../tests/unittests => test}/tokenizer/__init__.py | 0 .../unittests => test}/tokenizer/bert_tokenizer.py | 0 .../tokenizer}/test_faster_tokenizer_op.py | 5 +---- .../unittests => test}/tokenizer/tokenizer_utils.py | 0 7 files changed, 14 insertions(+), 7 deletions(-) create 
mode 100644 test/tokenizer/CMakeLists.txt rename {python/paddle/fluid/tests/unittests => test}/tokenizer/__init__.py (100%) rename {python/paddle/fluid/tests/unittests => test}/tokenizer/bert_tokenizer.py (100%) rename {python/paddle/fluid/tests/unittests => test/tokenizer}/test_faster_tokenizer_op.py (99%) rename {python/paddle/fluid/tests/unittests => test}/tokenizer/tokenizer_utils.py (100%) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 491cae679975f..63279cffc3e51 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -552,8 +552,6 @@ if((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)) endif() set_tests_properties(test_conv2d_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") -set_tests_properties(test_faster_tokenizer_op PROPERTIES LABELS - "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv2d_api PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 215771713ecc2..4d50fe16b9b05 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -126,7 +126,7 @@ if(WITH_TESTING) add_subdirectory(rpc) # add_subdirectory(sequence) add_subdirectory(standalone_executor) - # add_subdirectory(tokenizer) + add_subdirectory(tokenizer) # add_subdirectory(white_list) add_subdirectory(xpu) endif() diff --git a/test/tokenizer/CMakeLists.txt b/test/tokenizer/CMakeLists.txt new file mode 100644 index 0000000000000..1cf384df660b3 --- /dev/null +++ b/test/tokenizer/CMakeLists.txt @@ -0,0 +1,12 @@ +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() + +set_tests_properties(test_faster_tokenizer_op PROPERTIES LABELS + "RUN_TYPE=EXCLUSIVE") diff --git a/python/paddle/fluid/tests/unittests/tokenizer/__init__.py b/test/tokenizer/__init__.py similarity index 100% rename from python/paddle/fluid/tests/unittests/tokenizer/__init__.py rename to test/tokenizer/__init__.py diff --git a/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py b/test/tokenizer/bert_tokenizer.py similarity index 100% rename from python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py rename to test/tokenizer/bert_tokenizer.py diff --git a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py b/test/tokenizer/test_faster_tokenizer_op.py similarity index 99% rename from python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py rename to test/tokenizer/test_faster_tokenizer_op.py index 6972505bf3cbb..37bb09a514a18 100755 --- a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py +++ b/test/tokenizer/test_faster_tokenizer_op.py @@ -13,20 +13,17 @@ # limitations under the License. 
import os -import sys import tempfile import unittest import numpy as np +from bert_tokenizer import BertTokenizer import paddle from paddle import _legacy_C_ops, nn from paddle.fluid.framework import _non_static_mode, core from paddle.fluid.layer_helper import LayerHelper -sys.path.append("./tokenizer") -from tokenizer.bert_tokenizer import BertTokenizer - def to_string_tensor(string_values, name): """ diff --git a/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py b/test/tokenizer/tokenizer_utils.py similarity index 100% rename from python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py rename to test/tokenizer/tokenizer_utils.py From f03dcff7a57d83bf6997990ced8bcd85e6aadd70 Mon Sep 17 00:00:00 2001 From: Zhang Zheng <32410583+ZzSean@users.noreply.github.com> Date: Tue, 11 Apr 2023 14:13:11 +0800 Subject: [PATCH 14/59] Update approver list of checking file diff, test=document_fix (#52756) --- tools/check_file_diff_approvals.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 50f9344c66fe4..8cfcb63e84c70 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -248,8 +248,8 @@ fi NO_NPU_FILE=`git diff --name-only upstream/$BRANCH | grep -v "_npu.py"` HAS_UNITTEST_SKIP=`git diff -U0 upstream/$BRANCH ${NO_NPU_FILE} | grep "^+[[:space:]]\{0,\}@unittest.skip" || true` if [ "${HAS_UNITTEST_SKIP}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then - echo_line="Unittest is not allowed to be disabled.\nYou must have one RD (kolinwei(Recommend), wanghuancoder, luotao1, QingshuChen or qili93) approval for the usage of @unittest.skip or @unittest.skipIf.\n${HAS_UNITTEST_SKIP}\n" - check_approval 1 22165420 6836917 46661762 26922892 16605440 2002279 + echo_line="Unittest is not allowed to be disabled.\nYou must have one RD (kolinwei(Recommend), wanghuancoder, luotao1, QingshuChen, qili93 or ZzSean) approval for the usage of @unittest.skip or @unittest.skipIf.\n${HAS_UNITTEST_SKIP}\n" + check_approval 1 22165420 6836917 46661762 26922892 16605440 2002279 32410583 fi HAS_MODIFIED_DEMO_CMAKE=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/inference/api/demo_ci/CMakeLists.txt" || true` @@ -456,8 +456,8 @@ if [ "${NEW_OP_TEST_ADDED}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then CHECK_WHOLE=$CHECK_OUTPUT$CHECK_OUTPUT_WITH_PLACE$CHECK_GRAD$CHECK_GRAD_CHECK if [ "${CHECK_WHOLE}" != "" ] ; then CHECK_OP=${CHECK_WHOLE//+/'\n+'} - echo_line="Please use the default precision parameters of 'atol, rtol, eps, max_relative_error'. If you don't use the default value, you must have one RD (Xreki (Recommend), fuyinno4, QingshuChen(Recommend for kunlun), zhiqiu or qili93 (Recommend for NPU) , luotao1, lanxianghit or phlrain) approval for the usage of other values. The detailed information is in the link: https://github.cor/PaddlePaddle/Paddle/wiki/OP-test-accuracy-requirements. The error line is ${CHECK_OP}\n" - check_approval 1 6836917 47554610 12538138 43953930 35824027 6888866 16605440 2002279 + echo_line="Please use the default precision parameters of 'atol, rtol, eps, max_relative_error'. If you don't use the default value, you must have one RD (Xreki (Recommend), fuyinno4, QingshuChen(Recommend for kunlun), zhiqiu or qili93 (Recommend for NPU) , luotao1, lanxianghit, phlrain or ZzSean) approval for the usage of other values. The detailed information is in the link: https://github.cor/PaddlePaddle/Paddle/wiki/OP-test-accuracy-requirements. 
The error line is ${CHECK_OP}\n" + check_approval 1 6836917 47554610 12538138 43953930 35824027 6888866 16605440 2002279 32410583 fi fi From 3951c40d911554966726ec575ac303ea89899e0b Mon Sep 17 00:00:00 2001 From: zhangyuqin1998 <75946871+zhangyuqin1998@users.noreply.github.com> Date: Tue, 11 Apr 2023 15:12:12 +0800 Subject: [PATCH 15/59] delete remote_prefetch (#52748) --- paddle/fluid/operators/hierarchical_sigmoid_op.cc | 1 - paddle/phi/api/yaml/legacy_backward.yaml | 4 ++-- paddle/phi/api/yaml/legacy_ops.yaml | 2 +- paddle/phi/infermeta/multiary.cc | 1 - paddle/phi/infermeta/multiary.h | 1 - paddle/phi/kernels/cpu/hsigmoid_loss_grad.h | 1 - paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc | 2 -- paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc | 1 - paddle/phi/kernels/hsigmoid_loss_grad_kernel.h | 1 - paddle/phi/kernels/hsigmoid_loss_kernel.h | 1 - .../phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc | 2 -- .../phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h | 1 - paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc | 6 +++--- python/paddle/fluid/tests/unittests/test_hsigmoid_op.py | 1 - python/paddle/nn/functional/loss.py | 1 - 15 files changed, 6 insertions(+), 20 deletions(-) diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc index e1de4a9a4d312..e73d2a2b5ce39 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc @@ -124,7 +124,6 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("num_classes", "(int, optional), The number of classes") .SetDefault(2); // for parameter prefetch - AddAttr("remote_prefetch", "").SetDefault(false); AddAttr("trainer_id", "trainer id from 0 ~ worker_num.").SetDefault(0); AddAttr>("height_sections", "Height for each output SelectedRows.") diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 4ba99b1b81312..4e21865c23b31 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -475,8 +475,8 @@ func : heaviside_grad - backward_op : hsigmoid_loss_grad - forward : hsigmoid_loss (Tensor x, Tensor label, Tensor w, Tensor bias, Tensor path, Tensor code, int num_classes, bool remote_prefetch, bool is_sparse) -> Tensor(out), Tensor(pre_out), Tensor(w_out) - args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, Tensor pre_out, Tensor out_grad, int num_classes, bool remote_prefetch, bool is_sparse) + forward : hsigmoid_loss (Tensor x, Tensor label, Tensor w, Tensor bias, Tensor path, Tensor code, int num_classes, bool is_sparse) -> Tensor(out), Tensor(pre_out), Tensor(w_out) + args : (Tensor x, Tensor w, Tensor label, Tensor path, Tensor code, Tensor bias, Tensor pre_out, Tensor out_grad, int num_classes, bool is_sparse) output : Tensor(x_grad), Tensor(w_grad), Tensor(bias_grad) infer_meta : func : GeneralTernaryGradInferMeta diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 2d0aadcf5362c..217afd146f844 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -684,7 +684,7 @@ backward : heaviside_grad - op : hsigmoid_loss - args : (Tensor x, Tensor label, Tensor w, Tensor bias, Tensor path, Tensor code, int num_classes, bool remote_prefetch, bool is_sparse) + args : (Tensor x, Tensor label, Tensor w, Tensor bias, Tensor path, Tensor code, int num_classes, bool is_sparse) output : Tensor(out), 
Tensor(pre_out), Tensor(w_out) infer_meta : func : HSigmoidLossInferMeta diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 7364f85e75155..71fe149e7c0c0 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -1432,7 +1432,6 @@ void HSigmoidLossInferMeta(const MetaTensor& x, const MetaTensor& path, const MetaTensor& code, int num_classes, - bool remote_prefetch, bool is_sparse, MetaTensor* out, MetaTensor* pre_out, diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 178910e3620c9..307e6115cfd56 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -312,7 +312,6 @@ void HSigmoidLossInferMeta(const MetaTensor& x, const MetaTensor& path, const MetaTensor& code, int num_classes, - bool remote_prefetch, bool is_sparse, MetaTensor* out, MetaTensor* pre_out, diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h index 8c8b40c8d9fd0..f4b35c9101836 100644 --- a/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h +++ b/paddle/phi/kernels/cpu/hsigmoid_loss_grad.h @@ -35,7 +35,6 @@ void HSigmoidLossGradKernelImpl(const Context& ctx, const DenseTensor& pre_out, const DenseTensor& out_grad, int num_classes, - bool remote_prefetch, bool is_sparse, DenseTensor* x_grad, DenseTensor* w_grad, diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc index bc741b32b3afc..9b7a2fd574ea8 100644 --- a/paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/hsigmoid_loss_grad_kernel.cc @@ -31,7 +31,6 @@ void HSigmoidLossGradKernel(const Context& ctx, const DenseTensor& pre_out, const DenseTensor& out_grad, int num_classes, - bool remote_prefetch, bool is_sparse, DenseTensor* x_grad, DenseTensor* w_grad, @@ -46,7 +45,6 @@ void HSigmoidLossGradKernel(const Context& ctx, pre_out, out_grad, num_classes, - remote_prefetch, is_sparse, x_grad, w_grad, diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc index c6ee49ef34786..2a611a8d541ca 100644 --- a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc @@ -34,7 +34,6 @@ void HSigmoidLossKernel(const Context& ctx, const paddle::optional& path, const paddle::optional& code, int num_classes, - bool remote_prefetch, bool is_sparse, DenseTensor* out, DenseTensor* pre_out, diff --git a/paddle/phi/kernels/hsigmoid_loss_grad_kernel.h b/paddle/phi/kernels/hsigmoid_loss_grad_kernel.h index c36b343017fd5..254264b8c276e 100644 --- a/paddle/phi/kernels/hsigmoid_loss_grad_kernel.h +++ b/paddle/phi/kernels/hsigmoid_loss_grad_kernel.h @@ -29,7 +29,6 @@ void HSigmoidLossGradKernel(const Context& ctx, const DenseTensor& pre_out, const DenseTensor& out_grad, int num_classes, - bool remote_prefetch, bool is_sparse, DenseTensor* x_grad, DenseTensor* w_grad, diff --git a/paddle/phi/kernels/hsigmoid_loss_kernel.h b/paddle/phi/kernels/hsigmoid_loss_kernel.h index 33a90c637e4e4..f1b659a5ba129 100644 --- a/paddle/phi/kernels/hsigmoid_loss_kernel.h +++ b/paddle/phi/kernels/hsigmoid_loss_kernel.h @@ -27,7 +27,6 @@ void HSigmoidLossKernel(const Context& ctx, const paddle::optional& path, const paddle::optional& code, int num_classes, - bool remote_prefetch, bool is_sparse, DenseTensor* out, DenseTensor* pre_out, diff --git a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc 
b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc index 4bb0352528e4e..9d450f1d5dbed 100644 --- a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc +++ b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc @@ -48,7 +48,6 @@ void HSigmoidLossGradKernel(const Context& ctx, const DenseTensor& pre_out, const DenseTensor& out_grad, int num_classes, - bool remote_prefetch, bool is_sparse, DenseTensor* x_grad, SelectedRows* w_grad, @@ -74,7 +73,6 @@ void HSigmoidLossGradKernel(const Context& ctx, pre_out, out_grad, num_classes, - remote_prefetch, is_sparse, x_grad, w_grad_value, diff --git a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h index 94ac63183fbfb..50719408acf11 100644 --- a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h +++ b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h @@ -31,7 +31,6 @@ void HSigmoidLossGradKernel(const Context& ctx, const DenseTensor& pre_out, const DenseTensor& out_grad, int num_classes, - bool remote_prefetch, bool is_sparse, DenseTensor* x_grad, SelectedRows* w_grad, diff --git a/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc b/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc index a8db0b33242bd..9499e0b9fc0dd 100644 --- a/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc +++ b/paddle/phi/ops/compat/hierarchical_sigmoid_sig.cc @@ -20,7 +20,7 @@ KernelSignature HierarchicalSigmoidOpArgumentMapping( const ArgumentMappingContext& ctx) { return KernelSignature("hsigmoid_loss", {"X", "Label", "W", "Bias", "PathTable", "PathCode"}, - {"num_classes", "remote_prefetch", "is_sparse"}, + {"num_classes", "is_sparse"}, {"Out", "PreOut", "W_Out"}); } @@ -36,7 +36,7 @@ KernelSignature HierarchicalSigmoidGradOpArgumentMapping( "Bias", "PreOut", "Out@GRAD"}, - {"num_classes", "remote_prefetch", "is_sparse"}, + {"num_classes", "is_sparse"}, {"X@GRAD", "W@GRAD", "Bias@GRAD"}); } else if (ctx.IsSelectedRowsOutput("W@GRAD")) { return KernelSignature("hsigmoid_loss_grad_sr", @@ -48,7 +48,7 @@ KernelSignature HierarchicalSigmoidGradOpArgumentMapping( "Bias", "PreOut", "Out@GRAD"}, - {"num_classes", "remote_prefetch", "is_sparse"}, + {"num_classes", "is_sparse"}, {"X@GRAD", "W@GRAD", "Bias@GRAD"}); } else { return KernelSignature("unregistered", {}, {}, {}); diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index 9698fe9c54c05..752fbab31d57a 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -177,7 +177,6 @@ def python_api( path_code=None, num_classes=-1, is_sparse=False, - remote_prefetch=False, ): return paddle.nn.functional.hsigmoid_loss( input, diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 4b57c9d936123..c2c98361c75e7 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -1016,7 +1016,6 @@ def hsigmoid_loss( attrs = { "num_classes": num_classes, "is_sparse": is_sparse, - "remote_prefetch": is_sparse, } inputs = { From 5ab7927325ea7e77d950a3cf38d9f7cd6d1a483e Mon Sep 17 00:00:00 2001 From: wangxiaoning <71813629+wangxn12138@users.noreply.github.com> Date: Tue, 11 Apr 2023 15:13:13 +0800 Subject: [PATCH 16/59] fix save inf (#52632) --- .../distributed/fleet/runtime/parameter_server_runtime.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 24df3203183f5..3776583371526 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -24,7 +24,6 @@ Variable, default_main_program, default_startup_program, - save_inference_model, ) from ..base.private_helper_function import wait_server_ready @@ -735,7 +734,7 @@ def _ps_inference_save_inference_model( raise TypeError( "in fleet.save_inference_model() function, main_program must be as Program type, CompiledProgram is not allowed" ) - save_inference_model( + paddle.fluid.io.save_inference_model( dirname, feeded_var_names, target_vars, @@ -746,7 +745,7 @@ def _ps_inference_save_inference_model( export_for_deployment, ) else: - save_inference_model( + paddle.fluid.io.save_inference_model( dirname, feeded_var_names, target_vars, From 4a74f4c5aaca9dc36fe2abb7990fe3bd056d87ec Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 11 Apr 2023 15:39:33 +0800 Subject: [PATCH 17/59] support auto generate static for randperm (#52531) * support auto generate static for randperm * remove enforce in randperm infermeta --- paddle/fluid/operators/randperm_op.cc | 98 ------------------- paddle/fluid/operators/unity_build_rule.cmake | 2 - paddle/phi/api/yaml/op_compat.yaml | 6 ++ paddle/phi/api/yaml/static_ops.yaml | 11 +++ paddle/phi/ops/compat/randperm_sig.cc | 25 ----- 5 files changed, 17 insertions(+), 125 deletions(-) delete mode 100644 paddle/fluid/operators/randperm_op.cc delete mode 100644 paddle/phi/ops/compat/randperm_sig.cc diff --git a/paddle/fluid/operators/randperm_op.cc b/paddle/fluid/operators/randperm_op.cc deleted file mode 100644 index 187b227f33170..0000000000000 --- a/paddle/fluid/operators/randperm_op.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/randperm_op.h" - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" - -namespace paddle { -namespace operators { - -class RandpermOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "The output(Out) of randperm op must not be null.")); - int n = ctx->Attrs().Get("n"); - PADDLE_ENFORCE_GT( - n, - 0, - platform::errors::InvalidArgument( - "The input 'n' of randperm op should be greater than 0. 
" - "But received %d.", - n)); - - ctx->SetOutputDim("Out", phi::make_ddim({n})); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - auto data_type = - static_cast(ctx.Attr("dtype")); - return phi::KernelKey(data_type, ctx.GetPlace()); - } -}; - -class RandpermOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("Out", "The output tensor of randperm op."); - - AddAttr( - "n", "The upper bound (exclusive), and it should be greater than 0."); - AddAttr("dtype", - "The data type of output tensor. " - "Default: 3[int64].") - .SetDefault(framework::proto::VarType::INT64); - AddAttr("seed", - "Random seed used for permute samples. " - "0 means use a seed generated by the system." - "Note that if seed is not 0, this operator will always " - "generate the same random permutation every time. " - "Default: 0.") - .SetDefault(0); - - AddComment(R"DOC( -This operator returns a random permutation of integers from 0 to n-1. -)DOC"); - } -}; - -class RandpermOpVarTypeInference : public framework::VarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto var_data_type = static_cast( - PADDLE_GET_CONST(int, ctx->GetAttr("dtype"))); - ctx->SetOutputDataType("Out", var_data_type); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR( - randperm, - paddle::operators::RandpermOp, - paddle::operators::RandpermOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::operators::RandpermOpVarTypeInference); diff --git a/paddle/fluid/operators/unity_build_rule.cmake b/paddle/fluid/operators/unity_build_rule.cmake index 7ca431e8ea5d1..91033e2fa6707 100644 --- a/paddle/fluid/operators/unity_build_rule.cmake +++ b/paddle/fluid/operators/unity_build_rule.cmake @@ -222,7 +222,6 @@ register_unity_group( mkldnn/quantize_mkldnn_op.cc queue_generator_op.cc random_crop_op.cc - randperm_op.cc range_op.cc rank_attention_op.cc rank_loss_op.cc @@ -500,7 +499,6 @@ register_unity_group( register_unity_group( cu random_crop_op.cu - randperm_op.cu range_op.cu reverse_op.cu partial_concat_op.cu diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 98a00e6f5a9c0..90c75a8dcc6cd 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -1684,6 +1684,12 @@ tensors_name : ShapeTensorList manual_signature : [randint] +- op : randperm + outputs : + out : Out + extra : + attrs : [int seed = 0] + - op : real backward : real_grad inputs : diff --git a/paddle/phi/api/yaml/static_ops.yaml b/paddle/phi/api/yaml/static_ops.yaml index 4e0d4cfc931c5..f0f26e27c1f2c 100644 --- a/paddle/phi/api/yaml/static_ops.yaml +++ b/paddle/phi/api/yaml/static_ops.yaml @@ -260,6 +260,17 @@ param : [low, high, shape, dtype] data_type : dtype +- op : randperm + args : (int n, DataType dtype = DataType::INT64) + output : Tensor(out) + infer_meta : + func : RandpermInferMeta + param : [n, dtype] + kernel : + func : randperm + param : [n, dtype] + data_type : dtype + - op : reduce args : (Tensor x, int ring_id = 0, int root_id = 0, int reduce_type = 0) output : Tensor(out) diff --git a/paddle/phi/ops/compat/randperm_sig.cc b/paddle/phi/ops/compat/randperm_sig.cc deleted file mode 100644 index 14b28512e402a..0000000000000 --- a/paddle/phi/ops/compat/randperm_sig.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature RandpermOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("randperm", {}, {"n", "dtype"}, {"Out"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(randperm, phi::RandpermOpArgumentMapping); From 6741dd22f90d82a230b3d0083f496e1ae64e2f50 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 11 Apr 2023 15:40:34 +0800 Subject: [PATCH 18/59] support auto generate for op average_accumulates (#52704) --- .../fluid/operators/average_accumulates_op.cc | 142 ------------------ paddle/fluid/operators/unity_build_rule.cmake | 2 - paddle/phi/api/yaml/legacy_ops.yaml | 10 -- paddle/phi/api/yaml/ops.yaml | 10 ++ .../phi/ops/compat/average_accumulates_sig.cc | 39 ----- 5 files changed, 10 insertions(+), 193 deletions(-) delete mode 100644 paddle/fluid/operators/average_accumulates_op.cc delete mode 100644 paddle/phi/ops/compat/average_accumulates_sig.cc diff --git a/paddle/fluid/operators/average_accumulates_op.cc b/paddle/fluid/operators/average_accumulates_op.cc deleted file mode 100644 index a59b78c3cd44b..0000000000000 --- a/paddle/fluid/operators/average_accumulates_op.cc +++ /dev/null @@ -1,142 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/phi/infermeta/multiary.h" - -namespace paddle { -namespace operators { - -class AverageAccumulatesOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "param"), - ctx.GetPlace()); - } -}; - -class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("param", "(Tensor), The parameter to be accumulated."); - AddInput("in_sum_1", - "(Tensor), A tensor used to store the parameter " - "sums with the same shape as input(param)."); - AddInput("in_sum_2", - "(Tensor), A auxiliary tensor to help " - "accumulating sums of parameter values with the same shape as " - "input(param). 
It is used to avoid loss of precision due to too " - "many sums."); - AddInput("in_sum_3", - "(Tensor), A auxiliary tensor to help " - "accumulating sums of parameter values with the same shape as " - "input(param)."); - AddInput("in_num_accumulates", - "(Tensor), The accumulating times of current window with " - "shape [1]."); - AddInput( - "in_old_num_accumulates", - "(Tensor), The accumulating times of previous window with " - "shape [1]."); - AddInput("in_num_updates", - "(Tensor), The total number of batches used by training " - "before this batch with shape [1]."); - - AddOutput("out_sum_1", - "(Tensor), A tensor used to store the " - "parameter sums with the same shape as input(param)."); - AddOutput("out_sum_2", - "(Tensor), A auxiliary tensor to help " - "accumulating sums of parameter values with the same shape as " - "input(param). It is used to avoid loss of precision due to too " - "many sums."); - AddOutput("out_sum_3", - "(Tensor), A auxiliary tensor to help " - "accumulating sums of parameter values with the same shape as " - "input(param)."); - AddOutput( - "out_num_accumulates", - "(Tensor), The accumulating times of current window with " - "shape [1]."); - AddOutput( - "out_old_num_accumulates", - "(Tensor) The accumulating times of previous window with " - "shape [1]."); - AddOutput("out_num_updates", - "(Tensor), The total number of batches used by training " - "before this batch with shape [1]."); - - AddAttr("average_window", - "(float, default 0) " - "The rate of average window size relative to num_updates.") - .SetDefault(0); - AddAttr("max_average_window", - "(int64_t) " - "Maximum size of average window. It suggests that the " - "number of mini-batches " - "in one pass is appropriate value to set."); - AddAttr("min_average_window", - "(int64_t, default 10000L) " - "Minimu size of average window.") - .SetDefault(10000L); - - AddComment(R"DOC( -AverageAccumulates Operator. -Accumulate the sum of parameter within sliding window. The size of sliding window is -determined by 'average_window', 'max_average_window' and 'min_average_window'. -Memory was shared by Input(in_sum_1) and Output(out_sum_1) which acts as an accumulator 'sum_1'. -'sum_2', 'sum_3', 'num_accumulates', 'old_num_accumulates' and 'num_updates' were the same as 'sum_1'. - -All the accumulators were inited to zero before training. 
- -And for a mini-batch in training, accumulators were computed as below steps: - num_updates += 1 - num_accumulates += 1 - sum_1 += param - if num_updates % kMaxNumAccumulates == 0: - sum_2 += sum_1 - sum_1 = 0 - if num_accumulates >= min_average_window && num_accumulates >= min(max_average_window, num_updates * average_window): - sum_3 = sum_1 + sum_2 - sum_1 = 0 - sum_2 = 0 - old_num_accumulates = num_accumulates - num_accumulates = 0 - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(average_accumulates, - AverageAccumulatesInferShapeFunctor, - PD_INFER_META(phi::AverageAccumulatesInferMeta)); - -REGISTER_OPERATOR( - average_accumulates, - ops::AverageAccumulatesOp, - ops::AverageAccumulatesOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - AverageAccumulatesInferShapeFunctor); diff --git a/paddle/fluid/operators/unity_build_rule.cmake b/paddle/fluid/operators/unity_build_rule.cmake index 91033e2fa6707..af90cbdfc1639 100644 --- a/paddle/fluid/operators/unity_build_rule.cmake +++ b/paddle/fluid/operators/unity_build_rule.cmake @@ -17,7 +17,6 @@ register_unity_group( assert_op.cc assign_value_op.cc attention_lstm_op.cc - average_accumulates_op.cc batch_fc_op.cc bce_loss_op.cc beam_search_op.cc @@ -385,7 +384,6 @@ register_unity_group( conv_transpose_op.cu cos_sim_op.cu crop_op.cu - average_accumulates_op.cu conj_op.cu correlation_op.cu) register_unity_group( diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 217afd146f844..b075b1935e1bb 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -153,16 +153,6 @@ data_type : dtype backend : place > output -- op : average_accumulates_ - args : (Tensor param, Tensor in_sum_1, Tensor in_sum_2, Tensor in_sum_3, Tensor in_num_accumulates, Tensor in_old_num_accumulates, Tensor in_num_updates, float average_window, int64_t max_average_window, int64_t min_average_window) - output : Tensor(out_sum_1), Tensor(out_sum_2), Tensor(out_sum_3), Tensor(out_num_accumulates), Tensor(out_old_num_accumulates), Tensor(out_num_updates) - infer_meta: - func : AverageAccumulatesInferMeta - kernel : - func : average_accumulates {dense, dense, dense, dense, dense ,dense, dense -> dense, dense, dense, dense, dense, dense} - data_type : param - inplace : (in_sum_1 -> out_sum_1), (in_sum_2 -> out_sum_2), (in_sum_3 -> out_sum_3), (in_num_accumulates -> out_num_accumulates), (in_old_num_accumulates -> out_old_num_accumulates), (in_num_updates -> out_num_updates) - - op : batch_norm args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 31f4a114b7142..aed95190bcfe5 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -185,6 +185,16 @@ data_type : x optional : ins_tag_weight +- op : average_accumulates_ + args : (Tensor param, Tensor in_sum_1, Tensor in_sum_2, Tensor in_sum_3, Tensor in_num_accumulates, Tensor in_old_num_accumulates, Tensor in_num_updates, float average_window = 0, int64_t max_average_window = INT64_MAX, int64_t min_average_window = 10000L) + output : Tensor(out_sum_1), Tensor(out_sum_2), 
Tensor(out_sum_3), Tensor(out_num_accumulates), Tensor(out_old_num_accumulates), Tensor(out_num_updates) + infer_meta: + func : AverageAccumulatesInferMeta + kernel : + func : average_accumulates {dense, dense, dense, dense, dense ,dense, dense -> dense, dense, dense, dense, dense, dense} + data_type : param + inplace : (in_sum_1 -> out_sum_1), (in_sum_2 -> out_sum_2), (in_sum_3 -> out_sum_3), (in_num_accumulates -> out_num_accumulates), (in_old_num_accumulates -> out_old_num_accumulates), (in_num_updates -> out_num_updates) + - op : bce_loss args : (Tensor input, Tensor label) output : Tensor diff --git a/paddle/phi/ops/compat/average_accumulates_sig.cc b/paddle/phi/ops/compat/average_accumulates_sig.cc deleted file mode 100644 index c14e8ab357553..0000000000000 --- a/paddle/phi/ops/compat/average_accumulates_sig.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { -KernelSignature AverageAccumulatesOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature( - "average_accumulates", - {"param", - "in_sum_1", - "in_sum_2", - "in_sum_3", - "in_num_accumulates", - "in_old_num_accumulates", - "in_num_updates"}, - {"average_window", "max_average_window", "min_average_window"}, - {"out_sum_1", - "out_sum_2", - "out_sum_3", - "out_num_accumulates", - "out_old_num_accumulates", - "out_num_updates"}); -} -} // namespace phi -PD_REGISTER_ARG_MAPPING_FN(average_accumulates, - phi::AverageAccumulatesOpArgumentMapping); From dd74b3d1859d3349b32f64ff966d4d50c85c81ad Mon Sep 17 00:00:00 2001 From: Xiaoxu Chen Date: Tue, 11 Apr 2023 15:46:42 +0800 Subject: [PATCH 19/59] [prim]use Operator to reconstruct the primitive operator defined in c++ (#51997) --- .../utils/static/composite_grad_desc_maker.h | 3 + paddle/fluid/pybind/protobuf.cc | 3 +- python/paddle/fluid/backward.py | 69 ++++++++++++++----- python/paddle/fluid/framework.py | 35 ++++++++-- .../unittests/prim/test_comp_dispensable.py | 45 ++++++++++++ 5 files changed, 129 insertions(+), 26 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/prim/test_comp_dispensable.py diff --git a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h index 83b18814b19d4..b1b24af231f68 100644 --- a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h +++ b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h @@ -575,6 +575,9 @@ class CompositeGradOpMakerBase { const std::unordered_map& RuntimeAttrs() const { + LOG(WARNING) << "CompositeGradOpMaker doesn't support use runtime attrs, " + "but find the op" + << fwd_op_.Type() << "use runtime attr."; return fwd_op_.GetRuntimeAttrMap(); } diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 9661d5524140b..5493cc945cf4c 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -425,7 +425,8 @@ void 
BindOpDesc(pybind11::module *m) { &pd::OpDesc::SetDistAttr, pybind11::return_value_policy::reference) .def("inputs", [](pd::OpDesc &self) { return self.Inputs(); }) - .def("outputs", &pd::OpDesc::Outputs); + .def("outputs", &pd::OpDesc::Outputs) + .def("get_attr_map", &pd::OpDesc::GetAttrMap); pybind11::class_ scalar(*m, "Scalar", ""); scalar.def(py::init()) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 9a6572db72778..46f225e0d0910 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1715,35 +1715,68 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map): def infershape_for_composite(block, grad_op_desc): - # pruning empty output + # NOTE: why pruning the operator with empty output here ? + # Some backward operator will output emtpy var, which will cause infer + # shape error, such assign with input's stop_gradient=True if len(grad_op_desc.output_arg_names()) == 0: return - # append op to block - op_desc = block.desc.append_op() - op_desc.copy_from(grad_op_desc) - op_desc._set_attr( - core.op_proto_and_checker_maker.kOpRoleAttrName(), - core.op_proto_and_checker_maker.OpRole.Backward, - ) - - # create output var + # create output variable new_vars = set() - # create new gradient variables - for grad_var_name in op_desc.output_arg_names(): + for grad_var_name in grad_op_desc.output_arg_names(): if not ( block.desc.has_var_recursive(grad_var_name.encode()) or grad_var_name == core.empty_var_name() ): - block.desc.var(grad_var_name.encode()) + # NOTE: stop_gradient will be set in append_op + desc = block.desc.var(grad_var_name.encode()) + block.create_var(name=grad_var_name, desc=desc, type=desc.type()) new_vars.add(grad_var_name) - # infer shape and infer dthype - op_desc.check_attrs() - op_desc.infer_var_type(block.desc) - op_desc.infer_shape(block.desc) + # NOTE For the primitive operator generated by decompositing phi grad kernel, + # we Operator to reconstruct the op_desc for reusing some complex logic, such + # as processing dispensable input, intermediate output, extra attrs, etc... + if framework.OpProtoHolder.instance().has_op_proto(grad_op_desc.type()): + op = block.append_op( + type=grad_op_desc.type(), + inputs={ + name: [block._find_var_recursive(arg) for arg in args] + for name, args in grad_op_desc.inputs().items() + }, + outputs={ + name: [block._find_var_recursive(arg) for arg in args] + for name, args in grad_op_desc.outputs().items() + }, + # NOTE Runtime attr will be ignore as the c++ GetRuntimeAttr + # interface cann't be exported to python. 
Please note the WARNNING + # message logged in RuntimeAttrs of composite_grad_desc_maker.h + attrs=grad_op_desc.get_attr_map(), + ) + op.desc._set_attr( + core.op_proto_and_checker_maker.kOpRoleAttrName(), + core.op_proto_and_checker_maker.OpRole.Backward, + ) + grad_op_desc.copy_from(op.desc) + # For the backward operator, we reuse the logic of _append_backward_var + else: + op_desc = block.desc.append_op() + op_desc.copy_from(grad_op_desc) + op_desc._set_attr( + core.op_proto_and_checker_maker.kOpRoleAttrName(), + core.op_proto_and_checker_maker.OpRole.Backward, + ) + op_desc.check_attrs() + op_desc.infer_var_type(block.desc) + op_desc.infer_shape(block.desc) + for arg in op_desc.output_arg_names(): + if arg in new_vars: + _infer_var_data_type_shape_(arg, block) + + grad_op_desc.copy_from(op_desc) - for arg in op_desc.output_arg_names(): + # NOTE: Some operator doesn't infer dtype correctly, this patch set the + # grad_var dtype same with corresponding forward variable. + for arg in grad_op_desc.output_arg_names(): if arg in new_vars: _infer_var_data_type_shape_(arg, block) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 708cc462e78ea..db17ea368849d 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -2916,14 +2916,35 @@ def find_name(var_list, name): for m in proto.outputs: if (m.name not in outputs) and m.dispensable: continue - if not ((m.name in outputs) or m.dispensable): - raise ValueError( - ( - "Incorrect setting for output(s) of " - "operator \"%s\", should set: [%s]." + + # FIXME: The outputs of primitive operator currently + # doesn't include intermediate output as it will be dropped + # in operator codegen, such as xshape output of reshape2. + # It will fixed when the operator codegen support + # intermediate output. + if core._is_bwd_prim_enabled(): + if not ( + (m.name in outputs) + or m.dispensable + or m.intermediate + ): + raise ValueError( + ( + "Incorrect setting for output(s) of " + "operator \"%s\", should set: [%s]." + ) + % (type, m.name) ) - % (type, m.name) - ) + else: + if not ((m.name in outputs) or m.dispensable): + raise ValueError( + ( + "Incorrect setting for output(s) of " + "operator \"%s\", should set: [%s]." + ) + % (type, m.name) + ) + for out_proto in proto.outputs: if out_proto.name not in outputs: continue diff --git a/python/paddle/fluid/tests/unittests/prim/test_comp_dispensable.py b/python/paddle/fluid/tests/unittests/prim/test_comp_dispensable.py new file mode 100644 index 0000000000000..a4f4df5fdd1c5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/prim/test_comp_dispensable.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import paddle + + +class TestDispensable(unittest.TestCase): + def setUp(self): + paddle.fluid.core._set_prim_all_enabled(True) + + def tearDown(self): + paddle.fluid.core._set_prim_all_enabled(False) + + def test_dispensable(self): + @paddle.jit.to_static + def f(x): + return paddle.split(x, num_or_sections=2) + + f = paddle.jit.to_static(f) + x = paddle.rand((8,)) + x.stop_gradient = False + + op = f.get_concrete_program(x)[1].backward_program.block(0).ops[-1] + self.assertEqual( + op.attr('op_role'), + int(paddle.fluid.core.op_proto_and_checker_maker.OpRole.Backward), + ) + self.assertIn('AxisTensor', op.input_names) + + +if __name__ == '__main__': + unittest.main() From 327c0e4dfc8c32cceed653499422dea4713e2c54 Mon Sep 17 00:00:00 2001 From: cyber-pioneer <116002591+cyber-pioneer@users.noreply.github.com> Date: Tue, 11 Apr 2023 16:28:46 +0800 Subject: [PATCH 20/59] [Prim] Reset base prim resnet ci result (#52752) * change base res * fix prim cinn res --- test/prim/model/test_resnet_prim_cinn.py | 37 ++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/test/prim/model/test_resnet_prim_cinn.py b/test/prim/model/test_resnet_prim_cinn.py index deda6671d52a8..2012d84546e64 100644 --- a/test/prim/model/test_resnet_prim_cinn.py +++ b/test/prim/model/test_resnet_prim_cinn.py @@ -70,15 +70,15 @@ # The results in ci as as follows: DY2ST_PRIM_GT = [ 5.82879114151001, - 8.333706855773926, - 5.07769250869751, - 8.66937255859375, - 8.411705017089844, - 7.252340793609619, - 9.683248519897461, - 8.177335739135742, - 8.195427894592285, - 10.219732284545898, + 8.33370590209961, + 5.091761589050293, + 8.776082992553711, + 8.274380683898926, + 7.546653747558594, + 9.607137680053711, + 8.27371597290039, + 8.429732322692871, + 10.362630844116211, ] DY2ST_CINN_GT = [ 5.828789710998535, @@ -92,17 +92,18 @@ 8.383116722106934, 10.120304107666016, ] + DY2ST_PRIM_CINN_GT = [ 5.828784942626953, - 8.341737747192383, - 5.113619327545166, - 8.625601768493652, - 8.082450866699219, - 7.4913249015808105, - 9.858025550842285, - 8.287693977355957, - 8.435812950134277, - 10.372406005859375, + 8.34173583984375, + 5.116049289703369, + 8.511833190917969, + 7.9524407386779785, + 7.395752906799316, + 9.666715621948242, + 8.277752876281738, + 8.718518257141113, + 10.199666023254395, ] if core.is_compiled_with_cuda(): From 259b0aadb60c1530bd3996eae22870ca7e749bf5 Mon Sep 17 00:00:00 2001 From: wz1qqx <55830058+wz1qqx@users.noreply.github.com> Date: Tue, 11 Apr 2023 16:51:20 +0800 Subject: [PATCH 21/59] [XPU] fix error pattern and rename max name (#52726) --- .../framework/ir/xpu/conv2d_xpu_fuse_pass.cc | 56 ++++++++++++------- paddle/phi/api/yaml/fused_ops.yaml | 8 +-- paddle/phi/infermeta/fusion.cc | 14 ++--- paddle/phi/infermeta/fusion.h | 8 +-- .../kernels/fusion/xpu/conv2d_xpu_kernel.cc | 25 ++++----- 5 files changed, 63 insertions(+), 48 deletions(-) diff --git a/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc index f124c3cc44adf..0b591120014e3 100644 --- a/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc @@ -99,13 +99,15 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, auto conv = pattern->NewNode(conv_repr())->assert_is_op(conv_type_); auto input = pattern->NewNode(input_repr()) ->assert_is_op_input(conv_type_, "Input") - ->AsInput(); + ->AsInput() + ->assert_more([](Node* node) { + return node->Var()->GetShape().size() == 4; + }); 
auto conv_filter = pattern->NewNode(conv_filter_repr()) ->assert_is_op_input(conv_type_, "Filter") ->AsInput(); auto conv_out = pattern->NewNode(conv_out_repr()) - ->assert_is_op_output(conv_type_, "Output") - ->assert_var_not_persistable(); + ->assert_is_op_output(conv_type_, "Output"); conv->LinksFrom({input, conv_filter}).LinksTo({conv_out}); // ew_bias_add op PDNode* ew_bias_add = nullptr; @@ -116,11 +118,17 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, ew_bias_add_y = pattern->NewNode(ew_bias_add_y_repr()) ->assert_is_op_input("elementwise_add", "Y") ->assert_is_persistable_var() - ->assert_has_n_outputs(1); + ->assert_has_n_outputs(1) + ->assert_more([](Node* node) { + return node->Var()->GetShape().size() == 1; + }); ew_bias_add = pattern->NewNode(ew_bias_add_repr())->assert_is_op("elementwise_add"); ew_bias_add_out = pattern->NewNode(ew_bias_add_out_repr()) ->assert_is_op_output("elementwise_add", "Out"); + if (with_bn_ || with_branch_ || !act_type_.empty()) { + ew_bias_add_out->assert_has_n_outputs(1); + } ew_bias_add->LinksFrom({conv_out, ew_bias_add_y}) .LinksTo({ew_bias_add_out}); } else { @@ -159,6 +167,9 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, bn = pattern->NewNode(bn_repr())->assert_is_op("batch_norm"); bn_out = pattern->NewNode(bn_out_repr())->assert_is_op_output("batch_norm", "Y"); + if (with_branch_ || !act_type_.empty()) { + bn_out->assert_has_n_outputs(1); + } bn_mean_out = pattern->NewNode(bn_mean_out_repr()) ->assert_is_op_output("batch_norm", "MeanOut"); bn_saved_mean = pattern->NewNode(bn_saved_mean_repr()) @@ -179,23 +190,27 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, bn_out->assert_is_op_input("elementwise_add", "Y")->AsIntermediate(); ew_branch_add_in = pattern->NewNode(ew_branch_add_in_repr()) ->assert_is_op_input("elementwise_add", "X") - ->AsInput() - ->assert_more([](Node* node) { - return node->Var()->GetShape().size() == 4; - }); + ->AsInput(); } else if (with_branch_y_) { bn_out->assert_is_op_input("elementwise_add", "X")->AsIntermediate(); ew_branch_add_in = pattern->NewNode(ew_branch_add_in_repr()) ->assert_is_op_input("elementwise_add", "Y") - ->AsInput() - ->assert_more([](Node* node) { - return node->Var()->GetShape().size() == 4; - }); + ->AsInput(); } - ew_branch_add = - pattern->NewNode(ew_branch_add_repr())->assert_is_op("elementwise_add"); + ew_branch_add = pattern->NewNode(ew_branch_add_repr()) + ->assert_is_op("elementwise_add") + ->assert_more([](Node* node) { + if (node->inputs.size() != 2) { + return false; + } + return node->inputs[0]->Var()->GetShape() == + node->inputs[1]->Var()->GetShape(); + }); ew_branch_add_out = pattern->NewNode(ew_branch_add_out_repr()) ->assert_is_op_output("elementwise_add", "Out"); + if (!act_type_.empty()) { + ew_branch_add_out->assert_has_n_outputs(1); + } ew_branch_add->LinksFrom({bn_out, ew_branch_add_in}) .LinksTo({ew_branch_add_out}); } else { @@ -401,6 +416,7 @@ int Conv2dXPUFusePass::ApplyImpl(ir::Graph* graph, scope->FindVar(conv_filter->Name())->GetMutable(); auto filter_dims = filter_t->dims(); bool has_bias = with_bn || with_conv_bias; + bool has_branch = with_branch_x || with_branch_y; // Create conv_fusion_bias (conv bias) variable Node* fusion_bias_node = nullptr; if (has_bias) { @@ -501,18 +517,17 @@ int Conv2dXPUFusePass::ApplyImpl(ir::Graph* graph, framework::OpDesc conv2d_xpu_op_desc(block); // set input&output var conv2d_xpu_op_desc.SetType("conv2d_xpu"); - conv2d_xpu_op_desc.SetInput("input", {input->Name()}); + conv2d_xpu_op_desc.SetInput("x", 
{input->Name()}); conv2d_xpu_op_desc.SetInput("filter", {filter_int16->Name()}); conv2d_xpu_op_desc.SetInput("filter_max", {filter_max->Name()}); - conv2d_xpu_op_desc.SetOutput("output", {conv2d_xpu_out_name}); - conv2d_xpu_op_desc.SetOutput("output_max", {conv_out_max_name}); + conv2d_xpu_op_desc.SetOutput("out", {conv2d_xpu_out_name}); + conv2d_xpu_op_desc.SetOutput("out_max", {conv_out_max_name}); // set fusion_bias input node if (has_bias) { conv2d_xpu_op_desc.SetInput("bias", {fusion_bias_node->Name()}); - conv2d_xpu_op_desc.SetAttr("has_bias", has_bias); } // set ew_branch_add input node - if (ew_branch_add_in != nullptr) { + if (ew_branch_add != nullptr) { conv2d_xpu_op_desc.SetInput("branch", {ew_branch_add_in->Name()}); } // set attrs of conv2d_xpu @@ -566,7 +581,8 @@ int Conv2dXPUFusePass::ApplyImpl(ir::Graph* graph, conv2d_xpu_op_desc.SetAttr("place_z", std::vector{10}); conv2d_xpu_op_desc.SetAttr("paddings", conv_paddings); conv2d_xpu_op_desc.SetAttr("block_lod", std::vector{1}); - conv2d_xpu_op_desc.SetAttr("has_branch", with_branch_x || with_branch_y); + conv2d_xpu_op_desc.SetAttr("has_branch", has_branch); + conv2d_xpu_op_desc.SetAttr("has_bias", has_bias); auto* conv2d_xpu = graph->CreateOpNode(&conv2d_xpu_op_desc); IR_NODE_LINK_TO(input, conv2d_xpu); diff --git a/paddle/phi/api/yaml/fused_ops.yaml b/paddle/phi/api/yaml/fused_ops.yaml index c9fae2a81e3b7..b43d02fced54c 100644 --- a/paddle/phi/api/yaml/fused_ops.yaml +++ b/paddle/phi/api/yaml/fused_ops.yaml @@ -5,14 +5,14 @@ # otherwise the operator only could be used in static mode. - op : conv2d_xpu - args : (Tensor input, Tensor input_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, bool has_bias, bool has_branch, int act_type, float act_param) - output : Tensor(output), Tensor(output_max) + args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, bool has_bias, bool has_branch, int act_type, float act_param) + output : Tensor(out), Tensor(out_max) infer_meta : func : Conv2dXPUInferMeta kernel : func : conv2d_xpu - data_type : input - optional : bias, branch, input_max + data_type : x + optional : bias, branch, x_max - op : embedding_with_eltwise_add_xpu args : (Tensor[] ids, Tensor[] tables, int64_t padding_idx) diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc index ad8409487bb58..5c0aa3b8e89fd 100644 --- a/paddle/phi/infermeta/fusion.cc +++ b/paddle/phi/infermeta/fusion.cc @@ -35,8 +35,8 @@ inline int ConvOutSize(int input_size, return output_size; } -void Conv2dXPUInferMeta(const MetaTensor& input, - const MetaTensor& input_max, +void Conv2dXPUInferMeta(const MetaTensor& x, + const MetaTensor& x_max, const MetaTensor& filter, const MetaTensor& filter_max, const MetaTensor& bias, @@ -50,9 +50,9 @@ void Conv2dXPUInferMeta(const MetaTensor& input, bool has_branch, int act_type, float act_param, - MetaTensor* output, - MetaTensor* output_max) { - auto in_dims = input.dims(); + MetaTensor* out, + MetaTensor* out_max) { + auto in_dims = x.dims(); auto filter_dims = filter.dims(); // do some checks PADDLE_ENFORCE_EQ( @@ -157,8 +157,8 @@ void Conv2dXPUInferMeta(const MetaTensor& input, strides[i])); } // set output and output max dims - output->set_dims(DDim(out_shape.data(), out_shape.size())); - output_max->set_dims(phi::make_ddim({4})); + out->set_dims(DDim(out_shape.data(), 
out_shape.size())); + out_max->set_dims(phi::make_ddim({4})); } void EmbeddingWithEltwiseAddXPUInferMeta( diff --git a/paddle/phi/infermeta/fusion.h b/paddle/phi/infermeta/fusion.h index 9dcf7342ae193..3105ea8a6d578 100644 --- a/paddle/phi/infermeta/fusion.h +++ b/paddle/phi/infermeta/fusion.h @@ -22,8 +22,8 @@ namespace phi { // Common InferMeta Functions for fusion operators. // NOTE: The InferMeta Functions in this file are arranged in alphabetic order. -void Conv2dXPUInferMeta(const MetaTensor& input, - const MetaTensor& input_max, +void Conv2dXPUInferMeta(const MetaTensor& x, + const MetaTensor& x_max, const MetaTensor& filter, const MetaTensor& filter_max, const MetaTensor& bias, @@ -37,8 +37,8 @@ void Conv2dXPUInferMeta(const MetaTensor& input, bool has_branch, int act_type, float act_param, - MetaTensor* output, - MetaTensor* output_max); + MetaTensor* out, + MetaTensor* out_max); void EmbeddingWithEltwiseAddXPUInferMeta( const std::vector& ids, diff --git a/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc index 9da39097e0f8d..0f7d8902de328 100644 --- a/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc @@ -21,8 +21,8 @@ namespace fusion { template void Conv2dXPUKernel(const Context& ctx, - const DenseTensor& input, - const paddle::optional& input_max, + const DenseTensor& x, + const paddle::optional& x_max, const DenseTensor& filter, const DenseTensor& filter_max, const paddle::optional& bias, @@ -36,10 +36,10 @@ void Conv2dXPUKernel(const Context& ctx, bool has_branch, int act_type, float act_param, - DenseTensor* output, - DenseTensor* output_max) { + DenseTensor* out, + DenseTensor* out_max) { using XPUType = typename XPUTypeTrait::Type; - auto input_dims = input.dims(); + auto input_dims = x.dims(); auto filter_dims = filter.dims(); // update paddings and dilations accoring to padding_algorithm std::vector paddings_vec = paddings; @@ -62,17 +62,16 @@ void Conv2dXPUKernel(const Context& ctx, int win_h = static_cast(filter_dims[2]); int win_w = static_cast(filter_dims[3]); - auto* input_data = reinterpret_cast(input.data()); - const float* input_max_data = input_max.get_ptr() == nullptr - ? nullptr - : input_max.get_ptr()->data(); + auto* input_data = reinterpret_cast(x.data()); + const float* input_max_data = + x_max.get_ptr() == nullptr ? nullptr : x_max.get_ptr()->data(); auto* branch_data = branch.get_ptr() == nullptr ? nullptr : reinterpret_cast(branch.get_ptr()->data()); const float* bias_data = bias.get_ptr() == nullptr ? 
nullptr : bias.get_ptr()->data(); - auto* out_data = reinterpret_cast(ctx.template Alloc(output)); + auto* out_data = reinterpret_cast(ctx.template Alloc(out)); xpu::Activation_t act(static_cast(act_type)); if (act_type == xpu::Activation_t::LEAKY_RELU) { @@ -98,13 +97,13 @@ void Conv2dXPUKernel(const Context& ctx, /* int64_t groups */ groups, /* const float* in_maxptr */ input_max_data, /* const float* filter_maxptr */ filter_max.data(), - /* float* out_maxptr */ ctx.template Alloc(output_max), + /* float* out_maxptr */ ctx.template Alloc(out_max), /* bool is_nchw */ true, /* const float* bias */ bias_data, /* const TY* branch */ branch_data, /* const baidu::xpu::api::Activation_t& act */ act, - /* const float* branch_maxptr */ nullptr); - // /* const float* scale */ nullptr); + /* const float* branch_maxptr */ nullptr, + /* const float* scale */ nullptr); PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_xpu"); } From 10fd4a95b30195e0cff4fbaa6d2a0cd8039ce129 Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Tue, 11 Apr 2023 17:04:52 +0800 Subject: [PATCH 22/59] [Paddle Inference] Predictor support paddle::Tensor (#50445) --- cmake/phi_header.cmake | 34 ++-- paddle/fluid/framework/feed_fetch_method.cc | 2 +- .../fluid/inference/api/analysis_predictor.cc | 151 ++++++++++++++++-- .../fluid/inference/api/analysis_predictor.h | 42 ++++- .../inference/api/demo_ci/CMakeLists.txt | 2 +- paddle/fluid/inference/api/paddle_api.h | 10 ++ .../inference/api/paddle_inference_api.h | 11 ++ paddle/fluid/inference/api/paddle_tensor.h | 2 + paddle/fluid/jit/engine/predictor_engine.cc | 136 +--------------- paddle/fluid/pybind/eager_utils.cc | 4 + paddle/fluid/pybind/inference_api.cc | 31 ++-- paddle/phi/api/include/tensor.h | 6 +- paddle/phi/api/lib/api_custom_impl.cc | 1 + .../inference/test_inference_predictor_run.py | 128 +++++++++++++++ 14 files changed, 382 insertions(+), 178 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py diff --git a/cmake/phi_header.cmake b/cmake/phi_header.cmake index d5000eadbd14d..b1476761897ea 100644 --- a/cmake/phi_header.cmake +++ b/cmake/phi_header.cmake @@ -17,24 +17,21 @@ set(PADDLE_INFERENCE_INSTALL_DIR function(phi_header_path_compat TARGET_PATH) message(STATUS "phi header path compat processing: ${TARGET_PATH}") - string(FIND ${TARGET_PATH} "experimental" pos) - if(pos GREATER 1) - file(GLOB HEADERS "${TARGET_PATH}/*" "*.h") - foreach(header ${HEADERS}) - if(${header} MATCHES ".*.h$") - file(READ ${header} HEADER_CONTENT) - string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" - HEADER_CONTENT "${HEADER_CONTENT}") - string(REPLACE "paddle/fluid/platform/" - "paddle/include/experimental/phi/" HEADER_CONTENT - "${HEADER_CONTENT}") - string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" - HEADER_CONTENT "${HEADER_CONTENT}") - file(WRITE ${header} "${HEADER_CONTENT}") - message(STATUS "phi header path compat processing complete: ${header}") - endif() - endforeach() - endif() + file(GLOB HEADERS "${TARGET_PATH}/*" "*.h") + foreach(header ${HEADERS}) + if(${header} MATCHES ".*.h$") + file(READ ${header} HEADER_CONTENT) + string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" + HEADER_CONTENT "${HEADER_CONTENT}") + string(REPLACE "paddle/fluid/platform/" + "paddle/include/experimental/phi/" HEADER_CONTENT + "${HEADER_CONTENT}") + string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" + HEADER_CONTENT "${HEADER_CONTENT}") + file(WRITE ${header} "${HEADER_CONTENT}") + message(STATUS 
"phi header path compat processing complete: ${header}") + endif() + endforeach() endfunction() phi_header_path_compat( @@ -51,6 +48,7 @@ phi_header_path_compat( ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common) phi_header_path_compat( ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core) +phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/) # In order to be compatible with the original behavior, the header file name needs to be changed file(RENAME diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index f21ca0c858acc..0294e1ca54b43 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -95,7 +95,7 @@ phi::DenseTensor& GetVariableTensor(const Scope& scope, PADDLE_ENFORCE_EQ(var->IsType(), true, platform::errors::InvalidArgument( - "Only support lod tensor in GetVariableTensor now.")); + "Only support DenseTensor in GetVariableTensor now.")); return *var->GetMutable(); } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index b07c47b81eff4..38222b797f14f 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -155,11 +155,10 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) { return phi::Backend::CPU; } } -} // namespace -bool PaddleTensorToLoDTensor(const PaddleTensor &pt, - phi::DenseTensor *t, - const platform::Place &place) { +bool PaddleTensorToDenseTensor(const PaddleTensor &pt, + phi::DenseTensor *t, + const platform::Place &place) { framework::DDim ddim = phi::make_ddim(pt.shape); void *input_ptr; if (pt.dtype == PaddleDType::INT64) { @@ -270,6 +269,7 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, t->set_lod(lod); return true; } +} // namespace bool AnalysisPredictor::Init( const std::shared_ptr &parent_scope, @@ -919,6 +919,17 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector &inputs) { #endif } +void AnalysisPredictor::MkldnnPreSet( + const std::vector &inputs) { +#ifdef PADDLE_WITH_MKLDNN + std::vector> inputs_shape; + for (size_t i = 0; i < inputs.size(); ++i) { + inputs_shape.emplace_back(phi::vectorize(inputs[i].dims())); + } + MkldnnPreSet(inputs_shape); +#endif +} + void AnalysisPredictor::MkldnnPreSet( const std::vector> &inputs_shape) { #ifdef PADDLE_WITH_MKLDNN @@ -1033,6 +1044,70 @@ bool AnalysisPredictor::Run(const std::vector &inputs, return true; } +bool AnalysisPredictor::Run(const std::vector &inputs, + std::vector *outputs) { + inference::DisplayMemoryInfo(place_, "before run"); + paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); +#ifdef PADDLE_WITH_MKLDNN + if (config_.use_mkldnn_) MkldnnPreSet(inputs); +#endif + VLOG(3) << "predict start"; + // set feed variable + framework::Scope *scope = sub_scope_ ? 
sub_scope_ : scope_.get(); + PADDLE_ENFORCE_NOT_NULL( + scope, + platform::errors::PreconditionNotMet("The scope should not be nullptr.")); + if (!SetFeed(inputs, scope)) { + LOG(ERROR) << "fail to set feed"; + return false; + } + +#ifdef PADDLE_WITH_TENSORRT + if (config_.tensorrt_engine_enabled()) { + inference::tensorrt::TensorRTEngine::predictor_id_per_thread = + predictor_id_; + VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: " + << inference::tensorrt::TensorRTEngine::predictor_id_per_thread; + } +#endif + + // Run the inference program + // if share variables, we need not create variables + executor_->Run(); + + inference::DisplayMemoryInfo(place_, "after run"); + + // get fetch variable + if (!GetFetch(outputs, scope)) { + LOG(ERROR) << "fail to get fetches"; + return false; + } + + // All the containers in the scope will be hold in inference, but the + // operators assume that the container will be reset after each batch. + // Here is a bugfix, collect all the container variables, and reset then to a + // bool; the next time, the operator will call MutableData and construct a new + // container again, so that the container will be empty for each batch. + if (sub_scope_) { + tensor_array_batch_cleaner_.CollectNoTensorVars(sub_scope_); + } + tensor_array_batch_cleaner_.ResetNoTensorVars(); + + // recover the cpu_math_library_num_threads to 1, in order to avoid thread + // conflict when integrating it into deployment service. + paddle::platform::SetNumThreads(1); +#ifdef PADDLE_WITH_MKLDNN + if (config_.use_mkldnn_) MkldnnPostReset(); +#endif +#if defined(PADDLE_WITH_MKLML) + // Frees unused memory allocated by the Intel® MKL Memory Allocator to + // avoid memory leak. See: + // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers + platform::dynload::MKL_Free_Buffers(); +#endif + return true; +} + bool AnalysisPredictor::SetFeed(const std::vector &inputs, framework::Scope *scope) { VLOG(3) << "Predictor::set_feed"; @@ -1047,7 +1122,7 @@ bool AnalysisPredictor::SetFeed(const std::vector &inputs, for (size_t i = 0; i < inputs.size(); ++i) { phi::DenseTensor *input = &feed_tensors_[i]; - if (!PaddleTensorToLoDTensor(inputs[i], input, place_)) { + if (!PaddleTensorToDenseTensor(inputs[i], input, place_)) { return false; } int idx = -1; @@ -1061,7 +1136,41 @@ bool AnalysisPredictor::SetFeed(const std::vector &inputs, } else { idx = PADDLE_GET_CONST(int, feeds_[i]->GetAttr("col")); } - framework::SetFeedVariable(scope, *input, "feed", idx); + framework::SetFeedVariable(scope, *input, framework::kFeedOpType, idx); + } + return true; +} + +bool AnalysisPredictor::SetFeed(const std::vector &inputs, + framework::Scope *scope) { + VLOG(3) << "Predictor::set_feed"; + PADDLE_ENFORCE_EQ(inputs.size(), + feeds_.size(), + platform::errors::InvalidArgument( + "wrong feed input size, need %d but get %d.", + feeds_.size(), + inputs.size())); + for (size_t i = 0; i < inputs.size(); ++i) { + PADDLE_ENFORCE_EQ(inputs[i].initialized(), + true, + paddle::platform::errors::InvalidArgument( + "The input Tensor expected to be initialized.")); + } + + if (std::all_of(inputs.cbegin(), inputs.cend(), [&](const paddle::Tensor &t) { + return !t.name().empty() && feed_names_.count(t.name()); + })) { + for (size_t i = 0; i < inputs.size(); ++i) { + auto &t = framework::GetVariableTensor(*scope, inputs[i].name()); + t.ShareDataWith( + *std::dynamic_pointer_cast(inputs[i].impl())); + } + } else { + for (size_t i = 0; i < inputs.size(); ++i) { + auto &t = 
framework::GetVariableTensor(*scope, idx2feeds_[i]); + t.ShareDataWith( + *std::dynamic_pointer_cast(inputs[i].impl())); + } } return true; } @@ -1100,7 +1209,7 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, idx, i)); framework::FetchType &fetch_var = - framework::GetFetchVariable(*scope, "fetch", idx); + framework::GetFetchVariable(*scope, framework::kFetchOpType, idx); auto &fetch = PADDLE_GET(phi::DenseTensor, fetch_var); auto type = framework::TransToProtoVarType(fetch.dtype()); auto output = &(outputs->at(i)); @@ -1125,6 +1234,19 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, return true; } +bool AnalysisPredictor::GetFetch(std::vector *outputs, + framework::Scope *scope) { + VLOG(3) << "Predictor::get_fetch"; + outputs->resize(fetches_.size()); + for (size_t i = 0; i < fetches_.size(); ++i) { + auto const &name = idx2fetches_[i]; + auto &t = framework::GetVariableTensor(*scope, name); + (*outputs)[i] = + std::move(paddle::Tensor(std::make_shared(t), name)); + } + return true; +} + void AnalysisPredictor::PrepareArgument() { VLOG(3) << "AnalysisPredictor::PrepareArgument"; // Init std::unique_ptr argument_. @@ -1579,7 +1701,7 @@ void AnalysisPredictor::PrepareFeedFetch() { "The sub_scope should not be nullptr.")); CreateFeedFetchVar(sub_scope_); for (auto *op : inference_program_->Block(0).AllOps()) { - if (op->Type() == "feed") { + if (op->Type() == framework::kFeedOpType) { int idx = PADDLE_GET_CONST(int, op->GetAttr("col")); if (feeds_.size() <= static_cast(idx)) { feeds_.resize(idx + 1); @@ -1587,7 +1709,7 @@ void AnalysisPredictor::PrepareFeedFetch() { feeds_[idx] = op; feed_names_[op->Output("Out")[0]] = idx; idx2feeds_[idx] = op->Output("Out")[0]; - } else if (op->Type() == "fetch") { + } else if (op->Type() == framework::kFetchOpType) { int idx = PADDLE_GET_CONST(int, op->GetAttr("col")); if (fetches_.size() <= static_cast(idx)) { fetches_.resize(idx + 1); @@ -1602,9 +1724,9 @@ void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) { PADDLE_ENFORCE_NOT_NULL( scope, platform::errors::InvalidArgument("The scope should not be nullptr.")); - auto *var = scope->Var("feed"); + auto *var = scope->Var(framework::kFeedOpType); var->GetMutable(); - var = scope->Var("fetch"); + var = scope->Var(framework::kFetchOpType); var->GetMutable(); } @@ -2186,7 +2308,7 @@ void AnalysisPredictor::ClearIntermediateTensor() { const std::string name = var->Name(); auto *variable = executor_->GetScope()->FindVar(name); if (variable != nullptr && variable->IsType() && - name != "feed" && name != "fetch") { + name != framework::kFeedOpType && name != framework::kFetchOpType) { VLOG(3) << "Clear Intermediate Tensor: " << name; auto *t = variable->GetMutable(); t->clear(); @@ -2653,6 +2775,11 @@ std::map Predictor::GetOutputTypes() { bool Predictor::Run() { return predictor_->ZeroCopyRun(); } +bool Predictor::Run(const std::vector &inputs, + std::vector *outputs) { + return predictor_->Run(inputs, outputs); +} + std::unique_ptr Predictor::Clone(void *stream) { auto analysis_pred = predictor_->Clone(stream); std::unique_ptr pred(new Predictor(std::move(analysis_pred))); diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 5a578a9b94fcb..83207a8bfd654 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -31,15 +31,14 @@ #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/resource_manager.h" #include 
"paddle/fluid/platform/device/gpu/gpu_types.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/string/printf.h" +#include "paddle/phi/core/dense_tensor.h" #ifdef PADDLE_WITH_TESTING #include #include #endif namespace paddle_infer { -using float16 = paddle::platform::float16; namespace experimental { class InternalUtils; }; @@ -150,6 +149,16 @@ class AnalysisPredictor : public PaddlePredictor { std::vector *output_data, int batch_size = -1) override; + /// + /// \brief Run the prediction engine (Recommended). + /// + /// \param[in] inputs input tensors + /// \param[out] outputs output tensors + /// \return Whether the function executed successfully + /// + bool Run(const std::vector &inputs, + std::vector *outputs) override; + /// /// \brief Get the input names /// @@ -378,6 +387,17 @@ class AnalysisPredictor : public PaddlePredictor { /// bool SetFeed(const std::vector &input_datas, framework::Scope *scope); + + /// + /// \brief Prepare input data, only used in Run() + /// + /// \param[in] inputs inpute tensors + /// \param[in] scope the scope used by predictor + /// \return Whether the function executed successfully + /// + bool SetFeed(const std::vector &inputs, + framework::Scope *scope); + /// /// \brief Get the output data, only used in Run() /// @@ -387,6 +407,16 @@ class AnalysisPredictor : public PaddlePredictor { /// bool GetFetch(std::vector *output_data, framework::Scope *scope); + + /// + /// \brief Get the output data, only used in Run() + /// + /// \param[out] outputs output tensors + /// \param[in] scope the scope used by predictor + /// \return Whether the function executed successfully + /// + bool GetFetch(std::vector *outputs, framework::Scope *scope); + /// /// \brief Get the output data, only used in GetFetch() /// @@ -404,6 +434,14 @@ class AnalysisPredictor : public PaddlePredictor { /// \param[in] inputs tensors /// void MkldnnPreSet(const std::vector &inputs); + /// + /// \brief PreSet for Mkldnn multi-thread and dynamic shape input. + /// + /// Used in AnalysisPredictor::Run(). + /// + /// \param[in] inputs tensors + /// + void MkldnnPreSet(const std::vector &inputs); /// /// \brief PreSet for Mkldnn multi-thread and dynamic shape input. diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index fc23caee65638..11f214bc45d53 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -83,7 +83,7 @@ else() if(WITH_MKL) set(FLAG_OPENMP "-fopenmp") endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 ${FLAG_OPENMP}") endif() if(WITH_GPU) diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index e83c1a9f9444c..3a51f91b3afc2 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -221,6 +221,16 @@ class PD_INFER_DECL PaddlePredictor { std::vector* output_data, int batch_size = -1) = 0; + /// \brief This interface takes input and runs the network (Recommended). + /// \param[in] inputs An list of Tensor as the input to the network. + /// \param[out] output_data Pointer to the tensor list, which holds the output + /// Tensor + /// \return Whether the run is successful + virtual bool Run(const std::vector& inputs, + std::vector* outputs) { + return false; + } + /// \brief Used to get the name of the network input. 
/// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios. /// \return Input tensor names. diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index d7f15e0529894..54a9d9af117ca 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -128,6 +128,17 @@ class PD_INFER_DECL Predictor { /// bool Run(); + /// + /// \brief Run the prediction engine (Recommended) + /// + /// \param[in] inputs An list of Tensor as the input to the network. + /// \param[out] outputs Pointer to the tensor list, which holds the output + /// Tensor + /// + /// \return Whether the run is successful + bool Run(const std::vector& inputs, + std::vector* outputs); + /// /// \brief Get the output names /// diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h index 0301892792dc3..b9c86a60f55b8 100644 --- a/paddle/fluid/inference/api/paddle_tensor.h +++ b/paddle/fluid/inference/api/paddle_tensor.h @@ -21,6 +21,8 @@ #include "paddle_infer_declare.h" // NOLINT +#include "paddle/phi/api/include/tensor.h" // expose paddle::Tensor + #ifdef PADDLE_WITH_ONNXRUNTIME #include "onnxruntime_c_api.h" // NOLINT #include "onnxruntime_cxx_api.h" // NOLINT diff --git a/paddle/fluid/jit/engine/predictor_engine.cc b/paddle/fluid/jit/engine/predictor_engine.cc index d18f4f487dbe2..54e35bc0f69dd 100644 --- a/paddle/fluid/jit/engine/predictor_engine.cc +++ b/paddle/fluid/jit/engine/predictor_engine.cc @@ -22,11 +22,6 @@ namespace paddle { namespace jit { -static PaddleTensor DenseTensorToPaddleTensor(DenseTensor *t); -static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, - DenseTensor *t, - const platform::Place &place); - PredictorEngine::PredictorEngine( const std::shared_ptr &info, const std::shared_ptr ¶ms_dict, @@ -52,6 +47,7 @@ PredictorEngine::PredictorEngine( config.SetSkipLoadParams(true); config.SetApplyOptim(true); config.SwitchIrOptim(true); + config.SwitchUseFeedFetchOps(false); predictor_.reset(new AnalysisPredictor(config)); @@ -78,135 +74,15 @@ std::unique_ptr PredictorEngine::Clone(void *stream) { std::vector PredictorEngine::operator()( const std::vector &inputs) { - auto dense_tensors = utils::ToDenseTensors(inputs); - return utils::ToTensors(this->operator()(dense_tensors)); -} - -std::vector PredictorEngine::operator()( - const std::vector &inputs) { - std::vector pt_inputs; - std::vector pt_outputs; - for (auto &t : inputs) { - auto non_const_t = const_cast(&t); - pt_inputs.emplace_back(DenseTensorToPaddleTensor(non_const_t)); - } - - predictor_->Run(pt_inputs, &pt_outputs); - - std::vector outputs; - for (auto &pt : pt_outputs) { - DenseTensor t; - PaddleTensorToDenseTensor(pt, &t, place_); - outputs.emplace_back(t); - } + std::vector outputs; + predictor_->Run(inputs, &outputs); return outputs; } -static PaddleTensor DenseTensorToPaddleTensor(DenseTensor *t) { - PaddleTensor pt; - switch (framework::TransToProtoVarType(t->dtype())) { - case framework::proto::VarType::INT32: { - pt.data.Reset(t->data(), t->numel() * sizeof(int32_t)); - pt.dtype = PaddleDType::INT32; - } break; - case framework::proto::VarType::INT64: { - pt.data.Reset(t->data(), t->numel() * sizeof(int64_t)); - pt.dtype = PaddleDType::INT64; - } break; - case framework::proto::VarType::FP32: { - pt.data.Reset(t->data(), t->numel() * sizeof(float)); - pt.dtype = PaddleDType::FLOAT32; - } break; - default: - PADDLE_THROW( - 
platform::errors::Unimplemented("Unsupported tensor date type. Now " - "only supports INT64, FP32, INT32.")); - } - pt.shape = phi::vectorize(t->dims()); - return pt; -} - -static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, - DenseTensor *t, - const platform::Place &place) { - framework::DDim ddim = phi::make_ddim(pt.shape); - void *input_ptr; - switch (pt.dtype) { - case PaddleDType::INT64: - input_ptr = t->mutable_data(ddim, place); - break; - case PaddleDType::FLOAT32: - input_ptr = t->mutable_data(ddim, place); - break; - case PaddleDType::INT32: - input_ptr = t->mutable_data(ddim, place); - break; - case PaddleDType::FLOAT16: - input_ptr = t->mutable_data(ddim, place); - break; - default: - LOG(ERROR) << "unsupported feed type " << pt.dtype; - return false; - } - - PADDLE_ENFORCE_NOT_NULL( - input_ptr, - paddle::platform::errors::Fatal( - "Cannot convert to LoDTensor because LoDTensor creation failed.")); - PADDLE_ENFORCE_NOT_NULL( - pt.data.data(), - paddle::platform::errors::InvalidArgument( - "The data contained in the input PaddleTensor is illegal.")); - - if (platform::is_cpu_place(place)) { - // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. - std::memcpy( - static_cast(input_ptr), pt.data.data(), pt.data.length()); - } else if (platform::is_ipu_place(place)) { -#ifdef PADDLE_WITH_IPU - std::memcpy( - static_cast(input_ptr), pt.data.data(), pt.data.length()); -#else - PADDLE_THROW(paddle::platform::errors::Fatal( - "Not compile with WITH_IPU, should not reach here.")); -#endif - } else if (platform::is_gpu_place(place)) { - PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), - false, - platform::errors::InvalidArgument( - "Only one choice can be made between CPU and XPU.")); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto *dev_ctx = static_cast(pool.Get(place)); - auto dst_gpu_place = place; - memory::Copy(dst_gpu_place, - static_cast(input_ptr), - platform::CPUPlace(), - pt.data.data(), - pt.data.length(), - dev_ctx->stream()); -#else - PADDLE_THROW(paddle::platform::errors::Fatal( - "Not compile with CUDA, should not reach here.")); -#endif - } else if (platform::is_xpu_place(place)) { -#ifdef PADDLE_WITH_XPU - auto dst_xpu_place = place; - memory::Copy(dst_xpu_place, - static_cast(input_ptr), - platform::CPUPlace(), - pt.data.data(), - pt.data.length()); -#else - PADDLE_THROW(paddle::platform::errors::Fatal( - "Not compile with XPU, should not reach here.")); -#endif - } else { - PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "The analysis predictor supports CPU, GPU and XPU now.")); - } - return true; +std::vector PredictorEngine::operator()( + const std::vector &inputs) { + return utils::ToDenseTensors(this->operator()(utils::ToTensors(inputs))); } } // namespace jit diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index f2d1c396617b1..93030c9138fa8 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -770,7 +770,11 @@ PyObject* ToPyObject(const std::vector>& value) { PyObject* ToPyObject(const std::vector& value, bool return_py_none_if_not_initialize) { + // NOTE(liuyuanle): I encountered a bug(access violation) in windows. 
ref to + // https://stackoverflow.com/questions/55598839/how-to-fix-access-violation-error-when-returning-pyobject-from-c-function-usin + PyGILState_STATE gstate = PyGILState_Ensure(); PyObject* result = PyList_New((Py_ssize_t)value.size()); + PyGILState_Release(gstate); for (size_t i = 0; i < value.size(); i++) { if (!value[i].initialized() && return_py_none_if_not_initialize) { diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 44966f930d3f1..0546dd84b6882 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -65,7 +65,7 @@ constexpr int NPY_UINT16_ = 4; // paddle::platform::float16 as numpy.float16. // Ref: https://github.com/pybind/pybind11/issues/1776 template <> -struct npy_format_descriptor { +struct npy_format_descriptor { static py::dtype dtype() { handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_); return reinterpret_borrow(ptr); @@ -180,7 +180,7 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) { dt = py::dtype::of(); break; case PaddleDType::FLOAT16: - dt = py::dtype::of(); + dt = py::dtype::of(); break; case PaddleDType::UINT8: dt = py::dtype::of(); @@ -264,7 +264,7 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT ToPaddleInferPlace(input_tensor.place().GetType())); } else if (input_tensor.dtype() == phi::DataType::FLOAT16) { tensor.ShareExternalData( - static_cast(input_tensor.data()), + static_cast(input_tensor.data()), shape, ToPaddleInferPlace(input_tensor.place().GetType())); } else if (input_tensor.dtype() == phi::DataType::INT32) { @@ -353,7 +353,7 @@ size_t PaddleGetDTypeSize(PaddleDType dt) { size = sizeof(float); break; case PaddleDType::FLOAT16: - size = sizeof(paddle_infer::float16); + size = sizeof(phi::dtype::float16); break; case PaddleDType::INT8: size = sizeof(int8_t); @@ -392,8 +392,8 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT tensor.copy_to_cpu(static_cast(array.mutable_data())); break; case PaddleDType::FLOAT16: - tensor.copy_to_cpu( - static_cast(array.mutable_data())); + tensor.copy_to_cpu( + static_cast(array.mutable_data())); break; case PaddleDType::UINT8: tensor.copy_to_cpu(static_cast(array.mutable_data())); @@ -432,8 +432,8 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT tensor.CopyToCpu(static_cast(array.mutable_data())); break; case PaddleDType::FLOAT16: - tensor.CopyToCpu( - static_cast(array.mutable_data())); + tensor.CopyToCpu( + static_cast(array.mutable_data())); break; case PaddleDType::UINT8: tensor.CopyToCpu(static_cast(array.mutable_data())); @@ -1062,6 +1062,16 @@ void BindPaddleInferPredictor(py::module *m) { .def("get_output_names", &paddle_infer::Predictor::GetOutputNames) .def("get_input_handle", &paddle_infer::Predictor::GetInputHandle) .def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle) + .def( + "run", + [](paddle_infer::Predictor &self, py::handle py_in_tensor_list) { + auto in_tensor_list = + CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0); + std::vector outputs; + self.Run(in_tensor_list, &outputs); + return py::handle(ToPyObject(outputs)); + }, + py::arg("inputs")) .def("run", [](paddle_infer::Predictor &self) { self.Run(); }) .def("clone", [](paddle_infer::Predictor &self) { return self.Clone(nullptr); }) @@ -1091,9 +1101,9 @@ void BindZeroCopyTensor(py::module *m) { .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) + 
.def("copy_from_cpu", &ZeroCopyTensorCreate) // NOTE(liuyuanle): double must be bound after float. .def("copy_from_cpu", &ZeroCopyTensorCreate) - .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyStringTensorCreate) .def("copy_to_cpu", &ZeroCopyTensorToNumpy) @@ -1116,10 +1126,9 @@ void BindPaddleInferTensor(py::module *m) { .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) + .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) // NOTE(liuyuanle): double must be bound after float. .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) - .def("_copy_from_cpu_bind", - &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferStringTensorCreate) .def("_share_external_data_bind", &PaddleInferShareExternalData) diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index d3943750fd21e..24bcc63dbd278 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -416,7 +416,7 @@ class PADDLE_API Tensor final { /** * @brief Return the name of Tensor. * @note Used to adapt original execution mechanism and debug analysis - * in the development of new dygraph. It may be removed in the future. + * in the development of new dygraph. * * @return const std::string& */ @@ -425,7 +425,7 @@ class PADDLE_API Tensor final { /** * @brief Set name of Tensor. * @note Used to adapt original execution mechanism and debug analysis - * in the development of new dygraph. It may be removed in the future. + * in the development of new dygraph. * * @param const std::string& name */ @@ -657,7 +657,7 @@ class PADDLE_API Tensor final { /** * Tensor name: used to adapt original execution mechanism and debug analysis - * in the development of new dygraph. It may be removed in the future. + * in the development of new dygraph. */ std::string name_{""}; diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index 3cebef1588ea5..6a409b6419623 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -136,6 +136,7 @@ Tensor add_n_impl(const std::vector& x) { Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) { Tensor out; copy(x, place, blocking, &out); + out.set_name(x.name()); return out; } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py new file mode 100644 index 0000000000000..99ba29956c5da --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py @@ -0,0 +1,128 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import tempfile +import unittest + +import numpy as np + +import paddle +from paddle.inference import Config, create_predictor + + +class TestNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.fc1 = paddle.nn.Linear(4, 4) + self.fc2 = paddle.nn.Linear(4, 4) + + def forward(self, x1, x2): + y1 = self.fc1(x1) + y2 = self.fc2(x2) + return y1 + y2 + + +@unittest.skipIf( + not paddle.is_compiled_with_cuda(), 'should compile with cuda.' +) +class TestPredictorRunWithTensor(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + net = TestNet() + model = paddle.jit.to_static( + net, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 4], dtype='float32', name='input0' + ), + paddle.static.InputSpec( + shape=[None, 4], dtype='float32', name='input1' + ), + ], + ) + paddle.jit.save( + model, + os.path.join( + self.temp_dir.name, 'test_predictor_run_model/inference' + ), + ) + + def tearDown(self): + self.temp_dir.cleanup() + + def init_predictor(self): + config = Config( + os.path.join( + self.temp_dir.name, + 'test_predictor_run_model/inference.pdmodel', + ), + os.path.join( + self.temp_dir.name, + 'test_predictor_run_model/inference.pdiparams', + ), + ) + config.enable_use_gpu(256, 0) + config.enable_memory_optim() + predictor = create_predictor(config) + return predictor + + def get_inputs(self): + input0 = np.array([[1, 2, 3, 4], [2, 3, 4, 5]]).astype(np.float32) + input1 = np.array([[0.1, 0.2, 0.3, 0.4], [1.2, 1.3, 1.4, 1.5]]).astype( + np.float32 + ) + + input0_tensor = paddle.to_tensor(input0) + input1_tensor = paddle.to_tensor(input1) + + return [input0_tensor, input1_tensor] + + def get_disorder_output(self): + predictor = self.init_predictor() + + [input0_tensor, input1_tensor] = self.get_inputs() + + input_names = predictor.get_input_names() + input0_tensor.name = input_names[0] + input1_tensor.name = input_names[1] + + # disorder + inputs = [input1_tensor, input0_tensor] + outputs = predictor.run(inputs) + + return outputs[0] + + def get_inorder_output(self): + predictor = self.init_predictor() + + [input0_tensor, input1_tensor] = self.get_inputs() + + # inorder + inputs = [input0_tensor, input1_tensor] + outputs = predictor.run(inputs) + + return outputs[0] + + def test_output(self): + inorder_output = self.get_inorder_output() + disorder_output = self.get_disorder_output() + + assert np.allclose( + inorder_output.numpy().flatten(), disorder_output.numpy().flatten() + ) + + +if __name__ == '__main__': + unittest.main() From 745425778aac65c71a9af45e7982227e4fa84add Mon Sep 17 00:00:00 2001 From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com> Date: Tue, 11 Apr 2023 17:08:54 +0800 Subject: [PATCH 23/59] remove -Wimplicit-fallthrough (#52717) * delete [-Wno-error=terminate], test=develop * remove GPUps[-Wterminate],test=develop * remove some -Wno-, test=develop * modify ~MatmulDescriptor * mess * remove -Wimplicit-fallthrough, test=develop * remove -Wimplicit-fallthrough, test=develop * remove -Wimplicit-fallthrough, test=develop * remove -Wimplicit-fallthrough, test=develop * remove , test=develop --- paddle/fluid/imperative/layout_transformer.h | 6 ++++++ .../api/composite_backward/composite_backward_api.h | 4 ++++ paddle/utils/string/tinyformat/tinyformat.h | 13 +++++++++++++ 3 files changed, 23 insertions(+) diff --git a/paddle/fluid/imperative/layout_transformer.h b/paddle/fluid/imperative/layout_transformer.h index 4dba2d16d598c..93c924a095c9e 100644 --- a/paddle/fluid/imperative/layout_transformer.h 
+++ b/paddle/fluid/imperative/layout_transformer.h @@ -402,10 +402,16 @@ class ArgmaxOpTransformer case paddle::framework::proto::AttrType::INT: { auto axis = PADDLE_GET_CONST(int, (*attrs)["axis"]); (*attrs)["axis"] = static_cast(perm[axis]); +#ifdef LINUX + __attribute__((fallthrough)); +#endif } case paddle::framework::proto::AttrType::LONG: { auto axis = PADDLE_GET_CONST(int64_t, (*attrs)["axis"]); (*attrs)["axis"] = static_cast(perm[axis]); +#ifdef LINUX + __attribute__((fallthrough)); +#endif } default: VLOG(4) << "The data_type of axis is Error, axis must be int or " diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h index 286d3cae8de5d..6697f1a614c38 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h +++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h @@ -1485,7 +1485,11 @@ void batch_norm_grad(const Tensor& x, } break; } +#ifdef LINUX + __attribute__((fallthrough)); +#endif } + default: PADDLE_THROW(phi::errors::InvalidArgument("Unknown storage order: %s", data_layout)); diff --git a/paddle/utils/string/tinyformat/tinyformat.h b/paddle/utils/string/tinyformat/tinyformat.h index f9c55fe1835fd..bd8d47849db96 100644 --- a/paddle/utils/string/tinyformat/tinyformat.h +++ b/paddle/utils/string/tinyformat/tinyformat.h @@ -691,6 +691,9 @@ inline const char *streamStateFromFormat(std::ostream &out, // NOLINT break; case 'X': out.setf(std::ios::uppercase); +#ifdef LINUX + __attribute__((fallthrough)); +#endif case 'x': case 'p': out.setf(std::ios::hex, std::ios::basefield); @@ -698,17 +701,27 @@ inline const char *streamStateFromFormat(std::ostream &out, // NOLINT break; case 'E': out.setf(std::ios::uppercase); +#ifdef LINUX + __attribute__((fallthrough)); +#endif case 'e': out.setf(std::ios::scientific, std::ios::floatfield); out.setf(std::ios::dec, std::ios::basefield); break; case 'F': out.setf(std::ios::uppercase); +#ifdef LINUX + __attribute__((fallthrough)); +#endif case 'f': out.setf(std::ios::fixed, std::ios::floatfield); break; case 'G': out.setf(std::ios::uppercase); +#ifdef LINUX + __attribute__((fallthrough)); +#endif + case 'g': out.setf(std::ios::dec, std::ios::basefield); // As in boost::format, let stream decide float format. From 8e9bfa7f38e3de7ae28030216248894aa285476b Mon Sep 17 00:00:00 2001 From: lzydev Date: Tue, 11 Apr 2023 17:59:31 +0800 Subject: [PATCH 24/59] autogen unique (#52738) --- .../generator/get_expected_kernel_func.cc | 18 ++ .../generator/get_expected_kernel_func.h | 4 + paddle/fluid/operators/unique_op.cc | 168 ------------------ paddle/phi/api/yaml/op_compat.yaml | 9 + paddle/phi/api/yaml/static_ops.yaml | 10 ++ paddle/phi/ops/compat/unique_sig.cc | 11 ++ 6 files changed, 52 insertions(+), 168 deletions(-) delete mode 100644 paddle/fluid/operators/unique_op.cc diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.cc b/paddle/fluid/operators/generator/get_expected_kernel_func.cc index a4b0e637e12dc..ce2cbb43deed0 100644 --- a/paddle/fluid/operators/generator/get_expected_kernel_func.cc +++ b/paddle/fluid/operators/generator/get_expected_kernel_func.cc @@ -158,5 +158,23 @@ phi::KernelKey GetMatrixNmsExpectedKernelType( platform::CPUPlace()); } +phi::KernelKey GetUniqueExpectedKernelType( + const framework::ExecutionContext& ctx, + const framework::OperatorWithKernel* op_ptr) { + (void)ctx; + // Return CPUPlace when Attr("is_sorted") is false. 
Because it means + // that fluid.layers.unique is called, but there is no cuda kernel. + if (!ctx.Attr("is_sorted")) { + return phi::KernelKey( + op_ptr->OperatorWithKernel::IndicateVarDataType(ctx, "X"), + platform::CPUPlace()); + } else { + // new version paddle.unique is called. + return phi::KernelKey( + op_ptr->OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.GetPlace()); + } +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.h b/paddle/fluid/operators/generator/get_expected_kernel_func.h index a83f5865e3499..cbbb74e2312ed 100644 --- a/paddle/fluid/operators/generator/get_expected_kernel_func.h +++ b/paddle/fluid/operators/generator/get_expected_kernel_func.h @@ -44,5 +44,9 @@ phi::KernelKey GetMatrixNmsExpectedKernelType( const framework::ExecutionContext& ctx, const framework::OperatorWithKernel* op_ptr); +phi::KernelKey GetUniqueExpectedKernelType( + const framework::ExecutionContext& ctx, + const framework::OperatorWithKernel* op_ptr); + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/unique_op.cc b/paddle/fluid/operators/unique_op.cc deleted file mode 100644 index 5484a16ca6bd4..0000000000000 --- a/paddle/fluid/operators/unique_op.cc +++ /dev/null @@ -1,168 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
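As the comment above explains, the unsorted path (legacy `fluid.layers.unique`) only has a CPU kernel, while the sorted path used by the new-style API can stay on the original place. A small sketch of the `paddle.unique` behaviour the autogenerated op has to preserve; the input values are arbitrary.

import paddle

x = paddle.to_tensor([2, 3, 3, 1, 5, 3])
out, indices, inverse, counts = paddle.unique(
    x, return_index=True, return_inverse=True, return_counts=True)
print(out)      # unique values in ascending order (the is_sorted=True path)
print(indices)  # index of the first occurrence of each unique value
print(inverse)  # maps every input element to its position in `out`
print(counts)   # number of occurrences of each unique value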
*/ - -#include "paddle/fluid/operators/unique_op.h" - -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class UniqueOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unique"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "unique"); - - bool return_index = ctx->Attrs().Get("return_index"); - bool return_inverse = ctx->Attrs().Get("return_inverse"); - bool return_counts = ctx->Attrs().Get("return_counts"); - auto axis_vec = ctx->Attrs().Get>("axis"); - auto data_type = - static_cast(static_cast( - ctx->Attrs().Get("dtype"))); - - // Construct MetaTensor for InferMeta Func - using CompatMetaTensor = framework::CompatMetaTensor; - CompatMetaTensor x(ctx->GetInputVarPtrs("X")[0], ctx->IsRuntime()); - CompatMetaTensor out(ctx->GetOutputVarPtrs("Out")[0], ctx->IsRuntime()); - std::unique_ptr indices(nullptr); - std::unique_ptr index(nullptr); - std::unique_ptr counts(nullptr); - - if (return_index) { - OP_INOUT_CHECK(ctx->HasOutput("Indices"), "Output", "Indices", "unique"); - indices = - std::move(std::unique_ptr(new CompatMetaTensor( - ctx->GetOutputVarPtrs("Indices")[0], ctx->IsRuntime()))); - } - if (return_inverse) { - OP_INOUT_CHECK(ctx->HasOutput("Index"), "Output", "Index", "unique"); - index = std::move(std::unique_ptr(new CompatMetaTensor( - ctx->GetOutputVarPtrs("Index")[0], ctx->IsRuntime()))); - } - if (return_counts) { - OP_INOUT_CHECK(ctx->HasOutput("Counts"), "Output", "Counts", "unique"); - counts = std::move(std::unique_ptr(new CompatMetaTensor( - ctx->GetOutputVarPtrs("Counts")[0], ctx->IsRuntime()))); - } - bool is_sorted = ctx->Attrs().Get("is_sorted"); - if (is_sorted) { - phi::UniqueInferMeta(x, - return_index, - return_inverse, - return_counts, - axis_vec, - data_type, - &out, - indices.get(), - index.get(), - counts.get()); - } else { - OP_INOUT_CHECK(ctx->HasOutput("Index"), "Output", "Index", "unique"); - if (index == nullptr) { - index = - std::move(std::unique_ptr(new CompatMetaTensor( - ctx->GetOutputVarPtrs("Index")[0], ctx->IsRuntime()))); - } - phi::UniqueRawInferMeta(x, - return_index, - return_inverse, - return_counts, - axis_vec, - data_type, - is_sorted, - &out, - indices.get(), - index.get(), - counts.get()); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - // Return CPUPlace when Attr("is_sorted") is false. Because it means - // that fluid.layers.unique is called, but there is no cuda kernel. - if (!ctx.Attr("is_sorted")) { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - platform::CPUPlace()); - } else { - // new version paddle.unique is called. - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } - } -}; - -class UniqueOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "Input tensor. 
It should be a 1-D tensor when Attr(is_sorted)" - " is false or a N-D tensor when Attr(is_sorted) is true."); - AddAttr("dtype", "data type for output index"); - AddOutput("Out", "A unique subsequence for input tensor."); - AddOutput("Index", - "Equivalent to inverse in numpy.unique, " - "the indices for where elements in the original input ended up " - "in the returned unique tensor."); - AddOutput( - "Indices", - "The indices of the input tensor that result in the unique tensor.") - .AsDispensable(); - AddOutput("Counts", "The counts for each unique element.").AsDispensable(); - AddAttr("return_index", - "If True, also return the indices of the input" - " tensor that result in the unique Tensor.") - .SetDefault(false); - AddAttr( - "return_inverse", - "If True, also return the indices for where elements" - " in the original input ended up in the returned unique tensor.") - .SetDefault(false); - AddAttr("return_counts", - "If True, also return the counts for each unique element.") - .SetDefault(false); - AddAttr>( - "axis", - "The axis to apply unique. If None, the input will be flattened.") - .SetDefault({}); - AddAttr("is_sorted", - "If True, the unique elements of X are in ascending order." - "Otherwise, the unique elements are not sorted.") - .SetDefault(false); - AddComment(R"DOC( - 1. Return a unique subsequence for 1-D input tensor, and an index tensor - pointing to this unique subsequence when Attr(is_sorted) is false. This - means paddle.unique is called. - - 2. Returns the unique elements of X in ascending order when Attr(is_sorted) - is true. This means fluid.layers.unique is called. -)DOC"); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_WITHOUT_GRADIENT(unique, ops::UniqueOp, ops::UniqueOpMaker); diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 90c75a8dcc6cd..e53909aa3fdee 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -2241,6 +2241,15 @@ support_tensor : true manual_signature : [uniform] +- op : unique + inputs : + {x : X} + outputs : + {out : Out, indices : Indices, inverse : Index, counts : Counts} + get_expected_kernel_type : + unique : GetUniqueExpectedKernelType + manual_signature : [unique] + - op : unique_consecutive inputs : x : X diff --git a/paddle/phi/api/yaml/static_ops.yaml b/paddle/phi/api/yaml/static_ops.yaml index f0f26e27c1f2c..802c6b1d46df5 100644 --- a/paddle/phi/api/yaml/static_ops.yaml +++ b/paddle/phi/api/yaml/static_ops.yaml @@ -342,3 +342,13 @@ func : uniform param: [shape, dtype, min, max, seed] data_type : dtype + +- op : unique + args : (Tensor x, bool return_index=false, bool return_inverse=false, bool return_counts=false, int[] axis={}, DataType dtype=DataType::INT64, bool is_sorted=false) + output : Tensor(out), Tensor(indices), Tensor(inverse), Tensor(counts) + optional : indices, counts + infer_meta : + func : UniqueRawInferMeta + kernel : + func : unique + data_type : x diff --git a/paddle/phi/ops/compat/unique_sig.cc b/paddle/phi/ops/compat/unique_sig.cc index 2a7ba543012f3..8a38775bc6080 100644 --- a/paddle/phi/ops/compat/unique_sig.cc +++ b/paddle/phi/ops/compat/unique_sig.cc @@ -17,6 +17,17 @@ limitations under the License. 
*/ namespace phi { KernelSignature UniqueOpArgumentMapping(const ArgumentMappingContext& ctx) { + if (ctx.IsForInferShape()) { + return KernelSignature("unique_raw", + {"X"}, + {"return_index", + "return_inverse", + "return_counts", + "axis", + "dtype", + "is_sorted"}, + {"Out", "Indices", "Index", "Counts"}); + } bool is_sorted = paddle::any_cast(ctx.Attr("is_sorted")); if (is_sorted) { return KernelSignature( From 2a24a6bb895c53677df88d5c1197b5e7f441be55 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 11 Apr 2023 20:51:15 +0800 Subject: [PATCH 25/59] [CustomOP Unittest] Polish unit test, unify check_output (#52737) * [CustomOP Unittest] Polish unit test, unify check_output * fix test_static_save_and_run_inference_predictor --- test/custom_op/test_custom_conj.py | 56 +++---- test/custom_op/test_custom_inplace.py | 154 ++++++++---------- test/custom_op/test_custom_linear.py | 31 +--- test/custom_op/test_custom_optional.py | 124 +++++--------- test/custom_op/test_custom_relu_op_setup.py | 66 ++------ .../test_custom_relu_op_xpu_setup.py | 69 +------- test/custom_op/test_custom_simple_slice.py | 10 +- .../custom_op/test_custom_tanh_double_grad.py | 34 +--- test/custom_op/test_custom_tensor_operator.py | 55 ++++--- test/custom_op/test_multi_out_jit.py | 31 ++-- test/custom_op/utils.py | 42 +++++ 11 files changed, 255 insertions(+), 417 deletions(-) diff --git a/test/custom_op/test_custom_conj.py b/test/custom_op/test_custom_conj.py index c30463bc34869..f51038ae1b34c 100644 --- a/test/custom_op/test_custom_conj.py +++ b/test/custom_op/test_custom_conj.py @@ -16,7 +16,7 @@ import unittest import numpy as np -from utils import extra_cc_args, extra_nvcc_args, paddle_includes +from utils import check_output, extra_cc_args, extra_nvcc_args, paddle_includes import paddle from paddle import static @@ -100,42 +100,27 @@ def setUp(self): self.dtypes = ['float32', 'float64'] self.shape = [2, 20, 2, 3] - def check_output(self, out, pd_out, name): - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out, name, pd_out - ), - ) - - def run_dynamic(self, dtype, np_input): - out, x_grad = conj_dynamic(custom_ops.custom_conj, dtype, np_input) - pd_out, pd_x_grad = conj_dynamic(paddle.conj, dtype, np_input) - - self.check_output(out, pd_out, "out") - self.check_output(x_grad, pd_x_grad, "x's grad") - - def run_static(self, dtype, np_input): - out, x_grad = conj_static( - custom_ops.custom_conj, self.shape, dtype, np_input - ) - pd_out, pd_x_grad = conj_static( - paddle.conj, self.shape, dtype, np_input - ) - - self.check_output(out, pd_out, "out") - self.check_output(x_grad, pd_x_grad, "x's grad") - def test_dynamic(self): for dtype in self.dtypes: np_input = np.random.random(self.shape).astype(dtype) - self.run_dynamic(dtype, np_input) + out, x_grad = conj_dynamic(custom_ops.custom_conj, dtype, np_input) + pd_out, pd_x_grad = conj_dynamic(paddle.conj, dtype, np_input) + + check_output(out, pd_out, "out") + check_output(x_grad, pd_x_grad, "x's grad") def test_static(self): for dtype in self.dtypes: np_input = np.random.random(self.shape).astype(dtype) - self.run_static(dtype, np_input) + out, x_grad = conj_static( + custom_ops.custom_conj, self.shape, dtype, np_input + ) + pd_out, pd_x_grad = conj_static( + paddle.conj, self.shape, dtype, np_input + ) + + check_output(out, pd_out, "out") + check_output(x_grad, pd_x_grad, "x's grad") # complex only used in dynamic mode now def test_complex_dynamic(self): @@ -143,7 +128,16 @@ def 
test_complex_dynamic(self): np_input = np.random.random(self.shape).astype( dtype ) + 1j * np.random.random(self.shape).astype(dtype) - self.run_dynamic(to_complex(dtype), np_input) + + out, x_grad = conj_dynamic( + custom_ops.custom_conj, to_complex(dtype), np_input + ) + pd_out, pd_x_grad = conj_dynamic( + paddle.conj, to_complex(dtype), np_input + ) + + check_output(out, pd_out, "out") + check_output(x_grad, pd_x_grad, "x's grad") if __name__ == "__main__": diff --git a/test/custom_op/test_custom_inplace.py b/test/custom_op/test_custom_inplace.py index 2c0a5d4c513c1..f5eed712cdcf9 100644 --- a/test/custom_op/test_custom_inplace.py +++ b/test/custom_op/test_custom_inplace.py @@ -16,7 +16,13 @@ import unittest import numpy as np -from utils import extra_cc_args, extra_nvcc_args, paddle_includes +from utils import ( + check_output, + check_output_allclose, + extra_cc_args, + extra_nvcc_args, + paddle_includes, +) import paddle from paddle import static @@ -342,26 +348,6 @@ def setUp(self): np.random.random((3, 2)).astype("float32"), ] - def check_output(self, out, pd_out, name): - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out, name, pd_out - ), - ) - - def check_output_allclose(self, out, pd_out, name): - np.testing.assert_allclose( - out, - pd_out, - rtol=5e-5, - atol=1e-2, - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out, name, pd_out - ), - ) - def test_static_add(self): for device in self.devices: for dtype in self.dtypes: @@ -391,15 +377,15 @@ def test_static_add(self): self.np_x, self.np_y, ) - self.check_output(custom_x, custom_out, "inplace_custom_x") - self.check_output( + check_output(custom_x, custom_out, "inplace_custom_x") + check_output( custom_x_grad, custom_out_grad, "inplace_custom_x_grad" ) - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") - self.check_output(custom_out_grad, pd_out_grad, "out_grad") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_out_grad, pd_out_grad, "out_grad") def test_dynamic_add(self): for device in self.devices: @@ -431,14 +417,14 @@ def test_dynamic_add(self): self.np_y, ) - self.check_output(custom_x, custom_out, "inplace_custom_x") - self.check_output(pd_x, pd_out, "inplace_pd_x") + check_output(custom_x, custom_out, "inplace_custom_x") + check_output(pd_x, pd_out, "inplace_pd_x") - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_y, pd_y, "y") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_y, pd_y, "y") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") def test_static_add_vector(self): for device in self.devices: @@ -468,10 +454,10 @@ def test_static_add_vector(self): self.np_y, ) - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") - self.check_output(custom_out_grad, pd_out_grad, "out_grad") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") + 
check_output(custom_out_grad, pd_out_grad, "out_grad") def test_dynamic_add_vector(self): for device in self.devices: @@ -503,14 +489,14 @@ def test_dynamic_add_vector(self): self.np_y, ) - self.check_output(custom_x, custom_out, "inplace_custom_x") - self.check_output(pd_x, pd_out, "inplace_pd_x") + check_output(custom_x, custom_out, "inplace_custom_x") + check_output(pd_x, pd_out, "inplace_pd_x") - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_y, pd_y, "y") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_y, pd_y, "y") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") def test_static_relu_net(self): for device in self.devices: @@ -543,11 +529,11 @@ def test_static_relu_net(self): self.np_y, self.np_z, ) - self.check_output_allclose(custom_x, pd_x, "x") - self.check_output_allclose(custom_y, pd_y, "y") - self.check_output_allclose(custom_out, pd_out, "out") - self.check_output_allclose(custom_x_grad, pd_x_grad, "x_grad") - self.check_output_allclose(custom_y_grad, pd_y_grad, "y_grad") + check_output_allclose(custom_x, pd_x, "x") + check_output_allclose(custom_y, pd_y, "y") + check_output_allclose(custom_out, pd_out, "out") + check_output_allclose(custom_x_grad, pd_x_grad, "x_grad") + check_output_allclose(custom_y_grad, pd_y_grad, "y_grad") def test_dynamic_relu_net(self): for device in self.devices: @@ -581,11 +567,11 @@ def test_dynamic_relu_net(self): self.np_z, ) - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_y, pd_y, "y") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_y, pd_y, "y") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") def test_static_multi_inplace(self): for device in self.devices: @@ -630,27 +616,23 @@ def test_static_multi_inplace(self): self.np_a, self.np_b, ) - self.check_output(custom_x, pd_out_xy, "inplace_custom_x") - self.check_output( + check_output(custom_x, pd_out_xy, "inplace_custom_x") + check_output( custom_x_grad, custom_out_xy_grad, "inplace_custom_x_grad" ) - self.check_output(custom_a, pd_out_ab, "inplace_custom_a") - self.check_output( + check_output(custom_a, pd_out_ab, "inplace_custom_a") + check_output( custom_a_grad, custom_out_ab_grad, "inplace_custom_a_grad" ) - self.check_output(custom_out_xy, pd_out_xy, "outxy") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") - self.check_output( - custom_out_xy_grad, pd_out_xy_grad, "outxy_grad" - ) - self.check_output(custom_out_ab, pd_out_ab, "outab") - self.check_output(custom_a_grad, pd_a_grad, "a_grad") - self.check_output(custom_b_grad, pd_b_grad, "b_grad") - self.check_output( - custom_out_ab_grad, pd_out_ab_grad, "outab_grad" - ) + check_output(custom_out_xy, pd_out_xy, "outxy") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_out_xy_grad, pd_out_xy_grad, "outxy_grad") + check_output(custom_out_ab, pd_out_ab, "outab") + check_output(custom_a_grad, pd_a_grad, "a_grad") + check_output(custom_b_grad, pd_b_grad, 
"b_grad") + check_output(custom_out_ab_grad, pd_out_ab_grad, "outab_grad") def test_dynamic_multi_inplace(self): for device in self.devices: @@ -696,21 +678,21 @@ def test_dynamic_multi_inplace(self): self.np_b, ) - self.check_output(custom_x, custom_out_xy, "inplace_custom_x") - self.check_output(pd_x, pd_out_xy, "inplace_pd_x") - self.check_output(custom_a, custom_out_ab, "inplace_custom_a") - self.check_output(pd_a, pd_out_ab, "inplace_pd_a") - - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_y, pd_y, "y") - self.check_output(custom_out_xy, pd_out_xy, "outxy") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") - self.check_output(custom_a, pd_a, "a") - self.check_output(custom_b, pd_b, "b") - self.check_output(custom_out_ab, pd_out_ab, "outab") - self.check_output(custom_a_grad, pd_a_grad, "a_grad") - self.check_output(custom_b_grad, pd_b_grad, "b_grad") + check_output(custom_x, custom_out_xy, "inplace_custom_x") + check_output(pd_x, pd_out_xy, "inplace_pd_x") + check_output(custom_a, custom_out_ab, "inplace_custom_a") + check_output(pd_a, pd_out_ab, "inplace_pd_a") + + check_output(custom_x, pd_x, "x") + check_output(custom_y, pd_y, "y") + check_output(custom_out_xy, pd_out_xy, "outxy") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_a, pd_a, "a") + check_output(custom_b, pd_b, "b") + check_output(custom_out_ab, pd_out_ab, "outab") + check_output(custom_a_grad, pd_a_grad, "a_grad") + check_output(custom_b_grad, pd_b_grad, "b_grad") if __name__ == "__main__": diff --git a/test/custom_op/test_custom_linear.py b/test/custom_op/test_custom_linear.py index 5cd4b5e14f7dd..60a881bdb6a0c 100644 --- a/test/custom_op/test_custom_linear.py +++ b/test/custom_op/test_custom_linear.py @@ -16,7 +16,7 @@ import unittest import numpy as np -from utils import extra_cc_args, extra_nvcc_args, paddle_includes +from utils import check_output, extra_cc_args, extra_nvcc_args, paddle_includes import paddle import paddle.nn.functional as F @@ -99,15 +99,6 @@ def setUp(self): self.np_weight = np.full([2, 4], fill_value=0.5, dtype="float32") self.np_bias = np.ones([4], dtype="float32") - def check_output(self, out, pd_out, name): - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out, name, pd_out - ), - ) - def test_static(self): for device in self.devices: for dtype in self.dtypes: @@ -132,12 +123,10 @@ def test_static(self): self.np_weight, self.np_bias, ) - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output( - custom_weight_grad, pd_weight_grad, "weight_grad" - ) - self.check_output(custom_bias_grad, pd_bias_grad, "bias_grad") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_weight_grad, pd_weight_grad, "weight_grad") + check_output(custom_bias_grad, pd_bias_grad, "bias_grad") def test_dynamic(self): for device in self.devices: @@ -168,12 +157,10 @@ def test_dynamic(self): self.np_weight, self.np_bias, ) - self.check_output(custom_out, pd_out, "custom_out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output( - custom_weight_grad, pd_weight_grad, "weight_grad" - ) - self.check_output(custom_bias_grad, pd_bias_grad, "bias_grad") + check_output(custom_out, pd_out, "custom_out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + 
check_output(custom_weight_grad, pd_weight_grad, "weight_grad") + check_output(custom_bias_grad, pd_bias_grad, "bias_grad") if __name__ == "__main__": diff --git a/test/custom_op/test_custom_optional.py b/test/custom_op/test_custom_optional.py index 53d4f15952740..1c1335b37bd98 100644 --- a/test/custom_op/test_custom_optional.py +++ b/test/custom_op/test_custom_optional.py @@ -16,7 +16,7 @@ import unittest import numpy as np -from utils import extra_cc_args, extra_nvcc_args, paddle_includes +from utils import check_output, extra_cc_args, extra_nvcc_args, paddle_includes import paddle from paddle import static @@ -465,44 +465,6 @@ def setUp(self): np.random.random((3, 2)).astype("float32"), ] - def check_output(self, out, pd_out, name): - if out is None and pd_out is None: - return - assert out is not None, "out value of " + name + " is None" - assert pd_out is not None, "pd_out value of " + name + " is None" - if isinstance(out, list) and isinstance(pd_out, list): - for idx in range(len(out)): - np.testing.assert_array_equal( - out[idx], - pd_out[idx], - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out[idx], name, pd_out[idx] - ), - ) - else: - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out, name, pd_out - ), - ) - - def check_output_allclose(self, out, pd_out, name): - if out is None and pd_out is None: - return - assert out is not None, "out value of " + name + " is None" - assert pd_out is not None, "pd_out value of " + name + " is None" - np.testing.assert_allclose( - out, - pd_out, - rtol=5e-5, - atol=1e-2, - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out, name, pd_out - ), - ) - def test_optional_static_add(self): for device in self.devices: for dtype in self.dtypes: @@ -526,9 +488,9 @@ def test_optional_static_add(self): np_y, ) - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") def test_optional_dynamic_add(self): for device in self.devices: @@ -553,9 +515,9 @@ def test_optional_dynamic_add(self): np_y, ) - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") def test_optional_inplace_static_add(self): for device in self.devices: @@ -576,13 +538,11 @@ def test_optional_inplace_static_add(self): np_y, ) - self.check_output(custom_tuple[0], pd_tuple[0], "x") - self.check_output(custom_tuple[1], pd_tuple[1], "out") - self.check_output(custom_tuple[2], pd_tuple[2], "x_grad") + check_output(custom_tuple[0], pd_tuple[0], "x") + check_output(custom_tuple[1], pd_tuple[1], "out") + check_output(custom_tuple[2], pd_tuple[2], "x_grad") if len(custom_tuple) > 3: - self.check_output( - custom_tuple[3], pd_tuple[3], "y_grad" - ) + check_output(custom_tuple[3], pd_tuple[3], "y_grad") def test_optional_inplace_dynamic_add(self): for device in self.devices: @@ -619,16 +579,16 @@ def test_optional_inplace_dynamic_add(self): np_y, ) - self.check_output(pd_y, pd_outy, "inplace_pd_y") - self.check_output(custom_y, custom_outy, "inplace_custom_y") + check_output(pd_y, pd_outy, "inplace_pd_y") + check_output(custom_y, custom_outy, "inplace_custom_y") - 
self.check_output(custom_x, pd_x, "x") - self.check_output(custom_outx, pd_outx, "outx") - self.check_output(custom_y, pd_y, "y") - self.check_output(custom_outy, pd_outy, "outy") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_outx, pd_outx, "outx") + check_output(custom_y, pd_y, "y") + check_output(custom_outy, pd_outy, "outy") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") def test_optional_vector_static_add(self): for device in self.devices: @@ -653,9 +613,9 @@ def test_optional_vector_static_add(self): np_y, ) - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") def test_optional_vector_dynamic_add(self): for device in self.devices: @@ -680,9 +640,9 @@ def test_optional_vector_dynamic_add(self): np_y, ) - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") def test_optional_inplace_vector_static_add(self): for device in self.devices: @@ -703,16 +663,12 @@ def test_optional_inplace_vector_static_add(self): np_y, ) - self.check_output(custom_tuple[0], pd_tuple[0], "x") - self.check_output(custom_tuple[1], pd_tuple[1], "out") - self.check_output(custom_tuple[2], pd_tuple[2], "x_grad") + check_output(custom_tuple[0], pd_tuple[0], "x") + check_output(custom_tuple[1], pd_tuple[1], "out") + check_output(custom_tuple[2], pd_tuple[2], "x_grad") if len(custom_tuple) > 3: - self.check_output( - custom_tuple[3], pd_tuple[3], "y1_grad" - ) - self.check_output( - custom_tuple[4], pd_tuple[4], "y2_grad" - ) + check_output(custom_tuple[3], pd_tuple[3], "y1_grad") + check_output(custom_tuple[4], pd_tuple[4], "y2_grad") def test_optional_inplace_vector_dynamic_add(self): for device in self.devices: @@ -749,16 +705,16 @@ def test_optional_inplace_vector_dynamic_add(self): np_y, ) - self.check_output(pd_y, pd_outy, "inplace_pd_y") - self.check_output(custom_y, custom_outy, "inplace_custom_y") + check_output(pd_y, pd_outy, "inplace_pd_y") + check_output(custom_y, custom_outy, "inplace_custom_y") - self.check_output(custom_x, pd_x, "x") - self.check_output(custom_outx, pd_outx, "outx") - self.check_output(custom_y, pd_y, "y") - self.check_output(custom_outy, pd_outy, "outy") - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_x_grad, pd_x_grad, "x_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_x, pd_x, "x") + check_output(custom_outx, pd_outx, "outx") + check_output(custom_y, pd_y, "y") + check_output(custom_outy, pd_outy, "outy") + check_output(custom_out, pd_out, "out") + check_output(custom_x_grad, pd_x_grad, "x_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") if __name__ == "__main__": diff --git a/test/custom_op/test_custom_relu_op_setup.py b/test/custom_op/test_custom_relu_op_setup.py index 8a164b0472933..8673a806313fe 100644 --- a/test/custom_op/test_custom_relu_op_setup.py +++ b/test/custom_op/test_custom_relu_op_setup.py @@ -18,6 +18,7 @@ 
import unittest import numpy as np +from utils import check_output, check_output_allclose import paddle from paddle import static @@ -205,13 +206,7 @@ def test_static(self): pd_out = custom_relu_static( custom_op, device, dtype, x, False ) - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) + check_output(out, pd_out, "out") def test_dynamic(self): for device in self.devices: @@ -226,20 +221,8 @@ def test_dynamic(self): pd_out, pd_x_grad = custom_relu_dynamic( custom_op, device, dtype, x, False ) - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) - np.testing.assert_array_equal( - x_grad, - pd_x_grad, - err_msg='custom op x grad: {},\n paddle api x grad: {}'.format( - x_grad, pd_x_grad - ), - ) + check_output(out, pd_out, "out") + check_output(x_grad, pd_x_grad, "x_grad") def test_static_save_and_load_inference_model(self): paddle.enable_static() @@ -263,13 +246,7 @@ def test_static_save_and_load_inference_model(self): feed={feed_target_names[0]: np_data}, fetch_list=fetch_targets, ) - np.testing.assert_array_equal( - predict, - predict_infer, - err_msg='custom op predict: {},\n custom op infer predict: {}'.format( - predict, predict_infer - ), - ) + check_output(predict, predict_infer, "predict") paddle.disable_static() def test_static_save_and_run_inference_predictor(self): @@ -298,12 +275,9 @@ def test_static_save_and_run_inference_predictor(self): predictor.get_output_names()[0] ) predict_infer = output_tensor.copy_to_cpu() - self.assertTrue( - np.isclose(predict, predict_infer, rtol=5e-5).any(), - "custom op predict: {},\n custom op infer predict: {}".format( - predict, predict_infer - ), - ) + predict = np.array(predict).flatten() + predict_infer = np.array(predict_infer).flatten() + check_output_allclose(predict, predict_infer, "predict") paddle.disable_static() def test_double_grad_dynamic(self): @@ -318,20 +292,8 @@ def test_double_grad_dynamic(self): pd_out, pd_dx_grad = custom_relu_double_grad_dynamic( self.custom_ops[0], device, dtype, x, False ) - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) - np.testing.assert_array_equal( - dx_grad, - pd_dx_grad, - err_msg='custom op dx grad: {},\n paddle api dx grad: {}'.format( - dx_grad, pd_dx_grad - ), - ) + check_output(out, pd_out, "out") + check_output(dx_grad, pd_dx_grad, "dx_grad") def test_with_dataloader(self): for device in self.devices: @@ -355,13 +317,7 @@ def test_with_dataloader(self): image = paddle.to_tensor(image) out = self.custom_ops[0](image) pd_out = paddle.nn.functional.relu(image) - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) + check_output(out, pd_out, "out") if batch_id == 5: break diff --git a/test/custom_op/test_custom_relu_op_xpu_setup.py b/test/custom_op/test_custom_relu_op_xpu_setup.py index 3eed65668ebc8..e054eadafd03a 100644 --- a/test/custom_op/test_custom_relu_op_xpu_setup.py +++ b/test/custom_op/test_custom_relu_op_xpu_setup.py @@ -18,6 +18,7 @@ import unittest import numpy as np +from utils import check_output, check_output_allclose import paddle from paddle import static @@ -183,13 +184,7 @@ def test_static(self): pd_out = custom_relu_static( self.custom_op, self.device, dtype, x, False ) - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op out: {},\n 
paddle api out: {}'.format( - out, pd_out - ), - ) + check_output(out, pd_out, "out") def test_dynamic(self): for dtype in self.dtypes: @@ -200,20 +195,8 @@ def test_dynamic(self): pd_out, pd_x_grad = custom_relu_dynamic( self.custom_op, self.device, dtype, x, False ) - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) - np.testing.assert_array_equal( - x_grad, - pd_x_grad, - err_msg='custom op x grad: {},\n paddle api x grad: {}'.format( - x_grad, pd_x_grad - ), - ) + check_output(out, pd_out, "out") + check_output(x_grad, pd_x_grad, "x_grad") def test_static_save_and_load_inference_model(self): paddle.enable_static() @@ -237,14 +220,7 @@ def test_static_save_and_load_inference_model(self): feed={feed_target_names[0]: np_data}, fetch_list=fetch_targets, ) - np.testing.assert_allclose( - predict, - predict_infer, - atol=1e-2, - err_msg='custom op predict: {},\n custom op infer predict: {}'.format( - predict, predict_infer - ), - ) + check_output(predict, predict_infer, "predict") paddle.disable_static() def test_static_save_and_run_inference_predictor(self): @@ -272,15 +248,7 @@ def test_static_save_and_run_inference_predictor(self): predict_infer = output_tensor.copy_to_cpu() predict = np.array(predict).flatten() predict_infer = np.array(predict_infer).flatten() - np.testing.assert_allclose( - predict, - predict_infer, - rtol=5e-5, - atol=1e-2, - err_msg="custom op predict: {},\n custom op infer predict: {}".format( - predict, predict_infer - ), - ) + check_output_allclose(predict, predict_infer, "predict") paddle.disable_static() def test_func_double_grad_dynamic(self): @@ -292,20 +260,8 @@ def test_func_double_grad_dynamic(self): pd_out, pd_dx_grad = custom_relu_double_grad_dynamic( self.custom_op, self.device, dtype, x, False ) - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) - np.testing.assert_array_equal( - dx_grad, - pd_dx_grad, - err_msg='custom op dx grad: {},\n paddle api dx grad: {}'.format( - dx_grad, pd_dx_grad - ), - ) + check_output(out, pd_out, "out") + check_output(dx_grad, pd_dx_grad, "dx_grad") def test_with_dataloader(self): paddle.disable_static() @@ -328,14 +284,7 @@ def test_with_dataloader(self): for batch_id, (image, _) in enumerate(train_loader()): out = self.custom_op(image) pd_out = paddle.nn.functional.relu(image) - np.testing.assert_allclose( - out, - pd_out, - atol=1e-2, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) + check_output_allclose(out, pd_out, "out", atol=1e-2) if batch_id == 5: break diff --git a/test/custom_op/test_custom_simple_slice.py b/test/custom_op/test_custom_simple_slice.py index d69322103520c..e2662e70f3bc6 100644 --- a/test/custom_op/test_custom_simple_slice.py +++ b/test/custom_op/test_custom_simple_slice.py @@ -16,7 +16,7 @@ import unittest import numpy as np -from utils import extra_cc_args, extra_nvcc_args, paddle_includes +from utils import check_output, extra_cc_args, extra_nvcc_args, paddle_includes import paddle from paddle.utils.cpp_extension import get_build_directory, load @@ -47,13 +47,7 @@ def test_slice_output(self): x = paddle.to_tensor(np_x) custom_op_out = custom_ops.custom_simple_slice(x, 2, 3) np_out = np_x[2:3] - np.testing.assert_array_equal( - custom_op_out, - np_out, - err_msg='custom op: {},\n numpy: {}'.format( - np_out, custom_op_out.numpy() - ), - ) + check_output(custom_op_out, np_out, "out") if __name__ == 
"__main__": diff --git a/test/custom_op/test_custom_tanh_double_grad.py b/test/custom_op/test_custom_tanh_double_grad.py index 08c57dac91fe1..a47ce712dd6a4 100644 --- a/test/custom_op/test_custom_tanh_double_grad.py +++ b/test/custom_op/test_custom_tanh_double_grad.py @@ -16,7 +16,12 @@ import unittest import numpy as np -from utils import extra_cc_args, extra_nvcc_args, paddle_includes +from utils import ( + check_output_allclose, + extra_cc_args, + extra_nvcc_args, + paddle_includes, +) import paddle from paddle.utils.cpp_extension import get_build_directory, load @@ -77,30 +82,9 @@ def test_double_grad_dynamic(self): pd_out, pd_dx_grad, pd_dout = custom_tanh_double_grad_dynamic( paddle.tanh, device, dtype, x ) - np.testing.assert_allclose( - out, - pd_out, - rtol=1e-05, - err_msg='custom op out: {},\n paddle api out: {}'.format( - out, pd_out - ), - ) - np.testing.assert_allclose( - dx_grad, - pd_dx_grad, - rtol=1e-05, - err_msg='custom op dx grad: {},\n paddle api dx grad: {}'.format( - dx_grad, pd_dx_grad - ), - ) - np.testing.assert_allclose( - dout, - pd_dout, - rtol=1e-05, - err_msg='custom op out grad: {},\n paddle api out grad: {}'.format( - dout, pd_dout - ), - ) + check_output_allclose(out, pd_out, "out", rtol=1e-05) + check_output_allclose(dx_grad, pd_dx_grad, "out", rtol=1e-05) + check_output_allclose(dout, pd_dout, "dout", rtol=1e-05) if __name__ == "__main__": diff --git a/test/custom_op/test_custom_tensor_operator.py b/test/custom_op/test_custom_tensor_operator.py index 4e524b2f5b16b..f6edbd934171d 100644 --- a/test/custom_op/test_custom_tensor_operator.py +++ b/test/custom_op/test_custom_tensor_operator.py @@ -16,7 +16,12 @@ import unittest import numpy as np -from utils import extra_cc_args, paddle_includes +from utils import ( + check_output, + check_output_allclose, + extra_cc_args, + paddle_includes, +) import paddle from paddle import static @@ -260,7 +265,7 @@ def _test_static(self): pd_out = test_custom_add_static( self.add, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) out = test_custom_subtract_static( self.subtract, device, dtype, x @@ -268,7 +273,7 @@ def _test_static(self): pd_out = test_custom_subtract_static( self.subtract, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) out = test_custom_multiply_static( self.multiply, device, dtype, x @@ -276,13 +281,13 @@ def _test_static(self): pd_out = test_custom_multiply_static( self.multiply, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) out = test_custom_divide_static(self.divide, device, dtype, x) pd_out = test_custom_divide_static( self.divide, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) def _test_dynamic(self): for device in self.devices: @@ -297,9 +302,9 @@ def _test_dynamic(self): pd_out, pd_x_grad = test_custom_add_dynamic( self.add, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) - np.testing.assert_allclose( - x_grad, pd_x_grad, rtol=1e-5, atol=1e-8 + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) + check_output_allclose( + x_grad, pd_x_grad, "x_grad", rtol=1e-5, atol=1e-8 ) out, x_grad = test_custom_subtract_dynamic( 
@@ -308,9 +313,9 @@ def _test_dynamic(self): pd_out, pd_x_grad = test_custom_subtract_dynamic( self.subtract, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) - np.testing.assert_allclose( - x_grad, pd_x_grad, rtol=1e-5, atol=1e-8 + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) + check_output_allclose( + x_grad, pd_x_grad, "x_grad", rtol=1e-5, atol=1e-8 ) out, x_grad = test_custom_multiply_dynamic( @@ -319,9 +324,9 @@ def _test_dynamic(self): pd_out, pd_x_grad = test_custom_multiply_dynamic( self.multiply, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) - np.testing.assert_allclose( - x_grad, pd_x_grad, rtol=1e-5, atol=1e-8 + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) + check_output_allclose( + x_grad, pd_x_grad, "x_grad", rtol=1e-5, atol=1e-8 ) out, x_grad = test_custom_divide_dynamic( @@ -330,7 +335,7 @@ def _test_dynamic(self): pd_out, pd_x_grad = test_custom_divide_dynamic( self.divide, device, dtype, x, False ) - np.testing.assert_allclose(out, pd_out, rtol=1e-5, atol=1e-8) + check_output_allclose(out, pd_out, "out", rtol=1e-5, atol=1e-8) def _test_logical_operants(self): for device in self.devices: @@ -342,19 +347,19 @@ def _test_logical_operants(self): out = self.custom_module.custom_logical_and(x, y) pd_out = paddle.bitwise_and(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_logical_or(x, y) pd_out = paddle.bitwise_or(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_logical_xor(x, y) pd_out = paddle.bitwise_xor(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_logical_not(x) pd_out = paddle.bitwise_not(x) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") def _test_compare_operants(self): for device in self.devices: @@ -366,27 +371,27 @@ def _test_compare_operants(self): out = self.custom_module.custom_less_than(x, y) pd_out = paddle.less_than(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_less_equal(x, y) pd_out = paddle.less_equal(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_equal(x, y) pd_out = paddle.equal(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_not_equal(x, y) pd_out = paddle.not_equal(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_greater_than(x, y) pd_out = paddle.greater_than(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") out = self.custom_module.custom_greater_equal(x, y) pd_out = paddle.greater_equal(x, y) - np.testing.assert_equal(out.numpy(), pd_out.numpy()) + check_output(out.numpy(), pd_out.numpy(), "out") if __name__ == '__main__': diff --git a/test/custom_op/test_multi_out_jit.py b/test/custom_op/test_multi_out_jit.py index f3e3a6ec8abc1..a191ab33e6a7c 100644 --- a/test/custom_op/test_multi_out_jit.py +++ b/test/custom_op/test_multi_out_jit.py @@ -16,7 +16,7 
@@ import unittest import numpy as np -from utils import extra_cc_args, paddle_includes +from utils import check_output, extra_cc_args, paddle_includes import paddle from paddle import static @@ -105,15 +105,6 @@ def setUp(self): self.np_y = np.random.uniform(-1, 1, [4, 8]).astype("float32") self.np_z = np.random.uniform(-1, 1, [4, 8]).astype("float32") - def check_output(self, out, pd_out, name): - np.testing.assert_array_equal( - out, - pd_out, - err_msg='custom op {}: {},\n paddle api {}: {}'.format( - name, out, name, pd_out - ), - ) - def run_static(self, device, dtype): paddle.set_device(device) x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype) @@ -140,14 +131,12 @@ def check_multi_outputs(self, outs, is_dynamic=False): one_int32 = one_int32.numpy() # Fake_float64 self.assertTrue('float64' in str(zero_float64.dtype)) - np.testing.assert_array_equal( - zero_float64, np.zeros([4, 8]).astype('float64') + check_output( + zero_float64, np.zeros([4, 8]).astype('float64'), "zero_float64" ) # ZFake_int32 self.assertTrue('int32' in str(one_int32.dtype)) - np.testing.assert_array_equal( - one_int32, np.ones([4, 8]).astype('int32') - ) + check_output(one_int32, np.ones([4, 8]).astype('int32'), "one_int32") def test_multi_out_static(self): paddle.enable_static() @@ -193,10 +182,10 @@ def test_discrete_out_static(self): self.np_y, self.np_z, ) - self.check_output(custom_out, pd_out, "out") + check_output(custom_out, pd_out, "out") # NOTE: In static mode, the output gradient of custom operator has been optimized to shape=[1]. However, native paddle op's output shape = [4, 8], hence we need to fetch pd_w_grad[0][0] (By the way, something wrong with native paddle's gradient, the outputs with other indexes instead of pd_w_grad[0][0] is undefined in this unittest.) 
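The per-class `self.check_output` helpers removed throughout this patch are replaced by shared module-level helpers added to test/custom_op/utils.py further down. A usage sketch with made-up values, assuming it runs from the test/custom_op directory:

import numpy as np
from utils import check_output, check_output_allclose

out = np.array([1.0, 2.0, 3.0], dtype='float32')
pd_out = np.array([1.0, 2.0, 3.0], dtype='float32')

check_output(out, pd_out, "out")           # exact element-wise comparison
check_output_allclose(out, pd_out, "out")  # tolerant comparison (default rtol=5e-5, atol=1e-2)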
- self.check_output(custom_w_grad, pd_w_grad[0][0], "w_grad") - self.check_output(custom_y_grad, pd_y_grad[0][0], "y_grad") + check_output(custom_w_grad, pd_w_grad[0][0], "w_grad") + check_output(custom_y_grad, pd_y_grad[0][0], "y_grad") def test_discrete_out_dynamic(self): for device in self.devices: @@ -223,9 +212,9 @@ def test_discrete_out_dynamic(self): self.np_y, self.np_z, ) - self.check_output(custom_out, pd_out, "out") - self.check_output(custom_w_grad, pd_w_grad, "w_grad") - self.check_output(custom_y_grad, pd_y_grad, "y_grad") + check_output(custom_out, pd_out, "out") + check_output(custom_w_grad, pd_w_grad, "w_grad") + check_output(custom_y_grad, pd_y_grad, "y_grad") if __name__ == '__main__': diff --git a/test/custom_op/utils.py b/test/custom_op/utils.py index 7e199f3a6114d..d65a0f2175f6e 100644 --- a/test/custom_op/utils.py +++ b/test/custom_op/utils.py @@ -16,6 +16,8 @@ import sys from site import getsitepackages +import numpy as np + from paddle.utils.cpp_extension.extension_utils import IS_WINDOWS IS_MAC = sys.platform.startswith('darwin') @@ -39,3 +41,43 @@ extra_cc_args = ['-w', '-g'] if not IS_WINDOWS else ['/w'] extra_nvcc_args = ['-O3'] extra_compile_args = {'cc': extra_cc_args, 'nvcc': extra_nvcc_args} + + +def check_output(out, pd_out, name): + if out is None and pd_out is None: + return + assert out is not None, "out value of " + name + " is None" + assert pd_out is not None, "pd_out value of " + name + " is None" + if isinstance(out, list) and isinstance(pd_out, list): + for idx in range(len(out)): + np.testing.assert_array_equal( + out[idx], + pd_out[idx], + err_msg='custom op {}: {},\n paddle api {}: {}'.format( + name, out[idx], name, pd_out[idx] + ), + ) + else: + np.testing.assert_array_equal( + out, + pd_out, + err_msg='custom op {}: {},\n paddle api {}: {}'.format( + name, out, name, pd_out + ), + ) + + +def check_output_allclose(out, pd_out, name, rtol=5e-5, atol=1e-2): + if out is None and pd_out is None: + return + assert out is not None, "out value of " + name + " is None" + assert pd_out is not None, "pd_out value of " + name + " is None" + np.testing.assert_allclose( + out, + pd_out, + rtol, + atol, + err_msg='custom op {}: {},\n paddle api {}: {}'.format( + name, out, name, pd_out + ), + ) From 7a78a57143b0f66a9a020f961cbb99b93059882a Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Wed, 12 Apr 2023 09:58:11 +0800 Subject: [PATCH 26/59] fix force sync bug in paddle.grad (#52779) --- paddle/fluid/eager/backward.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index a220fe18fb35d..2216b6b01427e 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -113,7 +113,6 @@ std::vector RunBackward( std::queue force_sequential_nodes_forward_queue = egr::Controller::Instance().GetForceSequentialNodes(); - egr::Controller::Instance().ClearForceSequentialNodes(); std::deque force_sequential_nodes_queue; std::set force_sequential_nodes_set; std::set ready_force_sequential_nodes; @@ -421,6 +420,7 @@ void Backward(const std::vector& tensors, // outputs VLOG(3) << "Run in Backward"; paddle::platform::RecordEvent backward_record_event( "backward", paddle::platform::TracerEventType::UserDefined, 1); + egr::Controller::Instance().ClearForceSequentialNodes(); RunBackward(tensors, grad_tensors, retain_graph); phi::autotune::AutoTuneStatus::Instance().Update(); } From f05c870b30ebb8c1470e05e64325fc29882db6b7 Mon Sep 17 00:00:00 2001 From: megemini Date: Wed, 12 
Apr 2023 10:04:44 +0800 Subject: [PATCH 27/59] =?UTF-8?q?=E3=80=90Hackathon=204th=20No.13=E3=80=91?= =?UTF-8?q?=E4=B8=BA=20Paddle=20=E6=96=B0=E5=A2=9E=20Bernoulli=20API=20=20?= =?UTF-8?q?(#52244)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 【Hackathon 4th No.13】为 Paddle 新增 Bernoulli API * [Change]change unittest_py scipy version * [Change]修改BernoulliNumpy的类型参数;优化静态图测试流程 * [Change]优化类的初始化及逻辑;增加0D相关测试用例 --- python/paddle/distribution/__init__.py | 2 + python/paddle/distribution/bernoulli.py | 485 ++++++++++++++ python/paddle/distribution/kl.py | 6 + .../test_distribution_bernoulli.py | 596 ++++++++++++++++++ .../test_distribution_bernoulli_static.py | 468 ++++++++++++++ 5 files changed, 1557 insertions(+) create mode 100644 python/paddle/distribution/bernoulli.py create mode 100644 python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli.py create mode 100644 python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli_static.py diff --git a/python/paddle/distribution/__init__.py b/python/paddle/distribution/__init__.py index 77b83fa6a94c5..418ef478aaf13 100644 --- a/python/paddle/distribution/__init__.py +++ b/python/paddle/distribution/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. from paddle.distribution import transform +from paddle.distribution.bernoulli import Bernoulli from paddle.distribution.beta import Beta from paddle.distribution.categorical import Categorical from paddle.distribution.dirichlet import Dirichlet @@ -30,6 +31,7 @@ from paddle.distribution.laplace import Laplace __all__ = [ # noqa + 'Bernoulli', 'Beta', 'Categorical', 'Dirichlet', diff --git a/python/paddle/distribution/bernoulli.py b/python/paddle/distribution/bernoulli.py new file mode 100644 index 0000000000000..d6c6551b0c5ce --- /dev/null +++ b/python/paddle/distribution/bernoulli.py @@ -0,0 +1,485 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import numpy as np + +import paddle +from paddle.distribution import exponential_family +from paddle.fluid.data_feeder import check_type, convert_dtype +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layers import tensor +from paddle.nn.functional import ( + binary_cross_entropy_with_logits, + sigmoid, + softplus, +) + +# Smallest representable number +EPS = { + 'float32': paddle.finfo(paddle.float32).eps, + 'float64': paddle.finfo(paddle.float64).eps, +} + + +def _clip_probs(probs, dtype): + """Clip probs from [0, 1] to (0, 1) with ``eps``. + + Args: + probs (Tensor): probs of Bernoulli. + dtype (str): data type. + + Returns: + Tensor: Clipped probs. + """ + eps = EPS.get(dtype) + return paddle.clip(probs, min=eps, max=1 - eps).astype(dtype) + + +class Bernoulli(exponential_family.ExponentialFamily): + r"""Bernoulli distribution parameterized by ``probs``, which is the probability of value 1. 
+ + In probability theory and statistics, the Bernoulli distribution, named after Swiss + mathematician Jacob Bernoulli, is the discrete probability distribution of a random + variable which takes the value 1 with probability ``p`` and the value 0 with + probability ``q=1-p``. + + The probability mass function of this distribution, over possible outcomes ``k``, is + + .. math:: + + {\begin{cases} + q=1-p & \text{if }value=0 \\ + p & \text{if }value=1 + \end{cases}} + + Args: + probs (float|Tensor): The ``probs`` input of Bernoulli distribution. The data type is float32 or float64. The range must be in [0, 1]. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Examples: + + .. code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + # init `probs` with a float + rv = Bernoulli(probs=0.3) + + print(rv.mean) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [0.30000001]) + + print(rv.variance) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [0.21000001]) + + print(rv.entropy()) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [0.61086434]) + """ + + def __init__(self, probs, name=None): + self.name = name or 'Bernoulli' + if not _non_static_mode(): + check_type( + probs, + 'probs', + (float, tensor.Variable), + self.name, + ) + + # Get/convert probs to tensor. + if self._validate_args(probs): + self.probs = probs + self.dtype = convert_dtype(probs.dtype) + else: + [self.probs] = self._to_tensor(probs) + self.dtype = paddle.get_default_dtype() + + # Check probs range [0, 1]. + if _non_static_mode(): + """This check is skipped in static mode, where `paddle.any` would always evaluate to `True`.""" + if ( + paddle.any(self.probs < 0) + or paddle.any(self.probs > 1) + or paddle.any(paddle.isnan(self.probs)) + ): + raise ValueError("The arg of `probs` must be in range [0, 1].") + + # Clip probs from [0, 1] to (0, 1) with smallest representable number `eps`. + self.probs = _clip_probs(self.probs, self.dtype) + self.logits = self._probs_to_logits(self.probs, is_binary=True) + + super().__init__(batch_shape=self.probs.shape, event_shape=()) + + @property + def mean(self): + """Mean of Bernoulli distribution. + + Returns: + Tensor: Mean value of distribution. + """ + return self.probs + + @property + def variance(self): + """Variance of Bernoulli distribution. + + Returns: + Tensor: Variance value of distribution. + """ + return paddle.multiply(self.probs, (1 - self.probs)) + + def sample(self, shape): + """Sample from Bernoulli distribution. + + Args: + shape (Sequence[int]): Sample shape. + + Returns: + Tensor: Sampled data with shape `sample_shape` + `batch_shape` + `event_shape`. + + Examples: + + ..
 code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + rv = Bernoulli(paddle.full((), 0.3)) + print(rv.sample([100]).shape) + # [100] + + rv = Bernoulli(paddle.to_tensor(0.3)) + print(rv.sample([100]).shape) + # [100, 1] + + rv = Bernoulli(paddle.to_tensor([0.3, 0.5])) + print(rv.sample([100]).shape) + # [100, 2] + + rv = Bernoulli(paddle.to_tensor([0.3, 0.5])) + print(rv.sample([100, 2]).shape) + # [100, 2, 2] + """ + name = self.name + '_sample' + if not _non_static_mode(): + check_type( + shape, + 'shape', + (np.ndarray, tensor.Variable, list, tuple), + name, + ) + + shape = shape if isinstance(shape, tuple) else tuple(shape) + shape = self._extend_shape(shape) + + with paddle.no_grad(): + return paddle.bernoulli(self.probs.expand(shape), name=name) + + def rsample(self, shape, temperature=1.0): + """Sample from Bernoulli distribution (reparameterized). + + `rsample` is a continuous approximation of the Bernoulli distribution's reparameterized sampling method. + [1] Chris J. Maddison, Andriy Mnih, and Yee Whye Teh. The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables. 2016. + [2] Eric Jang, Shixiang Gu, and Ben Poole. Categorical Reparameterization with Gumbel-Softmax. 2016. + + Note: + `rsample` needs to be followed by a `sigmoid`, which converts the samples' values to the unit interval (0, 1). + + Args: + shape (Sequence[int]): Sample shape. + temperature (float): Temperature for rsample, must be positive. + + Returns: + Tensor: Sampled data with shape `sample_shape` + `batch_shape` + `event_shape`. + + Examples: + + .. code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + paddle.seed(2023) + + rv = Bernoulli(paddle.full((), 0.3)) + print(rv.sample([100]).shape) + # [100] + + rv = Bernoulli(0.3) + print(rv.rsample([100]).shape) + # [100, 1] + + rv = Bernoulli(paddle.to_tensor([0.3, 0.5])) + print(rv.rsample([100]).shape) + # [100, 2] + + rv = Bernoulli(paddle.to_tensor([0.3, 0.5])) + print(rv.rsample([100, 2]).shape) + # [100, 2, 2] + + # `rsample` has to be followed by a `sigmoid` + rv = Bernoulli(0.3) + rsample = rv.rsample([3, ]) + rsample_sigmoid = paddle.nn.functional.sigmoid(rsample) + print(rsample, rsample_sigmoid) + # Tensor(shape=[3, 1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[-0.88315082], + # [-0.62347704], + # [-0.31513220]]) Tensor(shape=[3, 1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [[0.29252526], + # [0.34899110], + # [0.42186251]]) + + # The smaller the `temperature`, the closer the distribution of `rsample` is to `sample`, with `probs` of 0.3.
+ print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=1.0)).sum()) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [361.06829834]) + + print(paddle.nn.functional.sigmoid(rv.rsample([1000, ], temperature=0.1)).sum()) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [288.66418457]) + """ + name = self.name + '_rsample' + if not _non_static_mode(): + check_type( + shape, + 'shape', + (np.ndarray, tensor.Variable, list, tuple), + name, + ) + check_type( + temperature, + 'temperature', + (float,), + name, + ) + + shape = shape if isinstance(shape, tuple) else tuple(shape) + shape = self._extend_shape(shape) + + temperature = paddle.full( + shape=(), fill_value=temperature, dtype=self.dtype + ) + + probs = self.probs.expand(shape) + uniforms = paddle.rand(shape, dtype=self.dtype) + return paddle.divide( + paddle.add( + paddle.subtract(uniforms.log(), (-uniforms).log1p()), + paddle.subtract(probs.log(), (-probs).log1p()), + ), + temperature, + ) + + def cdf(self, value): + r"""Cumulative distribution function (CDF) evaluated at value. + + .. math:: + + { \begin{cases} + 0 & \text{if } value \lt 0 \\ + 1 - p & \text{if } 0 \leq value \lt 1 \\ + 1 & \text{if } value \geq 1 + \end{cases} + } + + Args: + value (Tensor): Value to be evaluated. + + Returns: + Tensor: CDF evaluated at value. + + Examples: + + .. code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + rv = Bernoulli(0.3) + print(rv.cdf(paddle.to_tensor([1.0]))) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [1.]) + """ + name = self.name + '_cdf' + if not _non_static_mode(): + check_type(value, 'value', tensor.Variable, name) + + value = self._check_values_dtype_in_probs(self.probs, value) + probs, value = paddle.broadcast_tensors([self.probs, value]) + + zeros = paddle.zeros_like(probs) + ones = paddle.ones_like(probs) + + return paddle.where( + value < 0, + zeros, + paddle.where(value < 1, paddle.subtract(ones, probs), ones), + name=name, + ) + + def log_prob(self, value): + """Log of probability density function. + + Args: + value (Tensor): Value to be evaluated. + + Returns: + Tensor: Log of probability density evaluated at value. + + Examples: + + .. code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + rv = Bernoulli(0.3) + print(rv.log_prob(paddle.to_tensor([1.0]))) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [-1.20397282]) + """ + name = self.name + '_log_prob' + if not _non_static_mode(): + check_type(value, 'value', tensor.Variable, name) + + value = self._check_values_dtype_in_probs(self.probs, value) + logits, value = paddle.broadcast_tensors([self.logits, value]) + return -binary_cross_entropy_with_logits( + logits, value, reduction='none', name=name + ) + + def prob(self, value): + r"""Probability density function (PDF) evaluated at value. + + .. math:: + + { \begin{cases} + q=1-p & \text{if }value=0 \\ + p & \text{if }value=1 + \end{cases} + } + + Args: + value (Tensor): Value to be evaluated. + + Returns: + Tensor: PDF evaluated at value. + + Examples: + + ..
code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + rv = Bernoulli(0.3) + print(rv.prob(paddle.to_tensor([1.0]))) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [0.29999998]) + """ + name = self.name + '_prob' + if not _non_static_mode(): + check_type(value, 'value', tensor.Variable, name) + + return self.log_prob(value).exp(name=name) + + def entropy(self): + r"""Entropy of Bernoulli distribution. + + .. math:: + + { + entropy = -(q \log q + p \log p) + } + + Returns: + Tensor: Entropy of distribution. + + Examples: + + .. code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + rv = Bernoulli(0.3) + print(rv.entropy()) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [0.61086434]) + """ + name = self.name + '_entropy' + + return binary_cross_entropy_with_logits( + self.logits, self.probs, reduction='none', name=name + ) + + def kl_divergence(self, other): + r"""The KL-divergence between two Bernoulli distributions. + + .. math:: + + { + KL(a || b) = p_a \log(p_a / p_b) + (1 - p_a) \log((1 - p_a) / (1 - p_b)) + } + + Args: + other (Bernoulli): instance of Bernoulli. + + Returns: + Tensor: kl-divergence between two Bernoulli distributions. + + Examples: + + .. code-block:: python + + import paddle + from paddle.distribution import Bernoulli + + rv = Bernoulli(0.3) + rv_other = Bernoulli(0.7) + + print(rv.kl_divergence(rv_other)) + # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + # [0.33891910]) + """ + name = self.name + '_kl_divergence' + if not _non_static_mode(): + check_type(other, 'other', Bernoulli, name) + + a_logits = self.logits + b_logits = other.logits + + log_pa = -softplus(-a_logits) + log_pb = -softplus(-b_logits) + + pa = sigmoid(a_logits) + one_minus_pa = sigmoid(-a_logits) + + log_one_minus_pa = -softplus(a_logits) + log_one_minus_pb = -softplus(b_logits) + + return paddle.add( + paddle.subtract( + paddle.multiply(log_pa, pa), paddle.multiply(log_pb, pa) + ), + paddle.subtract( + paddle.multiply(log_one_minus_pa, one_minus_pa), + paddle.multiply(log_one_minus_pb, one_minus_pa), + ), + ) diff --git a/python/paddle/distribution/kl.py b/python/paddle/distribution/kl.py index ac3b94d4ebd66..3d630b5802b72 100644 --- a/python/paddle/distribution/kl.py +++ b/python/paddle/distribution/kl.py @@ -15,6 +15,7 @@ import warnings import paddle +from paddle.distribution.bernoulli import Bernoulli from paddle.distribution.beta import Beta from paddle.distribution.categorical import Categorical from paddle.distribution.dirichlet import Dirichlet @@ -143,6 +144,11 @@ def __le__(self, other): return True +@register_kl(Bernoulli, Bernoulli) +def _kl_bernoulli_bernoulli(p, q): + return p.kl_divergence(q) + + @register_kl(Beta, Beta) def _kl_beta_beta(p, q): return ( diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli.py new file mode 100644 index 0000000000000..2229880b7a6bf --- /dev/null +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli.py @@ -0,0 +1,596 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import scipy.special +import scipy.stats +from config import ATOL, DEVICES, RTOL +from parameterize import ( + TEST_CASE_NAME, + parameterize_cls, + parameterize_func, + place, +) +from test_distribution import DistributionNumpy + +import paddle +from paddle.distribution import Bernoulli +from paddle.distribution.kl import kl_divergence +from paddle.fluid.data_feeder import convert_dtype + +np.random.seed(2023) +paddle.seed(2023) + +# Smallest representable number. +EPS = { + 'float32': np.finfo('float32').eps, + 'float64': np.finfo('float64').eps, +} + + +def _clip_probs_ndarray(probs, dtype): + """Clip probs from [0, 1] to (0, 1) with ``eps``""" + eps = EPS.get(dtype) + return np.clip(probs, a_min=eps, a_max=1 - eps).astype(dtype) + + +def _sigmoid(z): + return scipy.special.expit(z) + + +def _kstest(samples_a, samples_b, temperature=1): + """Uses the Kolmogorov-Smirnov test for goodness of fit.""" + _, p_value = scipy.stats.ks_2samp(samples_a, samples_b) + return not (p_value < 0.02 * (min(1, temperature))) + + +class BernoulliNumpy(DistributionNumpy): + def __init__(self, probs): + probs = np.array(probs) + if str(probs.dtype) not in ['float32', 'float64']: + self.dtype = 'float32' + else: + self.dtype = probs.dtype + + self.batch_shape = np.shape(probs) + + self.probs = _clip_probs_ndarray( + np.array(probs, dtype=self.dtype), str(self.dtype) + ) + self.logits = self._probs_to_logits(self.probs, is_binary=True) + + self.rv = scipy.stats.bernoulli(self.probs.astype('float64')) + + @property + def mean(self): + return self.rv.mean().astype(self.dtype) + + @property + def variance(self): + return self.rv.var().astype(self.dtype) + + def sample(self, shape): + shape = np.array(shape, dtype='int') + if shape.ndim: + shape = shape.tolist() + else: + shape = [shape.tolist()] + return self.rv.rvs(size=shape + list(self.batch_shape)).astype( + self.dtype + ) + + def log_prob(self, value): + return self.rv.logpmf(value).astype(self.dtype) + + def prob(self, value): + return self.rv.pmf(value).astype(self.dtype) + + def cdf(self, value): + return self.rv.cdf(value).astype(self.dtype) + + def entropy(self): + return ( + np.maximum( + self.logits, + 0, + ) + - self.logits * self.probs + + np.log(1 + np.exp(-np.abs(self.logits))) + ).astype(self.dtype) + + def kl_divergence(self, other): + """ + .. math:: + + KL[a || b] = Pa * Log[Pa / Pb] + (1 - Pa) * Log[(1 - Pa) / (1 - Pb)] + """ + p_a = self.probs + p_b = other.probs + return ( + p_a * np.log(p_a / p_b) + (1 - p_a) * np.log((1 - p_a) / (1 - p_b)) + ).astype(self.dtype) + + def _probs_to_logits(self, probs, is_binary=False): + return ( + (np.log(probs) - np.log1p(-probs)) if is_binary else np.log(probs) + ).astype(self.dtype) + + +class BernoulliTest(unittest.TestCase): + def setUp(self): + paddle.disable_static(self.place) + with paddle.fluid.dygraph.guard(self.place): + # just for convenience + self.dtype = self.expected_dtype + + # init numpy with `dtype` + self.init_numpy_data(self.probs, self.dtype) + + # init paddle and check dtype convert. 
+ self.init_dynamic_data(self.probs, self.default_dtype, self.dtype) + + def init_numpy_data(self, probs, dtype): + probs = np.array(probs).astype(dtype) + self.rv_np = BernoulliNumpy(probs) + + def init_dynamic_data(self, probs, default_dtype, dtype): + self.rv_paddle = Bernoulli(probs) + self.assertTrue( + dtype == convert_dtype(self.rv_paddle.probs.dtype), + (dtype, self.rv_paddle.probs.dtype), + ) + + +@place(DEVICES) +@parameterize_cls( + (TEST_CASE_NAME, 'probs', 'default_dtype', 'expected_dtype'), + [ + # 0-D probs + ('probs_00_32', paddle.full((), 0.0), 'float32', 'float32'), + ('probs_03_32', paddle.full((), 0.3), 'float32', 'float32'), + ('probs_10_32', paddle.full((), 1.0), 'float32', 'float32'), + ( + 'probs_00_64', + paddle.full((), 0.0, dtype='float64'), + 'float64', + 'float64', + ), + ( + 'probs_03_64', + paddle.full((), 0.3, dtype='float64'), + 'float64', + 'float64', + ), + ( + 'probs_10_64', + paddle.full((), 1.0, dtype='float64'), + 'float64', + 'float64', + ), + # 1-D probs + ('probs_00', 0.0, 'float64', 'float32'), + ('probs_03', 0.3, 'float64', 'float32'), + ('probs_10', 1.0, 'float64', 'float32'), + ('probs_tensor_03_32', paddle.to_tensor(0.3), 'float32', 'float32'), + ( + 'probs_tensor_03_64', + paddle.to_tensor(0.3, dtype='float64'), + 'float64', + 'float64', + ), + ( + 'probs_tensor_03_list_32', + paddle.to_tensor( + [ + 0.3, + ] + ), + 'float32', + 'float32', + ), + ( + 'probs_tensor_03_list_64', + paddle.to_tensor( + [ + 0.3, + ], + dtype='float64', + ), + 'float64', + 'float64', + ), + # N-D probs + ( + 'probs_tensor_0305', + paddle.to_tensor((0.3, 0.5)), + 'float32', + 'float32', + ), + ( + 'probs_tensor_03050104', + paddle.to_tensor(((0.3, 0.5), (0.1, 0.4))), + 'float32', + 'float32', + ), + ], +) +class BernoulliTestFeature(BernoulliTest): + def test_mean(self): + with paddle.fluid.dygraph.guard(self.place): + np.testing.assert_allclose( + self.rv_paddle.mean, + self.rv_np.mean, + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + + def test_variance(self): + with paddle.fluid.dygraph.guard(self.place): + np.testing.assert_allclose( + self.rv_paddle.variance, + self.rv_np.variance, + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + + @parameterize_func( + [ + ( + paddle.to_tensor( + [ + 0.0, + ] + ), + ), + ( + paddle.to_tensor( + 0.0, + ), + ), + (paddle.to_tensor(1.0),), + (paddle.to_tensor(0.0, dtype='float64'),), + ] + ) + def test_log_prob(self, value): + with paddle.fluid.dygraph.guard(self.place): + if convert_dtype(value.dtype) == convert_dtype( + self.rv_paddle.probs.dtype + ): + log_prob = self.rv_paddle.log_prob(value) + np.testing.assert_allclose( + log_prob, + self.rv_np.log_prob(value), + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + self.assertTrue(self.dtype == convert_dtype(log_prob.dtype)) + + else: + with self.assertWarns(UserWarning): + self.rv_paddle.log_prob(value) + + @parameterize_func( + [ + ( + paddle.to_tensor( + [ + 0.0, + ] + ), + ), + (paddle.to_tensor(0.0),), + (paddle.to_tensor(1.0),), + (paddle.to_tensor(0.0, dtype='float64'),), + ] + ) + def test_prob(self, value): + with paddle.fluid.dygraph.guard(self.place): + if convert_dtype(value.dtype) == convert_dtype( + self.rv_paddle.probs.dtype + ): + prob = self.rv_paddle.prob(value) + np.testing.assert_allclose( + prob, + self.rv_np.prob(value), + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + self.assertTrue(self.dtype == convert_dtype(prob.dtype)) + + else: + with self.assertWarns(UserWarning): + 
self.rv_paddle.prob(value) + + @parameterize_func( + [ + ( + paddle.to_tensor( + [ + 0.0, + ] + ), + ), + (paddle.to_tensor(0.0),), + (paddle.to_tensor(0.3),), + (paddle.to_tensor(0.7),), + (paddle.to_tensor(1.0),), + (paddle.to_tensor(0.0, dtype='float64'),), + ] + ) + def test_cdf(self, value): + with paddle.fluid.dygraph.guard(self.place): + if convert_dtype(value.dtype) == convert_dtype( + self.rv_paddle.probs.dtype + ): + cdf = self.rv_paddle.cdf(value) + np.testing.assert_allclose( + cdf, + self.rv_np.cdf(value), + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + self.assertTrue(self.dtype == convert_dtype(cdf.dtype)) + + else: + with self.assertWarns(UserWarning): + self.rv_paddle.cdf(value) + + def test_entropy(self): + with paddle.fluid.dygraph.guard(self.place): + np.testing.assert_allclose( + self.rv_paddle.entropy(), + self.rv_np.entropy(), + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + + def test_kl_divergence(self): + with paddle.fluid.dygraph.guard(self.place): + other_probs = paddle.to_tensor(0.9, dtype=self.dtype) + + rv_paddle_other = Bernoulli(other_probs) + rv_np_other = BernoulliNumpy(other_probs) + + np.testing.assert_allclose( + self.rv_paddle.kl_divergence(rv_paddle_other), + self.rv_np.kl_divergence(rv_np_other), + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + + np.testing.assert_allclose( + kl_divergence(self.rv_paddle, rv_paddle_other), + self.rv_np.kl_divergence(rv_np_other), + rtol=RTOL.get(self.dtype), + atol=ATOL.get(self.dtype), + ) + + +@place(DEVICES) +@parameterize_cls( + ( + TEST_CASE_NAME, + 'probs', + 'default_dtype', + 'expected_dtype', + 'shape', + 'expected_shape', + ), + [ + # 0-D probs + ( + 'probs_0d_1d', + paddle.full((), 0.3), + 'float32', + 'float32', + [ + 100, + ], + [ + 100, + ], + ), + ( + 'probs_0d_2d', + paddle.full((), 0.3), + 'float32', + 'float32', + [100, 1], + [100, 1], + ), + ( + 'probs_0d_3d', + paddle.full((), 0.3), + 'float32', + 'float32', + [100, 2, 3], + [100, 2, 3], + ), + # 1-D probs + ( + 'probs_1d_1d_32', + paddle.to_tensor(0.3), + 'float32', + 'float32', + [ + 100, + ], + [100, 1], + ), + ( + 'probs_1d_1d_64', + paddle.to_tensor(0.3, dtype='float64'), + 'float64', + 'float64', + paddle.to_tensor( + [ + 100, + ] + ), + [100, 1], + ), + ( + 'probs_1d_2d', + paddle.to_tensor(0.3), + 'float32', + 'float32', + [100, 2], + [100, 2, 1], + ), + ( + 'probs_1d_3d', + paddle.to_tensor(0.3), + 'float32', + 'float32', + [100, 2, 3], + [100, 2, 3, 1], + ), + # N-D probs + ( + 'probs_2d_1d', + paddle.to_tensor((0.3, 0.5)), + 'float32', + 'float32', + [ + 100, + ], + [100, 2], + ), + ( + 'probs_2d_2d', + paddle.to_tensor((0.3, 0.5)), + 'float32', + 'float32', + [100, 3], + [100, 3, 2], + ), + ( + 'probs_2d_3d', + paddle.to_tensor((0.3, 0.5)), + 'float32', + 'float32', + [100, 4, 3], + [100, 4, 3, 2], + ), + ], +) +class BernoulliTestSample(BernoulliTest): + def test_sample(self): + with paddle.fluid.dygraph.guard(self.place): + sample_np = self.rv_np.sample(self.shape) + sample_paddle = self.rv_paddle.sample(self.shape) + + self.assertEqual(list(sample_paddle.shape), self.expected_shape) + self.assertEqual(sample_paddle.dtype, self.rv_paddle.probs.dtype) + + if self.probs.ndim: + for i in range(len(self.probs)): + self.assertTrue( + _kstest( + sample_np[..., i].reshape(-1), + sample_paddle.numpy()[..., i].reshape(-1), + ) + ) + else: + self.assertTrue( + _kstest( + sample_np.reshape(-1), + sample_paddle.numpy().reshape(-1), + ) + ) + + @parameterize_func( + [ + (1.0,), + (0.1,), + ] + ) + 
def test_rsample(self, temperature): + """Compare two samples from `rsample` method, one from scipy `sample` and another from paddle `rsample`.""" + with paddle.fluid.dygraph.guard(self.place): + sample_np = self.rv_np.sample(self.shape) + rsample_paddle = self.rv_paddle.rsample(self.shape, temperature) + + self.assertEqual(list(rsample_paddle.shape), self.expected_shape) + self.assertEqual(rsample_paddle.dtype, self.rv_paddle.probs.dtype) + + if self.probs.ndim: + for i in range(len(self.probs)): + self.assertTrue( + _kstest( + sample_np[..., i].reshape(-1), + ( + _sigmoid(rsample_paddle.numpy()[..., i]) > 0.5 + ).reshape(-1), + temperature, + ) + ) + else: + self.assertTrue( + _kstest( + sample_np.reshape(-1), + (_sigmoid(rsample_paddle.numpy()) > 0.5).reshape(-1), + temperature, + ) + ) + + def test_rsample_backpropagation(self): + with paddle.fluid.dygraph.guard(self.place): + self.rv_paddle.probs.stop_gradient = False + rsample_paddle = self.rv_paddle.rsample(self.shape) + rsample_paddle = paddle.nn.functional.sigmoid(rsample_paddle) + grads = paddle.grad([rsample_paddle], [self.rv_paddle.probs]) + self.assertEqual(len(grads), 1) + self.assertEqual(grads[0].dtype, self.rv_paddle.probs.dtype) + self.assertEqual(grads[0].shape, self.rv_paddle.probs.shape) + + +@place(DEVICES) +@parameterize_cls([TEST_CASE_NAME], ['BernoulliTestError']) +class BernoulliTestError(unittest.TestCase): + def setUp(self): + paddle.disable_static(self.place) + + @parameterize_func( + [ + (-0.1, ValueError), + (1.1, ValueError), + (np.nan, ValueError), + (-1j + 1, TypeError), + ] + ) + def test_bad_init(self, probs, error): + with paddle.fluid.dygraph.guard(self.place): + self.assertRaises(error, Bernoulli, probs) + + @parameterize_func( + [ + ( + [0.3, 0.5], + paddle.to_tensor([0.1, 0.2, 0.3]), + ), + ] + ) + def test_bad_broadcast(self, probs, value): + with paddle.fluid.dygraph.guard(self.place): + rv = Bernoulli(probs) + self.assertRaises(ValueError, rv.cdf, value) + self.assertRaises(ValueError, rv.log_prob, value) + self.assertRaises(ValueError, rv.prob, value) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli_static.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli_static.py new file mode 100644 index 0000000000000..3390262792668 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_bernoulli_static.py @@ -0,0 +1,468 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from config import ATOL, DEVICES, RTOL +from parameterize import ( + TEST_CASE_NAME, + parameterize_cls, + parameterize_func, + place, +) +from test_distribution_bernoulli import BernoulliNumpy, _kstest, _sigmoid + +import paddle +from paddle.distribution import Bernoulli +from paddle.distribution.kl import kl_divergence + +np.random.seed(2023) +paddle.seed(2023) +paddle.enable_static() +default_dtype = paddle.get_default_dtype() + + +@place(DEVICES) +@parameterize_cls( + (TEST_CASE_NAME, 'params'), # params: name, probs, probs_other, value + [ + ( + 'params', + ( + # 1-D probs + ( + 'probs_not_iterable', + 0.3, + 0.7, + 1.0, + ), + ( + 'probs_not_iterable_and_broadcast_for_value', + 0.3, + 0.7, + np.array([[0.0, 1.0], [1.0, 0.0]], dtype=default_dtype), + ), + # N-D probs + ( + 'probs_tuple_0305', + (0.3, 0.5), + 0.7, + 1.0, + ), + ( + 'probs_tuple_03050104', + ((0.3, 0.5), (0.1, 0.4)), + 0.7, + 1.0, + ), + ), + ) + ], +) +class BernoulliTestFeature(unittest.TestCase): + def setUp(self): + self.program = paddle.static.Program() + self.executor = paddle.static.Executor(self.place) + + self.params_len = len(self.params) + + with paddle.static.program_guard(self.program): + self.init_numpy_data(self.params) + self.init_static_data(self.params) + + def init_numpy_data(self, params): + self.mean_np = [] + self.variance_np = [] + self.log_prob_np = [] + self.prob_np = [] + self.cdf_np = [] + self.entropy_np = [] + self.kl_np = [] + + for _, probs, probs_other, value in params: + rv_np = BernoulliNumpy(probs) + rv_np_other = BernoulliNumpy(probs_other) + + self.mean_np.append(rv_np.mean) + self.variance_np.append(rv_np.variance) + self.log_prob_np.append(rv_np.log_prob(value)) + self.prob_np.append(rv_np.prob(value)) + self.cdf_np.append(rv_np.cdf(value)) + self.entropy_np.append(rv_np.entropy()) + self.kl_np.append(rv_np.kl_divergence(rv_np_other)) + + def init_static_data(self, params): + with paddle.static.program_guard(self.program): + rv_paddles = [] + rv_paddles_other = [] + values = [] + for _, probs, probs_other, value in params: + if not isinstance(value, np.ndarray): + value = paddle.full([1], value, dtype=default_dtype) + else: + value = paddle.to_tensor(value, place=self.place) + + rv_paddles.append(Bernoulli(probs=paddle.to_tensor(probs))) + rv_paddles_other.append( + Bernoulli(probs=paddle.to_tensor(probs_other)) + ) + values.append(value) + + results = self.executor.run( + self.program, + feed={}, + fetch_list=[ + [ + rv_paddles[i].mean, + rv_paddles[i].variance, + rv_paddles[i].log_prob(values[i]), + rv_paddles[i].prob(values[i]), + rv_paddles[i].cdf(values[i]), + rv_paddles[i].entropy(), + rv_paddles[i].kl_divergence(rv_paddles_other[i]), + kl_divergence(rv_paddles[i], rv_paddles_other[i]), + ] + for i in range(self.params_len) + ], + ) + + self.mean_paddle = [] + self.variance_paddle = [] + self.log_prob_paddle = [] + self.prob_paddle = [] + self.cdf_paddle = [] + self.entropy_paddle = [] + self.kl_paddle = [] + self.kl_func_paddle = [] + for i in range(self.params_len): + ( + _mean, + _variance, + _log_prob, + _prob, + _cdf, + _entropy, + _kl, + _kl_func, + ) = results[i * 8 : (i + 1) * 8] + self.mean_paddle.append(_mean) + self.variance_paddle.append(_variance) + self.log_prob_paddle.append(_log_prob) + self.prob_paddle.append(_prob) + self.cdf_paddle.append(_cdf) + self.entropy_paddle.append(_entropy) + self.kl_paddle.append(_kl) + self.kl_func_paddle.append(_kl_func) + + def test_all(self): + for i in range(self.params_len): + 
self._test_mean(i) + self._test_variance(i) + self._test_log_prob(i) + self._test_prob(i) + self._test_cdf(i) + self._test_entropy(i) + self._test_kl_divergence(i) + + def _test_mean(self, i): + np.testing.assert_allclose( + self.mean_np[i], + self.mean_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + def _test_variance(self, i): + np.testing.assert_allclose( + self.variance_np[i], + self.variance_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + def _test_log_prob(self, i): + np.testing.assert_allclose( + self.log_prob_np[i], + self.log_prob_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + def _test_prob(self, i): + np.testing.assert_allclose( + self.prob_np[i], + self.prob_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + def _test_cdf(self, i): + np.testing.assert_allclose( + self.cdf_np[i], + self.cdf_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + def _test_entropy(self, i): + np.testing.assert_allclose( + self.entropy_np[i], + self.entropy_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + def _test_kl_divergence(self, i): + np.testing.assert_allclose( + self.kl_np[i], + self.kl_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + np.testing.assert_allclose( + self.kl_np[i], + self.kl_func_paddle[i], + rtol=RTOL.get(default_dtype), + atol=ATOL.get(default_dtype), + ) + + +@place(DEVICES) +@parameterize_cls( + (TEST_CASE_NAME, 'probs', 'shape', 'temperature', 'expected_shape'), + [ + # 1-D probs + ( + 'probs_03', + (0.3,), + [ + 100, + ], + 0.1, + [100, 1], + ), + # N-D probs + ( + 'probs_0305', + (0.3, 0.5), + [ + 100, + ], + 0.1, + [100, 2], + ), + ], +) +class BernoulliTestSample(unittest.TestCase): + def setUp(self): + self.program = paddle.static.Program() + self.executor = paddle.static.Executor(self.place) + + with paddle.static.program_guard(self.program): + self.init_numpy_data(self.probs, self.shape) + self.init_static_data(self.probs, self.shape, self.temperature) + + def init_numpy_data(self, probs, shape): + self.rv_np = BernoulliNumpy(probs) + self.sample_np = self.rv_np.sample(shape) + + def init_static_data(self, probs, shape, temperature): + with paddle.static.program_guard(self.program): + self.rv_paddle = Bernoulli(probs=paddle.to_tensor(probs)) + + [self.sample_paddle, self.rsample_paddle] = self.executor.run( + self.program, + feed={}, + fetch_list=[ + self.rv_paddle.sample(shape), + self.rv_paddle.rsample(shape, temperature), + ], + ) + + def test_sample(self): + with paddle.static.program_guard(self.program): + self.assertEqual( + list(self.sample_paddle.shape), self.expected_shape + ) + + for i in range(len(self.probs)): + self.assertTrue( + _kstest( + self.sample_np[..., i].reshape(-1), + self.sample_paddle[..., i].reshape(-1), + ) + ) + + def test_rsample(self): + """Compare two samples from `rsample` method, one from scipy and another from paddle.""" + with paddle.static.program_guard(self.program): + self.assertEqual( + list(self.rsample_paddle.shape), self.expected_shape + ) + + for i in range(len(self.probs)): + self.assertTrue( + _kstest( + self.sample_np[..., i].reshape(-1), + (_sigmoid(self.rsample_paddle[..., i]) > 0.5).reshape( + -1 + ), + self.temperature, + ) + ) + + +@place(DEVICES) +@parameterize_cls([TEST_CASE_NAME], ['BernoulliTestError']) +class BernoulliTestError(unittest.TestCase): + def setUp(self): + self.program 
= paddle.static.Program() + self.executor = paddle.static.Executor(self.place) + + @parameterize_func( + [ + (0,), # int + ((0.3,),), # tuple + ( + [ + 0.3, + ], + ), # list + ( + np.array( + [ + 0.3, + ] + ), + ), # ndarray + (-1j + 1,), # complex + ('0',), # str + ] + ) + def test_bad_init_type(self, probs): + with paddle.static.program_guard(self.program): + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[Bernoulli(probs=probs)] + ) + + @parameterize_func( + [ + (100,), # int + (100.0,), # float + ] + ) + def test_bad_sample_shape_type(self, shape): + with paddle.static.program_guard(self.program): + rv = Bernoulli(0.3) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.sample(shape)] + ) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.rsample(shape)] + ) + + @parameterize_func( + [ + (1,), # int + ] + ) + def test_bad_rsample_temperature_type(self, temperature): + with paddle.static.program_guard(self.program): + rv = Bernoulli(0.3) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, + feed={}, + fetch_list=[rv.rsample([100], temperature)], + ) + + @parameterize_func( + [ + (1,), # int + (1.0,), # float + ([1.0],), # list + ((1.0),), # tuple + (np.array(1.0),), # ndarray + ] + ) + def test_bad_value_type(self, value): + with paddle.static.program_guard(self.program): + rv = Bernoulli(0.3) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.log_prob(value)] + ) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.prob(value)] + ) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.cdf(value)] + ) + + @parameterize_func( + [ + (np.array(1.0),), # ndarray or other distribution + ] + ) + def test_bad_kl_other_type(self, other): + with paddle.static.program_guard(self.program): + rv = Bernoulli(0.3) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.kl_divergence(other)] + ) + + @parameterize_func( + [ + (paddle.to_tensor([0.1, 0.2, 0.3]),), + ] + ) + def test_bad_broadcast(self, value): + with paddle.static.program_guard(self.program): + rv = Bernoulli(paddle.to_tensor([0.3, 0.5])) + + # `logits, value = paddle.broadcast_tensors([self.logits, value])` + # raise ValueError in dygraph, raise TypeError in static. 
+ with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.cdf(value)] + ) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.log_prob(value)] + ) + + with self.assertRaises(TypeError): + [_] = self.executor.run( + self.program, feed={}, fetch_list=[rv.prob(value)] + ) + + +if __name__ == '__main__': + unittest.main() From b0f17d05b952321cf1011e90036cfbc39381a9c3 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 12 Apr 2023 10:20:25 +0800 Subject: [PATCH 28/59] [Prim] Add instance_norm composite rule (#52203) * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * isamp * gpu * cpu * noamp * fix instance_norm * fix * fix unit test * fix unit test * add unit test * fix * add big data tests * fix * fix * fix * fix * fix * fix * fix * add test case * fix * fix * fix * fix * fix * remove amp test --------- Co-authored-by: heyanru01 <429520051@qq.com> --- .../tests/unittests/test_instance_norm_op.py | 639 +++++++++++++++++- .../incubate/autograd/composite_rules.py | 30 + 2 files changed, 667 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py index 9e4445b7575cd..bab904db6eef0 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py @@ -15,9 +15,11 @@ import unittest import numpy as np +import parameterized as param +from eager_op_test import OpTest import paddle -from paddle import fluid +from paddle import fluid, nn from paddle.fluid import Program, core, program_guard from paddle.fluid.dygraph import to_variable @@ -33,7 +35,7 @@ def _reference_instance_norm_naive(x, scale, bias, epsilon, mean, var): var_tile = np.reshape(var, (n, c, 1, 1)) var_tile = np.tile(var_tile, (1, 1, h, w)) - x_norm = (x - mean_tile) / np.sqrt(var_tile + epsilon).astype('float32') + x_norm = (x - mean_tile) / np.sqrt(var_tile + epsilon) scale_tile = np.reshape(scale, (1, c, 1, 1)) scale_tile = np.tile(scale_tile, (n, 1, h, w)) bias_tile = np.reshape(bias, (1, c, 1, 1)) @@ -84,6 +86,633 @@ def _cal_mean_variance(x, epsilon, mean_shape): return mean, var +def instance_norm_wrapper(x, weight=None, bias=None, esp=1e-05): + return paddle.nn.functional.instance_norm( + x, None, None, weight, bias, True, 0.9, esp + ) + + +class TestInstanceNormOp(OpTest): + def setUp(self): + self.op_type = "instance_norm" + self.prim_op_type = "comp" + self.python_api = instance_norm_wrapper + self.public_python_api = instance_norm_wrapper + self.python_out_sig = ['Y'] + self.fw_comp_rtol = 1e-6 + self.fw_comp_atol = 1e-6 + self.rev_comp_rtol = 1e-4 + self.rev_comp_atol = 1e-4 + self.init_test_case() + ref_y_np, ref_mean_np, ref_var_np_tmp = _reference_instance_norm_naive( + self.x_np, + self.scale_np, + self.bias_np, + self.epsilon, + self.mean_np, + self.var_np, + ) + + ref_var_np = 1 / np.sqrt(ref_var_np_tmp + self.epsilon) + self.inputs = { + 'X': self.x_np, + 'Scale': self.scale_np, + 'Bias': self.bias_np, + } + self.attrs = {'epsilon': self.epsilon} + self.outputs = { + 'Y': ref_y_np, + 'SavedMean': ref_mean_np, + 'SavedVariance': ref_var_np, + } + self.enable_cinn = False + + def test_check_output(self): + self.check_output(check_prim=True) + + def test_check_grad(self): + self.check_grad(['X', 'Scale', 'Bias'], 'Y', check_prim=True) + + def init_test_case(self): + x_shape = [2, 100, 4, 5] + n, c, h, w 
= x_shape[0], x_shape[1], x_shape[2], x_shape[3] + self.epsilon = 1e-05 + dtype = np.float32 + scale_shape = [c] + mean_shape = [n * c] + np.random.seed() + self.x_np = np.random.random_sample(x_shape).astype(dtype) + self.scale_np = np.random.random_sample(scale_shape).astype(dtype) + self.bias_np = np.random.random_sample(scale_shape).astype(dtype) + self.mean_np, self.var_np = _cal_mean_variance( + self.x_np, self.epsilon, mean_shape + ) + self.dtype = dtype + + +class TestInstanceNormFP64(TestInstanceNormOp): + def init_test_case(self): + x_shape = [2, 100, 4, 5] + n, c, h, w = x_shape[0], x_shape[1], x_shape[2], x_shape[3] + self.epsilon = 1e-5 + dtype = np.float64 + scale_shape = [c] + mean_shape = [n * c] + np.random.seed() + self.x_np = np.random.random_sample(x_shape).astype(dtype) + self.scale_np = np.ones(scale_shape).astype(dtype) + self.bias_np = np.zeros(scale_shape).astype(dtype) + self.mean_np, self.var_np = _cal_mean_variance( + self.x_np, self.epsilon, mean_shape + ) + self.fw_comp_rtol = 1e-14 + self.fw_comp_atol = 1e-14 + self.rev_comp_rtol = 1e-13 + self.rev_comp_atol = 1e-13 + self.dtype = dtype + + +class PrimGroupNorm(paddle.nn.Layer): + def __init__(self, num_channels, scale, bias): + super().__init__() + self.func = nn.InstanceNorm2D(num_channels) + paddle.assign(scale, self.func.scale) + paddle.assign(bias, self.func.bias) + + def forward(self, x): + out = self.func(x) + return out + + +def apply_to_static(net, use_cinn): + build_strategy = paddle.static.BuildStrategy() + build_strategy.build_cinn_pass = use_cinn + return paddle.jit.to_static(net, build_strategy=False) + + +places = [paddle.CPUPlace()] +if paddle.is_compiled_with_cuda(): + places.append(paddle.CUDAPlace(0)) + + +@param.parameterized_class( + ( + 'name', + 'shape', + 'epsilon', + 'data_format', + 'places', + 'dtype', + 'threshold_list', + 'special_threshold', + ), + ( + ( + 'test0', + (2, 100, 3, 5), + 1e-5, + 'NCHW', + places, + 'float32', + [ + [1e-5, 1e-5, 1e-5], # cpu thresholds for static + [1e-5, 1e-5, 1e-5], # gpu thresholds for static + ], + None, + ), + ( + 'test1', + (2, 100, 3, 5), + 1e-5, + 'NCHW', + places, + 'float32', + [ + [1e-5, 1e-5, 1e-5], # cpu thresholds for static + [1e-5, 1e-5, 1e-5], # gpu thresholds for static + ], + None, + ), + ( + 'testbigdata_fp32', + (8, 32, 32, 64), + 1e-5, + 'NCHW', + places, + 'float32', + [ + [1e-5, 1e-5, 1e-5], # cpu thresholds for static + [1e-5, 1e-5, 1e-5], # gpu thresholds for static + ], # gpu thresholds + [2e-2, 2e-2, 2e-2], # special grad threshold for scale + ), + ( + 'test0_fp64', + (2, 100, 3, 5), + 1e-5, + 'NCHW', + places, + 'float64', + [ + [1e-14, 1e-14, 1e-14], # cpu thresholds for static + [1e-14, 1e-14, 1e-14], # gpu thresholds for static + ], + [1e-13, 1e-13, 1e-13], + ), + ( + 'test1_fp64', + (2, 100, 3, 5), + 1e-5, + 'NCHW', + places, + 'float64', + [ + [1e-14, 1e-14, 1e-14], # cpu thresholds for static + [1e-14, 1e-14, 1e-14], # gpu thresholds for static + ], + [1e-13, 1e-13, 1e-13], + ), + ( + 'testbigdata_fp64', + (8, 32, 32, 64), + 1e-5, + 'NCHW', + places, + 'float64', + [ + [1e-14, 1e-14, 1e-14], # cpu thresholds + [1e-14, 1e-14, 1e-14], + ], # gpu thresholds + [5e-11, 5e-11, 5e-11], # for X_grad + ), + ), +) +class TestCompositeInstanceNormNorm(unittest.TestCase): + @classmethod + def setUpClass(cls): + core._set_prim_all_enabled(True) + + @classmethod + def tearDownClass(cls): + core._set_prim_all_enabled(False) + + def setUp(self): + np.random.seed(1234) + self.fwd_desire = [] + self.rev_desire = [] + self.x = 
np.random.random(self.shape).astype(self.dtype) + self.scale = np.random.random([self.shape[1]]).astype(self.dtype) + self.bias = np.random.random([self.shape[1]]).astype(self.dtype) + self.num_channels = self.shape[1] + + self.static_fwd_desire = [] + self.static_rev_desire = [] + for place in self.places: + fwd_desire, rev_desire = self.get_eager_desire(place) + self.fwd_desire.append(fwd_desire.numpy()) + self.rev_desire.append(rev_desire.numpy()) + self.static_fwd_desire.append([]) + self.static_rev_desire.append([]) + fwd, rev = self.get_static_desire(place) + self.static_fwd_desire[-1].append(fwd[0]) + self.static_fwd_desire[-1].append(fwd[1]) + self.static_fwd_desire[-1].append(fwd[2]) + self.static_rev_desire[-1].append(rev[0]) + self.static_rev_desire[-1].append(rev[1]) + self.static_rev_desire[-1].append(rev[2]) + + def get_eager_desire(self, place): + if isinstance(place, fluid.CPUPlace): + paddle.set_device("cpu") + if isinstance(place, fluid.CUDAPlace): + paddle.set_device("gpu") + core.set_prim_eager_enabled(False) + paddle.disable_static() + input_ = paddle.to_tensor( + data=self.x, dtype=self.dtype, place=place, stop_gradient=False + ) + scale_ = paddle.to_tensor( + data=self.scale, dtype=self.dtype, place=place, stop_gradient=False + ) + bias_ = paddle.to_tensor( + data=self.bias, dtype=self.dtype, place=place, stop_gradient=False + ) + output = paddle.nn.functional.instance_norm( + input_, None, None, scale_, bias_, True, 0.9, self.epsilon + ) + grad = paddle.grad(output, input_) + + return output, grad[0] + + def get_static_desire(self, place): + core._set_prim_all_enabled(False) + paddle.enable_static() + if isinstance(place, fluid.CPUPlace): + paddle.set_device("cpu") + if isinstance(place, fluid.CUDAPlace): + paddle.set_device("gpu") + + mp, sp = paddle.static.Program(), paddle.static.Program() + with paddle.static.program_guard(mp, sp): + input_ = paddle.static.data( + 'x', shape=self.x.shape, dtype=self.x.dtype + ) + input_.stop_gradient = False + + scale_ = paddle.static.data( + 'scale_', shape=self.scale.shape, dtype=self.scale.dtype + ) + scale_.stop_gradient = False + + bias_ = paddle.static.data( + 'bias_', shape=self.bias.shape, dtype=self.bias.dtype + ) + bias_.stop_gradient = False + + output = paddle.nn.functional.instance_norm( + input_, None, None, scale_, bias_, True, 0.9, self.epsilon + ) + + blocks = mp.blocks + names = dict( + zip( + blocks[0].ops[0].output_names, + blocks[0].ops[0].output_arg_names, + ) + ) + vars_list = [ + names[key] + for key in [ + "Y", + "SavedMean", + "SavedVariance", + ] + ] + + fwd_ops = [op.type for op in blocks[0].ops] + # Ensure that instance_norm in original block + assert 'instance_norm' in fwd_ops + + if core._is_fwd_prim_enabled(): + paddle.incubate.autograd.primapi.to_prim(mp.blocks) + fwd_ops_new = [op.type for op in blocks[0].ops] + # Ensure that instance_norm is splitted into small ops + assert 'instance_norm' not in fwd_ops_new + + grads = paddle.static.gradients([output], [input_, scale_, bias_]) + + exe = paddle.static.Executor(place) + exe.run(sp) + out_list = exe.run( + mp, + feed={ + input_.name: self.x, + scale_.name: self.scale, + bias_.name: self.bias, + }, + fetch_list=vars_list + [grads], + ) + paddle.disable_static() + core._set_prim_all_enabled(True) + + return out_list[:3], out_list[3:] + + def test_static_comp(self): + paddle.enable_static() + mps = [] + fwd_actual = [] + rev_actual = [] + if len(self.places) < 1: + return + + with paddle.fluid.framework._static_guard(): + for place in self.places: + 
fwd_actual.append([]) + rev_actual.append([]) + mp, sp = paddle.static.Program(), paddle.static.Program() + with paddle.static.program_guard(mp, sp): + input_ = paddle.static.data( + 'x', shape=self.x.shape, dtype=self.x.dtype + ) + input_.stop_gradient = False + + scale_ = paddle.static.data( + 'scale_', shape=self.scale.shape, dtype=self.scale.dtype + ) + scale_.stop_gradient = False + + bias_ = paddle.static.data( + 'bias_', shape=self.bias.shape, dtype=self.bias.dtype + ) + bias_.stop_gradient = False + + output = paddle.nn.functional.instance_norm( + input_, + None, + None, + scale_, + bias_, + True, + 0.9, + self.epsilon, + ) + + blocks = mp.blocks + names = dict( + zip( + blocks[0].ops[0].output_names, + blocks[0].ops[0].output_arg_names, + ) + ) + vars_list = [ + names[key] + for key in [ + "Y", + "SavedMean", + "SavedVariance", + ] + ] + + fwd_ops = [op.type for op in blocks[0].ops] + # Ensure that instance_norm in original block + assert 'instance_norm' in fwd_ops + + if core._is_fwd_prim_enabled(): + paddle.incubate.autograd.primapi.to_prim(mp.blocks) + fwd_ops_new = [op.type for op in blocks[0].ops] + # Ensure that instance_norm is splitted into small ops + assert 'instance_norm' not in fwd_ops_new + + grads = paddle.static.gradients( + output, [input_, scale_, bias_] + ) + exe = paddle.static.Executor(place) + exe.run(sp) + out_list = exe.run( + mp, + feed={ + input_.name: self.x, + scale_.name: self.scale, + bias_.name: self.bias, + }, + fetch_list=vars_list + [grads], + ) + fwd_actual[-1].append(out_list[0]) + fwd_actual[-1].append(out_list[1]) + fwd_actual[-1].append(out_list[2]) + rev_actual[-1].append(out_list[3]) + rev_actual[-1].append(out_list[4]) + rev_actual[-1].append(out_list[5]) + mps.append(mp) + + vars_name = [ + "Y", + "SavedMean", + "SavedVariance", + "X_grad", + "Scale_grad", + "Bias_grad", + ] + + for i in range(len(self.places)): + self.assertTrue( + 'instance_norm' not in [op.type for op in mps[i].block(0).ops] + ) + atol = self.threshold_list[i][0] + rtol = self.threshold_list[i][0] + for j in range(len(self.static_fwd_desire[i])): + # in float16 type, Y is float16, mean and var are float16 + # so check mean and var with float32 gpu threshold + if self.dtype == 'float16' and j > 0: + atol = 1e-5 + rtol = 1e-5 + + np.testing.assert_allclose( + self.static_fwd_desire[i][j], + fwd_actual[i][j], + rtol=rtol, + atol=atol, + err_msg=f"Check diff failed of place:{self.places[i]}, output: {vars_name[j]}", + ) + max_abs_diff = np.max( + np.abs(self.static_fwd_desire[i][j] - fwd_actual[i][j]) + ) + print( + self.shape, + self.dtype, + self.places[i], + vars_name[j], + max_abs_diff, + ) + # compare with eager_desire + np.testing.assert_allclose( + self.fwd_desire[i], + fwd_actual[i][0], + rtol=rtol, + atol=atol, + err_msg=f"Check diff failed with fwd_eager:{self.places[i]}", + ) + + for j in range(len(self.static_rev_desire[i])): + if self.special_threshold is not None and j <= 1: + atol = self.special_threshold[i] + rtol = self.special_threshold[i] + else: + atol = self.threshold_list[i][0] + rtol = self.threshold_list[i][0] + + max_abs_diff = np.max( + np.abs(self.static_rev_desire[i][j] - rev_actual[i][j]) + ) + + print( + self.shape, + self.dtype, + self.places[i], + vars_name[j + 3], + max_abs_diff, + ) + + np.testing.assert_allclose( + self.static_rev_desire[i][j], + rev_actual[i][j], + rtol=rtol, + atol=atol, + err_msg=f"Check diff failed of place:{self.places[i]}, output: {vars_name[j + 3]}", + ) + + # now use larger threshold when testing cpu grads to 
bypass cpu grad test + if self.special_threshold is not None and i == 0: + atol = self.special_threshold[i] + rtol = self.special_threshold[i] + # compare with eager_desire + np.testing.assert_allclose( + self.rev_desire[i], + rev_actual[i][0], + rtol=rtol, + atol=atol, + err_msg=f"Check diff failed with rev_eager:{self.places[i]}", + ) + + paddle.disable_static() + + def test_jit_comp(self): + fwd_actual = [] + rev_actual = [] + for place in self.places: + input_ = paddle.to_tensor( + data=self.x, dtype=self.dtype, place=place, stop_gradient=False + ) + scale_ = paddle.to_tensor( + data=self.scale, + dtype=self.dtype, + place=place, + stop_gradient=False, + ) + bias_ = paddle.to_tensor( + data=self.bias, + dtype=self.dtype, + place=place, + stop_gradient=False, + ) + net = PrimGroupNorm(self.num_channels, scale_, bias_) + net = apply_to_static(net, False) + output = net(input_) + + grad = paddle.grad(output, input_) + fwd_actual.append(output.numpy()) + rev_actual.append(grad[0].numpy()) + + for i in range(len(self.places)): + atol = self.threshold_list[i][1] + rtol = self.threshold_list[i][1] + np.testing.assert_allclose( + self.fwd_desire[i], + fwd_actual[i], + rtol=rtol, + atol=atol, + err_msg='%s jit fwd' % self.places[i], + ) + + # now use larger threshold when testing cpu grads to bypass cpu grad test + if self.special_threshold is not None: + atol = self.special_threshold[i] + rtol = self.special_threshold[i] + + np.testing.assert_allclose( + self.rev_desire[i], + rev_actual[i], + rtol=rtol, + atol=atol, + err_msg='%s jit rev' % self.places[i], + ) + + def test_jit_comp_with_cinn(self): + fwd_actual = [] + rev_actual = [] + for place in self.places: + input_ = paddle.to_tensor( + data=self.x, dtype=self.dtype, place=place, stop_gradient=False + ) + scale_ = paddle.to_tensor( + data=self.scale, + dtype=self.dtype, + place=place, + stop_gradient=False, + ) + bias_ = paddle.to_tensor( + data=self.bias, + dtype=self.dtype, + place=place, + stop_gradient=False, + ) + net = PrimGroupNorm(self.num_channels, scale_, bias_) + net = apply_to_static(net, False) + output = net(input_) + grad = paddle.grad(output, input_) + fwd_actual.append(output.numpy()) + rev_actual.append(grad[0].numpy()) + + for i in range(len(self.places)): + atol = self.threshold_list[i][2] + rtol = self.threshold_list[i][2] + np.testing.assert_allclose( + self.fwd_desire[i], + fwd_actual[i], + rtol=rtol, # mean of uniform distribution, scale for avoid random failed + atol=atol, + err_msg='%s jit_cinn fwd' % self.places[i], + ) + # now use larger threshold when testing cpu grads to bypass cpu grad test + if self.special_threshold is not None: + atol = self.special_threshold[i] + rtol = self.special_threshold[i] + np.testing.assert_allclose( + self.rev_desire[i], + rev_actual[i], + rtol=rtol, # mean of uniform distribution, scale for avoid random failed + atol=atol, + err_msg='%s jit_cinn rev' % self.places[i], + ) + + +class TestInstanceNormCase1(TestInstanceNormOp): + def init_test_case(self): + x_shape = [2, 100, 4, 5] + n, c, h, w = x_shape[0], x_shape[1], x_shape[2], x_shape[3] + self.epsilon = 1e-05 + dtype = np.float32 + scale_shape = [c] + mean_shape = [n * c] + np.random.seed() + self.x_np = np.random.random_sample(x_shape).astype(dtype) + self.scale_np = np.ones(scale_shape).astype(dtype) + self.bias_np = np.zeros(scale_shape).astype(dtype) + self.mean_np, self.var_np = _cal_mean_variance( + self.x_np, self.epsilon, mean_shape + ) + + class TestInstanceNormOpTraining(unittest.TestCase): def setUp(self): 
self.epsilon = 1e-5 @@ -112,6 +741,7 @@ def set_global_mean_var(self, mean_shape, x): def test_forward_backward(self): def test_with_place(place, shape): + paddle.enable_static() epsilon = self.epsilon n, c, h, w = shape[0], shape[1], shape[2], shape[3] scale_shape = [c] @@ -207,6 +837,7 @@ def test_with_place(place, shape): for id, name in enumerate(self.fetch_list): self.__assert_close(var_dict[name], out[id], name) print("op test forward passes: ", str(place)) + paddle.disable_static() places = [core.CPUPlace()] @@ -234,6 +865,7 @@ def init_test_case(self): class TestInstanceNormOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program(), Program()): # the input of instance_norm must be Variable. x1 = fluid.create_lod_tensor( @@ -246,14 +878,17 @@ def test_errors(self): name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) self.assertRaises(TypeError, paddle.static.nn.instance_norm, x2) + paddle.disable_static() class TestInstanceNormOpErrorCase1(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program(), Program()): # the first dimension of input for instance_norm must between [2d, 5d] x = paddle.static.data(name='x', shape=[3], dtype="float32") self.assertRaises(ValueError, paddle.static.nn.instance_norm, x) + paddle.disable_static() class TestElasticNormOp(unittest.TestCase): diff --git a/python/paddle/incubate/autograd/composite_rules.py b/python/paddle/incubate/autograd/composite_rules.py index 84b7d415638b2..9d0229627728d 100644 --- a/python/paddle/incubate/autograd/composite_rules.py +++ b/python/paddle/incubate/autograd/composite_rules.py @@ -178,6 +178,36 @@ def layernorm_composite(x, scale, bias, epsilon, begin_norm_axis): return out, mean_, variance +@REGISTER_COMPOSITE('instance_norm') +def instancenorm_composite(x, scale, bias, epsilon): + """ + define composite rule of op instance_norm + out = (x - mean(x)) / sqrt(var + epsilon)) + var = mean((x-mean(x))^2) + """ + n, c, h, w = x.shape + axis = tuple(range(2, len(x.shape))) + mean_ = mean(x, axis=axis, keepdim=True) + difference = x - mean_ + var_tmp1 = difference * difference + variance = mean(var_tmp1, axis=axis, keepdim=True) + var_tmp3 = variance + epsilon + sqrt_var = pow(var_tmp3, full([], 0.5, dtype=var_tmp3.dtype)) + out = difference / sqrt_var + + if scale is not None: + scale_tile = reshape(scale, [1, c, 1, 1]) + out = out * scale_tile + if bias is not None: + bias_tile = reshape(bias, [1, c, 1, 1]) + out = out + bias_tile + + mean_ = reshape(mean_, [-1]) + saved_variance = 1 / sqrt_var + saved_variance = reshape(saved_variance, [-1]) + return out, mean_, saved_variance + + @REGISTER_COMPOSITE('gelu') def gelu_composite(x, approximate): """define composite rule of op gelu""" From a20605682ea1fab07c861123676719f9cc97527a Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Wed, 12 Apr 2023 10:20:50 +0800 Subject: [PATCH 29/59] Modify LayerNorm Composite Rule (#52712) * [Do NOT merge] Expr PR on Composite * Expr PR on Composite * Revert some compsite experiment * Remove unnecessary composite code * Add rsqrt as sub primitives --- python/paddle/incubate/autograd/composite_rules.py | 4 ++-- python/paddle/incubate/autograd/primitives.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/paddle/incubate/autograd/composite_rules.py b/python/paddle/incubate/autograd/composite_rules.py index 9d0229627728d..ba92c5dba718d 100644 --- a/python/paddle/incubate/autograd/composite_rules.py +++ 
b/python/paddle/incubate/autograd/composite_rules.py @@ -160,8 +160,8 @@ def layernorm_composite(x, scale, bias, epsilon, begin_norm_axis): var_tmp1 = difference * difference variance = mean(var_tmp1, axis=axis, keepdim=True) var_tmp3 = variance + epsilon - sqrt_var = sqrt(var_tmp3) - out = difference / sqrt_var + rsqrt_var = rsqrt(var_tmp3) + out = difference * rsqrt_var if scale is not None: scale = reshape(scale, x.shape[begin_norm_axis:]) diff --git a/python/paddle/incubate/autograd/primitives.py b/python/paddle/incubate/autograd/primitives.py index cc8ba89423d7c..9f52d9d69ac23 100644 --- a/python/paddle/incubate/autograd/primitives.py +++ b/python/paddle/incubate/autograd/primitives.py @@ -50,6 +50,7 @@ from paddle.tensor import pow # noqa: F401 from paddle.tensor import prod # noqa: F401 from paddle.tensor import reshape # noqa: F401 +from paddle.tensor import rsqrt # noqa: F401 from paddle.tensor import sign # noqa: F401 from paddle.tensor import sin # noqa: F401 from paddle.tensor import sinh # noqa: F401 @@ -117,6 +118,7 @@ 'ones', 'zeros', 'sqrt', + 'rsqrt', ] others = [ From 523f8a266f8930213343fb3179f83b032874544b Mon Sep 17 00:00:00 2001 From: Guoxia Wang Date: Wed, 12 Apr 2023 10:21:25 +0800 Subject: [PATCH 30/59] [AMP OP&Test] support bf16 for batch norm (#52407) * [AMP OP&Test] support bf16 for batchnorm * codestyle * Update batch_norm_grad_kernel.cu * Update batch_norm_kernel.cu * fix codestyle * fix * fix * fix * fix * fix * Update batch_norm_kernel.cc --- paddle/phi/kernels/batch_norm_kernel.cc | 18 ++++++ .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 36 +++++++++++- paddle/phi/kernels/gpu/batch_norm_kernel.cu | 24 ++++++++ .../tests/unittests/test_batch_norm_op.py | 56 +++++++++++++++++-- 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/paddle/phi/kernels/batch_norm_kernel.cc b/paddle/phi/kernels/batch_norm_kernel.cc index eddd65184fe93..570ba8dae06cf 100644 --- a/paddle/phi/kernels/batch_norm_kernel.cc +++ b/paddle/phi/kernels/batch_norm_kernel.cc @@ -14,6 +14,7 @@ #include "paddle/phi/kernels/batch_norm_kernel.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" @@ -66,6 +67,22 @@ PD_REGISTER_KERNEL(batch_norm_infer, float, double) {} #ifdef PADDLE_WITH_CUDA +#if CUDNN_VERSION_MIN(8, 1, 0) +PD_REGISTER_KERNEL(batch_norm_infer, + GPU, + ALL_LAYOUT, + phi::BatchNormInferKernel, + float, + double, + phi::dtype::bfloat16, + phi::dtype::float16) { + if (kernel_key.dtype() == phi::DataType::FLOAT16 || + kernel_key.dtype() == phi::DataType::BFLOAT16) { + kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32); + kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); + } +} +#else PD_REGISTER_KERNEL(batch_norm_infer, GPU, ALL_LAYOUT, @@ -79,6 +96,7 @@ PD_REGISTER_KERNEL(batch_norm_infer, } } #endif +#endif #ifdef PADDLE_WITH_HIP PD_REGISTER_KERNEL(batch_norm_infer, GPU, diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index ede2458744902..db7f3c3224a03 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -1314,14 +1314,18 @@ PD_REGISTER_KERNEL(batch_norm_grad_raw, float, phi::dtype::float16) {} #else +#if CUDNN_VERSION_MIN(8, 1, 0) + PD_REGISTER_KERNEL(batch_norm_grad, GPU, ALL_LAYOUT, phi::BatchNormGradKernel, float, double, + phi::dtype::bfloat16, phi::dtype::float16) { - if (kernel_key.dtype() == phi::DataType::FLOAT16) { + if 
(kernel_key.dtype() == phi::DataType::FLOAT16 || + kernel_key.dtype() == phi::DataType::BFLOAT16) { kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32); // x_grad kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32); // scale_grad kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); // bias_grad @@ -1334,6 +1338,22 @@ PD_REGISTER_KERNEL(batch_norm_grad_raw, phi::BatchNormGradRawKernel, float, double, + phi::dtype::bfloat16, + phi::dtype::float16) { + if (kernel_key.dtype() == phi::DataType::FLOAT16 || + kernel_key.dtype() == phi::DataType::BFLOAT16) { + kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32); // x_grad + kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32); // scale_grad + kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); // bias_grad + } +} +#else +PD_REGISTER_KERNEL(batch_norm_grad, + GPU, + ALL_LAYOUT, + phi::BatchNormGradKernel, + float, + double, phi::dtype::float16) { if (kernel_key.dtype() == phi::DataType::FLOAT16) { kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32); // x_grad @@ -1342,6 +1362,20 @@ PD_REGISTER_KERNEL(batch_norm_grad_raw, } } +PD_REGISTER_KERNEL(batch_norm_grad_raw, + GPU, + ALL_LAYOUT, + phi::BatchNormGradRawKernel, + float, + double, + phi::dtype::float16) { + if (kernel_key.dtype() == phi::DataType::FLOAT16) { + kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32); // x_grad + kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32); // scale_grad + kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); // bias_grad + } +} +#endif #endif #ifdef PADDLE_WITH_HIP diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 63276e4d53024..fb1bca3daba86 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -1221,6 +1221,7 @@ PD_REGISTER_KERNEL(batch_norm, ALL_LAYOUT, phi::BatchNormKernel, float, + phi::dtype::bfloat16, phi::dtype::float16) { kernel->InputAt(1).SetDataType(phi::DataType::FLOAT32); kernel->InputAt(2).SetDataType(phi::DataType::FLOAT32); @@ -1232,6 +1233,28 @@ PD_REGISTER_KERNEL(batch_norm, kernel->OutputAt(4).SetDataType(phi::DataType::FLOAT32); } #else +#if CUDNN_VERSION_MIN(8, 1, 0) +PD_REGISTER_KERNEL(batch_norm, + GPU, + ALL_LAYOUT, + phi::BatchNormKernel, + float, + double, + phi::dtype::bfloat16, + phi::dtype::float16) { + if (kernel_key.dtype() == phi::DataType::FLOAT16 || + kernel_key.dtype() == phi::DataType::BFLOAT16) { + kernel->InputAt(1).SetDataType(phi::DataType::FLOAT32); + kernel->InputAt(2).SetDataType(phi::DataType::FLOAT32); + kernel->InputAt(3).SetDataType(phi::DataType::FLOAT32); + kernel->InputAt(4).SetDataType(phi::DataType::FLOAT32); + kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32); + kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32); + kernel->OutputAt(3).SetDataType(phi::DataType::FLOAT32); + kernel->OutputAt(4).SetDataType(phi::DataType::FLOAT32); + } +} +#else PD_REGISTER_KERNEL(batch_norm, GPU, ALL_LAYOUT, @@ -1250,5 +1273,6 @@ PD_REGISTER_KERNEL(batch_norm, kernel->OutputAt(4).SetDataType(phi::DataType::FLOAT32); } } +#endif #endif diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 86ffea08a2254..bbe322ae0175b 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -16,7 +16,12 @@ import unittest import numpy as np -from eager_op_test import OpTest, _set_use_system_allocator +from eager_op_test 
import ( + OpTest, + _set_use_system_allocator, + convert_float_to_uint16, + convert_uint16_to_float, +) from op import Operator import paddle @@ -239,7 +244,10 @@ def check_with_place(self, place, data_layout, dtype, shape): raise ValueError("Unknown data layout.") scale_shape = [c] - x_val = np.random.random_sample(x_shape).astype(dtype) + if dtype == np.uint16: + x_val = np.random.random_sample(x_shape).astype(np.float32) + else: + x_val = np.random.random_sample(x_shape).astype(dtype) # generate some negative values to test case with relu fused x_val = x_val - 0.5 scale_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -248,12 +256,20 @@ def check_with_place(self, place, data_layout, dtype, shape): mean = np.zeros(scale_shape).astype(np.float32) variance = np.ones(scale_shape).astype(np.float32) - y_out = _reference_testing( - x_val, scale_val, bias_val, mean, variance, epsilon, data_layout - ).astype(dtype) + if dtype == np.uint16: + y_out = _reference_testing( + x_val, scale_val, bias_val, mean, variance, epsilon, data_layout + ).astype(np.float32) + y_out = convert_float_to_uint16(y_out) + else: + y_out = _reference_testing( + x_val, scale_val, bias_val, mean, variance, epsilon, data_layout + ).astype(dtype) if self.fuse_with_relu: y_out = np.maximum(y_out, 0) + if dtype == np.uint16: + x_val = convert_float_to_uint16(x_val) scope = core.Scope() # create input @@ -324,6 +340,11 @@ def check_with_place(self, place, data_layout, dtype, shape): y_tensor._set_dims(dims) # check inference result + atol = 1e-3 + if dtype == np.uint16: + y_tensor = convert_uint16_to_float(y_tensor) + y_out = convert_uint16_to_float(y_out) + atol = 1e-2 self.__assert_close( y_tensor, y_out, @@ -335,7 +356,7 @@ def check_with_place(self, place, data_layout, dtype, shape): + str(np.dtype(dtype)) + str(np.array(y_tensor)) + str(y_out), - atol=1e-3, + atol=atol, ) def test_check_output(self): @@ -376,6 +397,29 @@ def test_check_output(self): self.check_with_place(place, data_format, self.dtype, [2, 3]) +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not compiled with CUDA or not support the bfloat16", +) +class TestBF16BatchNormOpInference(TestBatchNormOpInference): + def setUp(self): + self.dtype = np.uint16 + self.use_mkldnn = False + self.fuse_with_relu = False + self.init_kernel_type() + + def test_check_output(self): + places = [core.CUDAPlace(0)] + for place in places: + # for data_format in ["NCHW", "NHWC"]: + for data_format in ["NCHW"]: + self.check_with_place( + place, data_format, self.dtype, [2, 3, 4, 5] + ) + self.check_with_place(place, data_format, self.dtype, [2, 3]) + + class TestBatchNormOpTraining(unittest.TestCase): def setUp(self): self.use_mkldnn = False From 9a7c83bdefb7bad3d476ea4eb524074c0a890229 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 12 Apr 2023 10:22:23 +0800 Subject: [PATCH 31/59] [Move Test] xpu (#52661) * move python/paddle/fluid/tests/unittests/xpu to test/xpu * update CMakeLists.txt * remove xpu in fluid/tests/unittests/ * add path to op_test_xpu * fix incorrect path * update test script * fix test_adadelta_op_xpu error --- paddle/scripts/paddle_build.sh | 2 +- .../fluid/tests/unittests/CMakeLists.txt | 4 -- .../fluid/tests/unittests/xpu/CMakeLists.txt | 36 ------------------ test/CMakeLists.txt | 4 +- test/xpu/CMakeLists.txt | 37 +++++++++++++++++++ .../xpu/collective_allgather_op_xpu.py | 0 .../xpu/collective_allreduce_op_xpu.py | 0 .../xpu/collective_broadcast_op_xpu.py | 0 
.../xpu/collective_concat_op.py | 0 .../xpu/collective_identity_op_xpu.py | 0 ...ctive_softmax_with_cross_entropy_op_xpu.py | 0 .../xpu/collective_split_op.py | 0 .../xpu/get_test_cover_info.py | 0 .../unittests => test/xpu}/op_test_xpu.py | 12 ++++-- ...allel_dygraph_dataparallel_with_pylayer.py | 0 .../xpu/parallel_dygraph_gradient_check.py | 0 ...el_dygraph_gradient_check_in_eager_mode.py | 0 .../xpu/process_group_bkcl.py | 0 .../xpu/test_accuracy_op_xpu.py | 7 +--- .../xpu/test_activation_op_xpu.py | 9 ++--- .../xpu/test_adadelta_op_xpu.py | 12 +++--- .../xpu/test_adagrad_op_xpu.py | 14 +++---- .../xpu/test_adam_op_xpu.py | 7 +--- .../xpu/test_adamw_op_xpu.py | 8 +--- .../xpu/test_affine_channel_op_xpu.py | 4 -- .../test_amp_check_finite_and_scale_op_xpu.py | 7 +--- .../xpu/test_arg_max_op_xpu.py | 8 +--- .../xpu/test_argsort_op_xpu.py | 8 +--- .../xpu/test_assign_op_xpu.py | 8 +--- .../xpu/test_assign_value_op_xpu.py | 7 +--- .../xpu/test_atan_op_xpu.py | 6 +-- .../xpu/test_batch_norm_op_xpu.py | 5 +-- .../xpu/test_bce_loss_op_xpu.py | 7 +--- .../xpu/test_bilinear_interp_op_xpu.py | 3 -- .../xpu/test_bilinear_interp_v2_op_xpu.py | 8 +--- .../xpu/test_bitwise_op_xpu.py | 9 ++--- .../unittests => test}/xpu/test_bmm_op_xpu.py | 8 +--- .../unittests => test}/xpu/test_c_concat.py | 14 +++---- .../xpu/test_c_embedding_op_xpu.py | 2 - .../unittests => test}/xpu/test_c_split.py | 14 +++---- .../xpu/test_cast_op_xpu.py | 7 +--- .../xpu/test_clip_by_norm_op_xpu.py | 7 +--- .../xpu/test_clip_op_xpu.py | 7 +--- .../xpu/test_coalesce_tensor_op_xpu.py | 7 +--- .../xpu/test_collective_allgather_xpu.py | 14 +++---- .../xpu/test_collective_allreduce_xpu.py | 14 +++---- .../xpu/test_collective_base_xpu.py | 0 .../xpu/test_collective_broadcast_xpu.py | 6 +-- .../xpu/test_collective_identity_xpu.py | 14 +++---- .../xpu/test_collective_process_group.py | 0 ...llective_softmax_with_cross_entropy_xpu.py | 14 +++---- .../xpu/test_compare_op_xpu.py | 7 +--- .../xpu/test_concat_op_xpu.py | 8 ++-- .../xpu/test_conv2d_op_xpu.py | 7 +--- .../xpu/test_conv2d_transpose_op_xpu.py | 7 +--- .../xpu/test_conv3d_op_xpu.py | 5 +-- .../xpu/test_cumprod_op_xpu.py | 8 +--- .../xpu/test_cumsum_op_xpu.py | 8 +--- .../xpu/test_deformable_conv_op_xpu.py | 7 +--- .../xpu/test_depthwise_conv2d_op_xpu.py | 7 +--- .../xpu/test_device_guard_xpu.py | 4 -- .../xpu/test_diag_v2_op_xpu.py | 7 +--- .../xpu/test_diagonal_op_xpu.py | 12 +++--- .../test_distribute_fpn_proposals_op_xpu.py | 5 +-- .../xpu/test_dropout_op_xpu.py | 5 +-- .../xpu/test_einsum_op_xpu.py | 11 ++---- .../xpu/test_elementwise_add_op_xpu.py | 11 +++--- .../xpu/test_elementwise_add_op_xpu_kp.py | 7 ++-- .../xpu/test_elementwise_div_op_xpu.py | 8 ++-- .../xpu/test_elementwise_floordiv_op_xpu.py | 8 ++-- .../xpu/test_elementwise_max_op_xpu.py | 8 ++-- .../xpu/test_elementwise_min_op_xpu.py | 8 ++-- .../xpu/test_elementwise_mod_op_xpu.py | 8 ++-- .../xpu/test_elementwise_mul_op_xpu.py | 8 ++-- .../xpu/test_elementwise_pow_op_xpu.py | 8 ++-- .../xpu/test_elementwise_sub_op_xpu.py | 11 +++--- .../xpu/test_empty_op_xpu.py | 8 +--- .../xpu/test_expand_as_v2_op_xpu.py | 7 +--- .../xpu/test_expand_v2_op_xpu.py | 7 +--- .../xpu/test_fill_any_like_op_xpu.py | 8 +--- .../xpu/test_fill_any_op_xpu.py | 7 +--- .../xpu/test_fill_constant_op_xpu.py | 8 ++-- .../xpu/test_fill_diagonal_tensor_op_xpu.py | 12 +++--- .../xpu/test_fill_op_xpu.py | 7 +--- .../xpu/test_flatten2_op_xpu.py | 6 +-- .../test_flatten_contiguous_range_op_xpu.py | 10 +---- .../xpu/test_flatten_op_xpu.py | 6 +-- 
.../xpu/test_fleet_exe_dist_model_run_xpu.py | 0 .../xpu/test_fused_attention_op_xpu.py | 11 ++---- .../xpu/test_fused_feedforward_op_xpu.py | 9 +---- .../test_fused_gemm_epilogue_grad_op_xpu.py | 8 +--- .../xpu/test_fused_gemm_epilogue_op_xpu.py | 7 +--- .../test_fused_resnet_basic_block_op_xpu.py | 6 +-- .../xpu/test_gather_nd_op_xpu.py | 8 +--- .../xpu/test_gather_op_xpu.py | 7 +--- .../xpu/test_gaussian_random_op_xpu.py | 7 +--- .../xpu/test_gen_bkcl_id_op.py | 3 -- .../xpu/test_generate_proposals_v2_op_xpu.py | 13 ++----- .../xpu/test_grid_sampler_op_xpu.py | 8 +--- .../xpu/test_group_norm_op_xpu.py | 9 ++--- .../xpu/test_huber_loss_op_xpu.py | 9 ++--- .../xpu/test_increment_op_xpu.py | 8 +--- .../xpu/test_index_sample_op_xpu.py | 8 +--- .../xpu/test_index_select_op_xpu.py | 12 ++---- .../xpu/test_instance_norm_op_xpu.py | 15 +++----- .../xpu/test_iou_similarity_op_xpu.py | 9 ++--- .../xpu/test_isfinite_op_xpu.py | 7 +--- .../xpu/test_kldiv_loss_op_xpu.py | 7 +--- .../xpu/test_label_smooth_op_xpu.py | 11 ++---- .../xpu/test_lamb_op_xpu.py | 7 +--- .../xpu/test_layer_norm_op_xpu.py | 14 +++---- .../xpu/test_linspace_op_xpu.py | 8 +--- .../xpu/test_log_loss_op_xpu.py | 4 +- .../xpu/test_log_softmax_op_xpu.py | 8 +--- .../xpu/test_logical_op_xpu.py | 9 ++--- .../xpu/test_logsumexp_op_xpu.py | 6 +-- .../xpu/test_lookup_table_v2_op_xpu.py | 8 +--- .../xpu/test_masked_select_op_xpu.py | 8 +--- .../xpu/test_matmul_op_xpu.py | 7 +--- .../xpu/test_matmul_v2_op_xpu.py | 7 +--- .../xpu/test_mean_op_xpu.py | 8 +--- .../xpu/test_merged_momentum_op_xpu.py | 7 +--- .../xpu/test_merged_momentum_op_xpu_base.py | 0 .../xpu/test_meshgrid_op_xpu.py | 7 +--- .../xpu/test_momentum_op_xpu.py | 8 +--- .../unittests => test}/xpu/test_mul_op_xpu.py | 7 +--- .../xpu/test_nearest_interp_op_xpu.py | 3 -- .../xpu/test_nearest_interp_v2_op_xpu.py | 8 +--- .../xpu/test_one_hot_op_xpu.py | 7 +--- .../xpu/test_one_hot_v2_op_xpu.py | 15 +++----- .../xpu/test_p_norm_op_xpu.py | 11 ++---- .../xpu/test_pad3d_op_xpu.py | 8 +--- .../xpu/test_parallel_dygraph_dataparallel.py | 0 .../xpu/test_pixel_shuffle_op_xpu.py | 8 +--- .../xpu/test_pool2d_op_xpu.py | 9 ++--- .../xpu/test_pool3d_op_xpu.py | 11 ++---- .../xpu/test_pool_max_op_xpu.py | 7 +--- ...st_pow2_decay_with_linear_warmup_op_xpu.py | 8 ++-- .../xpu/test_prelu_op_xpu.py | 8 +--- .../xpu/test_prior_box_op_xpu.py | 8 +--- .../xpu/test_prod_op_xpu.py | 4 +- .../xpu/test_randint_op_xpu.py | 8 +--- .../xpu/test_randperm_op_xpu.py | 15 +++----- .../unittests => test}/xpu/test_range_xpu.py | 11 ++---- .../xpu/test_recompute_op_xpu.py | 0 .../xpu/test_reduce_all_op_xpu.py | 8 +--- .../xpu/test_reduce_amax_op_xpu.py | 8 +--- .../xpu/test_reduce_amin_op_xpu.py | 8 +--- .../xpu/test_reduce_any_op_xpu.py | 8 +--- .../xpu/test_reduce_max_op_xpu.py | 8 +--- .../xpu/test_reduce_mean_op_xpu.py | 7 +--- .../xpu/test_reduce_min_op_xpu.py | 8 +--- .../xpu/test_reduce_prod_op_xpu.py | 8 +--- .../xpu/test_reduce_sum_op_xpu.py | 8 +--- .../xpu/test_refactor_op_xpu.py | 9 ++--- .../xpu/test_reshape2_op_xpu.py | 8 +--- .../xpu/test_rmsprop_op_xpu.py | 8 +--- .../unittests => test}/xpu/test_rnn_op_xpu.py | 12 +++--- .../xpu/test_roi_align_op_xpu.py | 7 +--- .../xpu/test_roll_op_xpu.py | 11 ++---- .../xpu/test_scale_op_xpu.py | 8 +--- .../xpu/test_scatter_nd_add_op_xpu.py | 8 +--- .../xpu/test_scatter_op_xpu.py | 8 +--- .../xpu/test_sequence_conv_op_xpu.py | 4 +- .../xpu/test_sequence_unpad_op_xpu.py | 8 +--- .../xpu/test_set_value_op_xpu.py | 4 +- .../unittests => test}/xpu/test_sgd_op_xpu.py 
| 7 +--- .../xpu/test_shape_op_xpu.py | 7 +--- ...igmoid_cross_entropy_with_logits_op_xpu.py | 9 ++--- .../xpu/test_sign_op_xpu.py | 8 +--- .../xpu/test_slice_op_xpu.py | 11 ++---- .../xpu/test_softmax_op_xpu.py | 11 ++---- .../test_softmax_with_cross_entropy_op_xpu.py | 10 ++--- .../xpu/test_split_op_xpu.py | 7 +--- .../xpu/test_squeeze2_op_xpu.py | 7 +--- .../xpu/test_squeeze_op_xpu.py | 7 +--- .../xpu/test_stack_op_xpu.py | 8 ++-- .../xpu/test_strided_slice_op_xpu.py | 11 ++---- .../unittests => test}/xpu/test_sum_op_xpu.py | 7 +--- .../xpu/test_temporal_shift_op_xpu.py | 8 +--- .../xpu/test_tile_op_xpu.py | 7 +--- .../xpu/test_top_k_op_xpu.py | 7 +--- .../xpu/test_top_k_v2_op_xpu.py | 7 +--- .../xpu/test_transpose_op_xpu.py | 7 +--- .../xpu/test_tril_triu_op_xpu.py | 8 +--- .../test_truncated_gaussian_random_op_xpu.py | 7 +--- .../xpu/test_unbind_op_xpu.py | 6 +-- .../xpu/test_unfold_op_xpu.py | 13 +++---- .../xpu/test_uniform_random_op_xpu.py | 4 +- .../xpu/test_unsqueeze2_op_xpu.py | 7 +--- .../xpu/test_unsqueeze_op_xpu.py | 7 +--- .../xpu/test_unstack_op_xpu.py | 7 +--- .../xpu/test_update_loss_scaling_op_xpu.py | 6 +-- .../xpu/test_warpctc_op_xpu.py | 8 ++-- .../xpu/test_where_index_xpu.py | 8 +--- .../xpu/test_where_op_xpu.py | 8 +--- .../xpu/test_while_op_xpu.py | 0 .../unittests => test}/xpu/test_xpu_place.py | 0 .../xpu/test_xpu_stream_event.py | 0 .../xpu/test_zero_dim_tensor_xpu.py | 0 tools/get_pr_ut.py | 2 +- 201 files changed, 503 insertions(+), 993 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt rename {python/paddle/fluid/tests/unittests => test}/xpu/collective_allgather_op_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/collective_allreduce_op_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/collective_broadcast_op_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/collective_concat_op.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/collective_identity_op_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/collective_softmax_with_cross_entropy_op_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/collective_split_op.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/get_test_cover_info.py (100%) rename {python/paddle/fluid/tests/unittests => test/xpu}/op_test_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/parallel_dygraph_dataparallel_with_pylayer.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/parallel_dygraph_gradient_check.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/parallel_dygraph_gradient_check_in_eager_mode.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/process_group_bkcl.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_accuracy_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_activation_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_adadelta_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_adagrad_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_adam_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_adamw_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_affine_channel_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_amp_check_finite_and_scale_op_xpu.py (98%) rename 
{python/paddle/fluid/tests/unittests => test}/xpu/test_arg_max_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_argsort_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_assign_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_assign_value_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_atan_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_batch_norm_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_bce_loss_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_bilinear_interp_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_bilinear_interp_v2_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_bitwise_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_bmm_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_c_concat.py (95%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_c_embedding_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_c_split.py (95%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_cast_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_clip_by_norm_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_clip_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_coalesce_tensor_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_collective_allgather_xpu.py (95%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_collective_allreduce_xpu.py (95%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_collective_base_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_collective_broadcast_xpu.py (92%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_collective_identity_xpu.py (95%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_collective_process_group.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_collective_softmax_with_cross_entropy_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_compare_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_concat_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_conv2d_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_conv2d_transpose_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_conv3d_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_cumprod_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_cumsum_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_deformable_conv_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_depthwise_conv2d_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_device_guard_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_diag_v2_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_diagonal_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_distribute_fpn_proposals_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_dropout_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => 
test}/xpu/test_einsum_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_add_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_add_op_xpu_kp.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_div_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_floordiv_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_max_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_min_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_mod_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_mul_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_pow_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_elementwise_sub_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_empty_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_expand_as_v2_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_expand_v2_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fill_any_like_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fill_any_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fill_constant_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fill_diagonal_tensor_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fill_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_flatten2_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_flatten_contiguous_range_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_flatten_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fleet_exe_dist_model_run_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fused_attention_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fused_feedforward_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fused_gemm_epilogue_grad_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fused_gemm_epilogue_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_fused_resnet_basic_block_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_gather_nd_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_gather_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_gaussian_random_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_gen_bkcl_id_op.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_generate_proposals_v2_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_grid_sampler_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_group_norm_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_huber_loss_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_increment_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_index_sample_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => 
test}/xpu/test_index_select_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_instance_norm_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_iou_similarity_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_isfinite_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_kldiv_loss_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_label_smooth_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_lamb_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_layer_norm_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_linspace_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_log_loss_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_log_softmax_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_logical_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_logsumexp_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_lookup_table_v2_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_masked_select_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_matmul_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_matmul_v2_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_mean_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_merged_momentum_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_merged_momentum_op_xpu_base.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_meshgrid_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_momentum_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_mul_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_nearest_interp_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_nearest_interp_v2_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_one_hot_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_one_hot_v2_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_p_norm_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_pad3d_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_parallel_dygraph_dataparallel.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_pixel_shuffle_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_pool2d_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_pool3d_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_pool_max_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_prelu_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_prior_box_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_prod_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_randint_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_randperm_op_xpu.py (98%) rename 
{python/paddle/fluid/tests/unittests => test}/xpu/test_range_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_recompute_op_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_all_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_amax_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_amin_op_xpu.py (96%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_any_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_max_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_mean_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_min_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_prod_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reduce_sum_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_refactor_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_reshape2_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_rmsprop_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_rnn_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_roi_align_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_roll_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_scale_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_scatter_nd_add_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_scatter_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_sequence_conv_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_sequence_unpad_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_set_value_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_sgd_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_shape_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_sign_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_slice_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_softmax_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_softmax_with_cross_entropy_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_split_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_squeeze2_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_squeeze_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_stack_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_strided_slice_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_sum_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_temporal_shift_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_tile_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_top_k_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_top_k_v2_op_xpu.py (99%) rename 
{python/paddle/fluid/tests/unittests => test}/xpu/test_transpose_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_tril_triu_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_truncated_gaussian_random_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_unbind_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_unfold_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_uniform_random_op_xpu.py (95%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_unsqueeze2_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_unsqueeze_op_xpu.py (97%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_unstack_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_update_loss_scaling_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_warpctc_op_xpu.py (99%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_where_index_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_where_op_xpu.py (98%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_while_op_xpu.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_xpu_place.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_xpu_stream_event.py (100%) rename {python/paddle/fluid/tests/unittests => test}/xpu/test_zero_dim_tensor_xpu.py (100%) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 2db59b7b61ce7..4693d78e2dc32 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -2237,7 +2237,7 @@ set +x set -x ut_endTime_s=`date +%s` echo "XPU testCase Time: $[ $ut_endTime_s - $ut_startTime_s ]s" - python ${PADDLE_ROOT}/build/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py + python ${PADDLE_ROOT}/build/test/xpu/get_test_cover_info.py unset XPU_OP_LIST_DIR if [[ "$EXIT_CODE" != "0" ]]; then exit 8; diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 63279cffc3e51..909b658c0983c 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -772,10 +772,6 @@ add_subdirectory(sequence) add_subdirectory(rnn) add_subdirectory(distribution) -if(WITH_XPU) - add_subdirectory(xpu) -endif() - # dist xpu tests: if(WITH_XPU_BKCL) py_test(test_collective_allreduce_api_xpu diff --git a/python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt b/python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt deleted file mode 100644 index cc46e42f8ca64..0000000000000 --- a/python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -file( - GLOB TEST_OPS - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -if(WITH_XPU_BKCL) - list(REMOVE_ITEM TEST_OPS "test_gen_bkcl_id_op") -endif() - -file( - GLOB DIST_TEST_OPS - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "test_dist_*.py") -if(WITH_XPU_BKCL) - list(APPEND DIST_TEST_OPS test_gen_bkcl_id_op) -endif() - -foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) -endforeach() - -foreach(TEST_OP ${DIST_TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) -endforeach() - -set_tests_properties(test_conv2d_op_xpu PROPERTIES TIMEOUT 120) -set_tests_properties(test_mul_op_xpu PROPERTIES TIMEOUT 120) 
-set_tests_properties(test_matmul_v2_op_xpu PROPERTIES TIMEOUT 900) -set_tests_properties(test_matmul_op_xpu PROPERTIES TIMEOUT 300) -set_tests_properties(test_collective_identity_xpu - PROPERTIES LABELS "RUN_TYPE=DIST_KUNLUN") -set_tests_properties(test_collective_allgather_xpu - PROPERTIES LABELS "RUN_TYPE=DIST_KUNLUN") -set_tests_properties(test_collective_allreduce_xpu - PROPERTIES LABELS "RUN_TYPE=DIST_KUNLUN") diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4d50fe16b9b05..8bbd59a7176ff 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -128,7 +128,9 @@ if(WITH_TESTING) add_subdirectory(standalone_executor) add_subdirectory(tokenizer) # add_subdirectory(white_list) - add_subdirectory(xpu) + if(WITH_XPU) + add_subdirectory(xpu) + endif() endif() get_property(test_srcs GLOBAL PROPERTY TEST_SRCS) diff --git a/test/xpu/CMakeLists.txt b/test/xpu/CMakeLists.txt index e0543ef9e50f5..4ecde12f008af 100644 --- a/test/xpu/CMakeLists.txt +++ b/test/xpu/CMakeLists.txt @@ -1,3 +1,40 @@ if(WITH_XPU) add_subdirectory(cpp) endif() + +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +if(WITH_XPU_BKCL) + list(REMOVE_ITEM TEST_OPS "test_gen_bkcl_id_op") +endif() + +file( + GLOB DIST_TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_dist_*.py") +if(WITH_XPU_BKCL) + list(APPEND DIST_TEST_OPS test_gen_bkcl_id_op) +endif() + +foreach(TEST_OP ${TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) +endforeach() + +foreach(TEST_OP ${DIST_TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) +endforeach() + +set_tests_properties(test_conv2d_op_xpu PROPERTIES TIMEOUT 120) +set_tests_properties(test_mul_op_xpu PROPERTIES TIMEOUT 120) +set_tests_properties(test_matmul_v2_op_xpu PROPERTIES TIMEOUT 900) +set_tests_properties(test_matmul_op_xpu PROPERTIES TIMEOUT 300) +set_tests_properties(test_collective_identity_xpu + PROPERTIES LABELS "RUN_TYPE=DIST_KUNLUN") +set_tests_properties(test_collective_allgather_xpu + PROPERTIES LABELS "RUN_TYPE=DIST_KUNLUN") +set_tests_properties(test_collective_allreduce_xpu + PROPERTIES LABELS "RUN_TYPE=DIST_KUNLUN") diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py b/test/xpu/collective_allgather_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py rename to test/xpu/collective_allgather_op_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py b/test/xpu/collective_allreduce_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py rename to test/xpu/collective_allreduce_op_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_broadcast_op_xpu.py b/test/xpu/collective_broadcast_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/collective_broadcast_op_xpu.py rename to test/xpu/collective_broadcast_op_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_concat_op.py b/test/xpu/collective_concat_op.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/collective_concat_op.py rename to test/xpu/collective_concat_op.py diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py b/test/xpu/collective_identity_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py rename to test/xpu/collective_identity_op_xpu.py 
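A quick orientation for the block of renames around this point: this patch ([PATCH 31/59], "[Move Test] xpu") relocates every XPU test, together with its helpers get_test_cover_info.py and op_test_xpu.py, from python/paddle/fluid/tests/unittests/xpu/ to test/xpu/. After the move the helpers are plain siblings of the tests, so the per-file `sys.path.append("..")` boilerplate is dropped and `from xpu.get_test_cover_info import ...` becomes `from get_test_cover_info import ...`; only op_test_xpu.py, plus the few tests that import eager_op_test directly, still append the legacy unittests directory to sys.path. The following is a minimal sketch of the resulting import layout, not part of any diff in this series; the file name is hypothetical, and the real instances appear in the per-test diffs below.

# Hypothetical test/xpu/test_example_op_xpu.py, sketching only the post-move
# import layout; concrete op inputs/outputs are intentionally omitted.
import unittest

from get_test_cover_info import (  # sibling module, now located in test/xpu/
    XPUOpTestWrapper,
    create_test_class,
    get_xpu_op_support_types,
)
from op_test_xpu import XPUOpTest  # sibling module; it appends the legacy
                                   # unittests directory to sys.path itself

import paddle

paddle.enable_static()

# A concrete test defines an XPUOpTestWrapper subclass wrapping XPUOpTest
# cases and registers one class per supported dtype via
# get_xpu_op_support_types() and create_test_class(), exactly as the
# relocated tests in the diffs below do.

if __name__ == '__main__':
    unittest.main()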
diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_softmax_with_cross_entropy_op_xpu.py b/test/xpu/collective_softmax_with_cross_entropy_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/collective_softmax_with_cross_entropy_op_xpu.py rename to test/xpu/collective_softmax_with_cross_entropy_op_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_split_op.py b/test/xpu/collective_split_op.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/collective_split_op.py rename to test/xpu/collective_split_op.py diff --git a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py b/test/xpu/get_test_cover_info.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py rename to test/xpu/get_test_cover_info.py diff --git a/python/paddle/fluid/tests/unittests/op_test_xpu.py b/test/xpu/op_test_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/op_test_xpu.py rename to test/xpu/op_test_xpu.py index af92704a57216..02e68b3c3ed93 100644 --- a/python/paddle/fluid/tests/unittests/op_test_xpu.py +++ b/test/xpu/op_test_xpu.py @@ -12,15 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys + import numpy as np + +sys.path.append('..') +sys.path.append('../../python/paddle/fluid/tests/unittests') + from eager_op_test import OpTest -from testsuite import append_loss_ops, create_op, set_input -from white_list import no_grad_set_white_list, op_threshold_white_list -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( get_xpu_op_support_types, is_empty_grad_op_type, type_dict_str_to_numpy, ) +from testsuite import append_loss_ops, create_op, set_input +from white_list import no_grad_set_white_list, op_threshold_white_list import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py b/test/xpu/parallel_dygraph_dataparallel_with_pylayer.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py rename to test/xpu/parallel_dygraph_dataparallel_with_pylayer.py diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py b/test/xpu/parallel_dygraph_gradient_check.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py rename to test/xpu/parallel_dygraph_gradient_check.py diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py b/test/xpu/parallel_dygraph_gradient_check_in_eager_mode.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py rename to test/xpu/parallel_dygraph_gradient_check_in_eager_mode.py diff --git a/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py b/test/xpu/process_group_bkcl.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py rename to test/xpu/process_group_bkcl.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py b/test/xpu/test_accuracy_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py rename to test/xpu/test_accuracy_op_xpu.py index 082e883ded741..a87f6c084351c 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py +++ b/test/xpu/test_accuracy_op_xpu.py 
@@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/test/xpu/test_activation_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py rename to test/xpu/test_activation_op_xpu.py index b071db95b40bc..a877b09bbc957 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py +++ b/test/xpu/test_activation_op_xpu.py @@ -15,17 +15,16 @@ import sys import unittest -import numpy as np - -sys.path.append("..") +sys.path.append('../../python/paddle/fluid/tests/unittests') +import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/test/xpu/test_adadelta_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py rename to test/xpu/test_adadelta_op_xpu.py index 71b691a6f2743..b6ef0fbdf8ec8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py +++ b/test/xpu/test_adadelta_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid @@ -52,11 +48,13 @@ def setUp(self): rho = 0.95 epsilon = 1e-6 + learning_rate = 1.0 self.inputs = { 'Param': param, 'Grad': grad, 'AvgSquaredGrad': avg_squared_grad, 'AvgSquaredUpdate': avg_squared_update, + 'LearningRate': np.array([learning_rate]).astype("float32"), } self.attrs = {'rho': rho, 'epsilon': epsilon} @@ -107,11 +105,13 @@ def setUp(self): rho = 0.95 epsilon = 1e-6 + learning_rate = 1.0 self.inputs = { 'Param': param, 'Grad': grad, 'AvgSquaredGrad': avg_squared_grad, 'AvgSquaredUpdate': avg_squared_update, + 'LearningRate': np.array([learning_rate]).astype("float32"), } avg_squared_grad_out = rho * avg_squared_grad + ( diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adagrad_op_xpu.py b/test/xpu/test_adagrad_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_adagrad_op_xpu.py rename to test/xpu/test_adagrad_op_xpu.py index 942ffd26a4c09..34040ebd3f77b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adagrad_op_xpu.py +++ b/test/xpu/test_adagrad_op_xpu.py @@ -12,21 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -import numpy as np - -import paddle - -sys.path.append("..") import unittest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +import numpy as np +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py b/test/xpu/test_adam_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py rename to test/xpu/test_adam_op_xpu.py index 6d44d355e4cdc..990136c57170e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py +++ b/test/xpu/test_adam_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py b/test/xpu/test_adamw_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py rename to test/xpu/test_adamw_op_xpu.py index d30fdbed09db8..768cbe8151da3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py +++ b/test/xpu/test_adamw_op_xpu.py @@ -12,20 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") - import unittest from functools import partial import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py b/test/xpu/test_affine_channel_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py rename to test/xpu/test_affine_channel_op_xpu.py index 6f85dc47488ab..c200235ff879c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py +++ b/test/xpu/test_affine_channel_op_xpu.py @@ -15,10 +15,6 @@ Unit testing for affine_channel_op """ -import sys - -sys.path.append("..") - import unittest import numpy as np diff --git a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py b/test/xpu/test_amp_check_finite_and_scale_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py rename to test/xpu/test_amp_check_finite_and_scale_op_xpu.py index e171625dd4367..6abcf53707a33 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py +++ b/test/xpu/test_amp_check_finite_and_scale_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py b/test/xpu/test_arg_max_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py rename to test/xpu/test_arg_max_op_xpu.py index d9a69216351a4..4a8e0dc28fad1 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py +++ b/test/xpu/test_arg_max_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py b/test/xpu/test_argsort_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py rename to test/xpu/test_argsort_op_xpu.py index 39f554f9ac176..f3a8a69ee5ded 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py +++ b/test/xpu/test_argsort_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_assign_op_xpu.py b/test/xpu/test_assign_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_assign_op_xpu.py rename to test/xpu/test_assign_op_xpu.py index 97460b54aa310..d3102dd448a49 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_assign_op_xpu.py +++ b/test/xpu/test_assign_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py b/test/xpu/test_assign_value_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py rename to test/xpu/test_assign_value_op_xpu.py index d98e6375da52d..a0e3a57dc8ac5 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py +++ b/test/xpu/test_assign_value_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_atan_op_xpu.py b/test/xpu/test_atan_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_atan_op_xpu.py rename to test/xpu/test_atan_op_xpu.py index bb02e1320da15..4ab5b14e9b44e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_atan_op_xpu.py +++ b/test/xpu/test_atan_op_xpu.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np @@ -21,14 +20,13 @@ paddle.enable_static() -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest class XPUTestAtanOp(XPUOpTestWrapper): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/test/xpu/test_batch_norm_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py rename to test/xpu/test_batch_norm_op_xpu.py index 446d49717af81..6cf666c8094c9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/test/xpu/test_batch_norm_op_xpu.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py b/test/xpu/test_bce_loss_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py rename to test/xpu/test_bce_loss_op_xpu.py index 883063969ff6a..acc3bd06e6103 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py +++ b/test/xpu/test_bce_loss_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py b/test/xpu/test_bilinear_interp_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py rename to test/xpu/test_bilinear_interp_op_xpu.py index dc8e996e09382..a5a849f080e6a 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py +++ b/test/xpu/test_bilinear_interp_op_xpu.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import paddle -sys.path.append("..") - paddle.enable_static() ''' def bilinear_interp_np(input, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py b/test/xpu/test_bilinear_interp_v2_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py rename to test/xpu/test_bilinear_interp_v2_op_xpu.py index ebd48f55d57f1..dd0a6049221fd 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py +++ b/test/xpu/test_bilinear_interp_v2_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py b/test/xpu/test_bitwise_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py rename to test/xpu/test_bitwise_op_xpu.py index 8fcf5a7af7811..1d21108bf8cd5 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py +++ b/test/xpu/test_bitwise_op_xpu.py @@ -15,17 +15,16 @@ import sys import unittest -import numpy as np - -sys.path.append("..") +sys.path.append('../../python/paddle/fluid/tests/unittests') +import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py b/test/xpu/test_bmm_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py rename to test/xpu/test_bmm_op_xpu.py index d0d43dd94b0aa..48bd4ea692cf8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py +++ b/test/xpu/test_bmm_op_xpu.py @@ -10,19 +10,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") - import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_c_concat.py b/test/xpu/test_c_concat.py similarity index 95% rename from python/paddle/fluid/tests/unittests/xpu/test_c_concat.py rename to test/xpu/test_c_concat.py index 313ae27a5b617..d2490aa3772dc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_c_concat.py +++ b/test/xpu/test_c_concat.py @@ -12,21 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 
-import sys
 import unittest
 
-from test_collective_base_xpu import TestDistBase
-
-import paddle
-from paddle.fluid import core
-
-sys.path.append("..")
-
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from test_collective_base_xpu import TestDistBase
+
+import paddle
+from paddle.fluid import core
 
 paddle.enable_static()
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py b/test/xpu/test_c_embedding_op_xpu.py
similarity index 96%
rename from python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py
rename to test/xpu/test_c_embedding_op_xpu.py
index b685458a3eed6..4d0989c322e54 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py
+++ b/test/xpu/test_c_embedding_op_xpu.py
@@ -12,10 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
 import unittest
 
-sys.path.append("..")
 import paddle
 from paddle.fluid.tests.unittests.c_embedding_op_base import (
     TestCEmbeddingCPU,
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_c_split.py b/test/xpu/test_c_split.py
similarity index 95%
rename from python/paddle/fluid/tests/unittests/xpu/test_c_split.py
rename to test/xpu/test_c_split.py
index c5b0f236935af..67e2f1a6cc5f6 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_c_split.py
+++ b/test/xpu/test_c_split.py
@@ -12,21 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
 import unittest
 
-from test_collective_base_xpu import TestDistBase
-
-import paddle
-from paddle.fluid import core
-
-sys.path.append("..")
-
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from test_collective_base_xpu import TestDistBase
+
+import paddle
+from paddle.fluid import core
 
 paddle.enable_static()
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py b/test/xpu/test_cast_op_xpu.py
similarity index 97%
rename from python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
rename to test/xpu/test_cast_op_xpu.py
index e013432d13b97..baf814e08de8a 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
+++ b/test/xpu/test_cast_op_xpu.py
@@ -12,18 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
-
-sys.path.append("..")
 import unittest
 
 import numpy as np
-from op_test_xpu import XPUOpTest
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from op_test_xpu import XPUOpTest
 
 import paddle
 from paddle import fluid
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py b/test/xpu/test_clip_by_norm_op_xpu.py
similarity index 97%
rename from python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py
rename to test/xpu/test_clip_by_norm_op_xpu.py
index 206f65c10afcd..4bec31b80d85c 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py
+++ b/test/xpu/test_clip_by_norm_op_xpu.py
@@ -12,18 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py b/test/xpu/test_clip_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py rename to test/xpu/test_clip_op_xpu.py index 994153a8dd725..79d4e3e779869 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py +++ b/test/xpu/test_clip_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_coalesce_tensor_op_xpu.py b/test/xpu/test_coalesce_tensor_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_coalesce_tensor_op_xpu.py rename to test/xpu/test_coalesce_tensor_op_xpu.py index 2324d09857dcf..f0f053137949f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_coalesce_tensor_op_xpu.py +++ b/test/xpu/test_coalesce_tensor_op_xpu.py @@ -12,22 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np from paddle.fluid import core -sys.path.append("..") - alignment = 256 -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_allgather_xpu.py b/test/xpu/test_collective_allgather_xpu.py similarity index 95% rename from python/paddle/fluid/tests/unittests/xpu/test_collective_allgather_xpu.py rename to test/xpu/test_collective_allgather_xpu.py index be1326d176456..3651ed2062957 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_allgather_xpu.py +++ b/test/xpu/test_collective_allgather_xpu.py @@ -12,21 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest -from test_collective_base_xpu import TestDistBase - -import paddle -from paddle.fluid import core - -sys.path.append("..") - -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from test_collective_base_xpu import TestDistBase + +import paddle +from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_allreduce_xpu.py b/test/xpu/test_collective_allreduce_xpu.py similarity index 95% rename from python/paddle/fluid/tests/unittests/xpu/test_collective_allreduce_xpu.py rename to test/xpu/test_collective_allreduce_xpu.py index 187494f50154e..05539aeaae432 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_allreduce_xpu.py +++ b/test/xpu/test_collective_allreduce_xpu.py @@ -12,21 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -from test_collective_base_xpu import TestDistBase - -import paddle -from paddle.fluid import core - -sys.path.append("..") - -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from test_collective_base_xpu import TestDistBase + +import paddle +from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py b/test/xpu/test_collective_base_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py rename to test/xpu/test_collective_base_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_broadcast_xpu.py b/test/xpu/test_collective_broadcast_xpu.py similarity index 92% rename from python/paddle/fluid/tests/unittests/xpu/test_collective_broadcast_xpu.py rename to test/xpu/test_collective_broadcast_xpu.py index e015d0f92b114..5ddb451e7e4fa 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_broadcast_xpu.py +++ b/test/xpu/test_collective_broadcast_xpu.py @@ -12,18 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest +from get_test_cover_info import XPUOpTestWrapper, create_test_class from test_collective_base_xpu import TestDistBase import paddle from paddle.fluid import core -sys.path.append("..") - -from xpu.get_test_cover_info import XPUOpTestWrapper, create_test_class - paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_identity_xpu.py b/test/xpu/test_collective_identity_xpu.py similarity index 95% rename from python/paddle/fluid/tests/unittests/xpu/test_collective_identity_xpu.py rename to test/xpu/test_collective_identity_xpu.py index 3b5a2fa767a97..421f9168a28d3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_identity_xpu.py +++ b/test/xpu/test_collective_identity_xpu.py @@ -12,21 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest -from test_collective_base_xpu import TestDistBase - -import paddle -from paddle.fluid import core - -sys.path.append("..") - -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from test_collective_base_xpu import TestDistBase + +import paddle +from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_process_group.py b/test/xpu/test_collective_process_group.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_collective_process_group.py rename to test/xpu/test_collective_process_group.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_softmax_with_cross_entropy_xpu.py b/test/xpu/test_collective_softmax_with_cross_entropy_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_collective_softmax_with_cross_entropy_xpu.py rename to test/xpu/test_collective_softmax_with_cross_entropy_xpu.py index 703194eb58d5e..0bc75c7a4930b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_softmax_with_cross_entropy_xpu.py +++ b/test/xpu/test_collective_softmax_with_cross_entropy_xpu.py @@ -13,22 +13,18 @@ # limitations under the License. import os -import sys import unittest import numpy as np -from test_collective_base_xpu import DataTypeCast, TestDistBase - -import paddle -from paddle.framework import core - -sys.path.append("..") - -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from test_collective_base_xpu import DataTypeCast, TestDistBase + +import paddle +from paddle.framework import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py b/test/xpu/test_compare_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py rename to test/xpu/test_compare_op_xpu.py index e16b9032f2ea4..4793122a81753 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py +++ b/test/xpu/test_compare_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py b/test/xpu/test_concat_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py rename to test/xpu/test_concat_op_xpu.py index 5867858a97b4d..4f722ef6d9853 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py +++ b/test/xpu/test_concat_op_xpu.py @@ -13,18 +13,18 @@ # limitations under the License. 
import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py b/test/xpu/test_conv2d_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py rename to test/xpu/test_conv2d_op_xpu.py index a3eb2a1f3a77e..d09402f934c69 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py +++ b/test/xpu/test_conv2d_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py b/test/xpu/test_conv2d_transpose_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py rename to test/xpu/test_conv2d_transpose_op_xpu.py index a5be198089e86..7bf01d23fb56f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py +++ b/test/xpu/test_conv2d_transpose_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py b/test/xpu/test_conv3d_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py rename to test/xpu/test_conv3d_op_xpu.py index f6578371b97ad..f9904148f9b38 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py +++ b/test/xpu/test_conv3d_op_xpu.py @@ -12,14 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np +from get_test_cover_info import XPUOpTestWrapper, create_test_class from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import XPUOpTestWrapper, create_test_class import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cumprod_op_xpu.py b/test/xpu/test_cumprod_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_cumprod_op_xpu.py rename to test/xpu/test_cumprod_op_xpu.py index 3ea12d2bf9f41..fb3763ac5e8f7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_cumprod_op_xpu.py +++ b/test/xpu/test_cumprod_op_xpu.py @@ -13,19 +13,15 @@ # limitations under the License. 
import random -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py b/test/xpu/test_cumsum_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py rename to test/xpu/test_cumsum_op_xpu.py index 8ba052171fc2a..2e3555b702576 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py +++ b/test/xpu/test_cumsum_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py b/test/xpu/test_deformable_conv_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py rename to test/xpu/test_deformable_conv_op_xpu.py index 84afb9cbd03ea..8577cb2497704 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py +++ b/test/xpu/test_deformable_conv_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import OpTest, XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import OpTest, XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_depthwise_conv2d_op_xpu.py b/test/xpu/test_depthwise_conv2d_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_depthwise_conv2d_op_xpu.py rename to test/xpu/test_depthwise_conv2d_op_xpu.py index 7ccf79170ddf2..a0b01c921280f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_depthwise_conv2d_op_xpu.py +++ b/test/xpu/test_depthwise_conv2d_op_xpu.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np @@ -22,12 +19,12 @@ import paddle paddle.enable_static() -from test_conv2d_op_xpu import XPUTestConv2DOp, XPUTestConv2DOp_v2 -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from test_conv2d_op_xpu import XPUTestConv2DOp, XPUTestConv2DOp_v2 class XPUTestDepthwiseConv2DOp(XPUOpTestWrapper): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py b/test/xpu/test_device_guard_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py rename to test/xpu/test_device_guard_xpu.py index 01581c9ac61cc..cc9fb142279ac 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py +++ b/test/xpu/test_device_guard_xpu.py @@ -12,11 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest - -sys.path.append("..") - import warnings import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_diag_v2_op_xpu.py b/test/xpu/test_diag_v2_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_diag_v2_op_xpu.py rename to test/xpu/test_diag_v2_op_xpu.py index 0a2eac8720ef1..51f42d00507fe 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_diag_v2_op_xpu.py +++ b/test/xpu/test_diag_v2_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") - import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_diagonal_op_xpu.py b/test/xpu/test_diagonal_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_diagonal_op_xpu.py rename to test/xpu/test_diagonal_op_xpu.py index 001cd727a081c..bbf289ce4c9fd 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_diagonal_op_xpu.py +++ b/test/xpu/test_diagonal_op_xpu.py @@ -15,18 +15,18 @@ import sys import unittest -import numpy as np - -import paddle +sys.path.append('../../python/paddle/fluid/tests/unittests') -sys.path.append("..") +import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_distribute_fpn_proposals_op_xpu.py b/test/xpu/test_distribute_fpn_proposals_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_distribute_fpn_proposals_op_xpu.py rename to test/xpu/test_distribute_fpn_proposals_op_xpu.py index c11899d4c7ed7..230b9647f6ef1 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_distribute_fpn_proposals_op_xpu.py +++ b/test/xpu/test_distribute_fpn_proposals_op_xpu.py @@ -11,9 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np @@ -23,7 +20,7 @@ paddle.enable_static() -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py b/test/xpu/test_dropout_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py rename to test/xpu/test_dropout_op_xpu.py index 562b968b5f698..1a3459736c268 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py +++ b/test/xpu/test_dropout_op_xpu.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np @@ -26,7 +23,7 @@ paddle.enable_static() -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_einsum_op_xpu.py b/test/xpu/test_einsum_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_einsum_op_xpu.py rename to test/xpu/test_einsum_op_xpu.py index cb73f85671a83..57a82009834fa 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_einsum_op_xpu.py +++ b/test/xpu/test_einsum_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py b/test/xpu/test_elementwise_add_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py rename to test/xpu/test_elementwise_add_op_xpu.py index 06db6b54a6740..8d894a7b8828c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py +++ b/test/xpu/test_elementwise_add_op_xpu.py @@ -13,19 +13,18 @@ # limitations under the License. import sys - -import numpy as np - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + +import numpy as np from eager_op_test import OpTest, skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py b/test/xpu/test_elementwise_add_op_xpu_kp.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py rename to test/xpu/test_elementwise_add_op_xpu_kp.py index d9ef90fb2363f..267ba2ec2b959 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py +++ b/test/xpu/test_elementwise_add_op_xpu_kp.py @@ -13,12 +13,11 @@ # limitations under the License. 
import sys - -import numpy as np - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + +import numpy as np from eager_op_test import OpTest, skip_check_grad_ci from op_test_xpu import XPUOpTest diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py b/test/xpu/test_elementwise_div_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py rename to test/xpu/test_elementwise_div_op_xpu.py index fc1bf1d834aeb..ca7693d0ab8e7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py +++ b/test/xpu/test_elementwise_div_op_xpu.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py b/test/xpu/test_elementwise_floordiv_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py rename to test/xpu/test_elementwise_floordiv_op_xpu.py index 83c476a213ac0..3aa7a7f2c138a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py +++ b/test/xpu/test_elementwise_floordiv_op_xpu.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py b/test/xpu/test_elementwise_max_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py rename to test/xpu/test_elementwise_max_op_xpu.py index 66982e9a2c5e5..d9e96ec1fcb2c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py +++ b/test/xpu/test_elementwise_max_op_xpu.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py b/test/xpu/test_elementwise_min_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py rename to test/xpu/test_elementwise_min_op_xpu.py index c79cc9b8e130c..34223b52780f7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py +++ b/test/xpu/test_elementwise_min_op_xpu.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py b/test/xpu/test_elementwise_mod_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py rename to test/xpu/test_elementwise_mod_op_xpu.py index c00ea8db5c859..f909a12cc5e21 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py +++ b/test/xpu/test_elementwise_mod_op_xpu.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py b/test/xpu/test_elementwise_mul_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py rename to test/xpu/test_elementwise_mul_op_xpu.py index 6dea1d6b99199..a6c1319b5f19d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py +++ b/test/xpu/test_elementwise_mul_op_xpu.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import OpTest, skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py b/test/xpu/test_elementwise_pow_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py rename to test/xpu/test_elementwise_pow_op_xpu.py index 431ca838c1ab7..5864bfa00c793 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py +++ b/test/xpu/test_elementwise_pow_op_xpu.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import OpTest, skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py b/test/xpu/test_elementwise_sub_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py rename to test/xpu/test_elementwise_sub_op_xpu.py index 5b731ef32bb0d..aeddf4641d726 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py +++ b/test/xpu/test_elementwise_sub_op_xpu.py @@ -13,19 +13,18 @@ # limitations under the License. import sys - -import numpy as np - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + +import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_empty_op_xpu.py b/test/xpu/test_empty_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_empty_op_xpu.py rename to test/xpu/test_empty_op_xpu.py index 8724188127522..71c25f335b1ba 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_empty_op_xpu.py +++ b/test/xpu/test_empty_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") - import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid.framework import convert_np_dtype_to_dtype_ diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py b/test/xpu/test_expand_as_v2_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py rename to test/xpu/test_expand_as_v2_op_xpu.py index ac5e06c2682c8..586761c9aeac4 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py +++ b/test/xpu/test_expand_as_v2_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py b/test/xpu/test_expand_v2_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py rename to test/xpu/test_expand_v2_op_xpu.py index f7098282a62a3..9d869d14b32e2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py +++ b/test/xpu/test_expand_v2_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py b/test/xpu/test_fill_any_like_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py rename to test/xpu/test_fill_any_like_op_xpu.py index af8f9518b5483..079a86b07c44a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py +++ b/test/xpu/test_fill_any_like_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") - import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_op_xpu.py b/test/xpu/test_fill_any_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_fill_any_op_xpu.py rename to test/xpu/test_fill_any_op_xpu.py index 95d514d94cecd..e351d9dacd1a3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_op_xpu.py +++ b/test/xpu/test_fill_any_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py b/test/xpu/test_fill_constant_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py rename to test/xpu/test_fill_constant_op_xpu.py index 4bd9abae9a5b9..d2a01a1e6377b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py +++ b/test/xpu/test_fill_constant_op_xpu.py @@ -13,18 +13,18 @@ # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import convert_float_to_uint16 -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_diagonal_tensor_op_xpu.py b/test/xpu/test_fill_diagonal_tensor_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_fill_diagonal_tensor_op_xpu.py rename to test/xpu/test_fill_diagonal_tensor_op_xpu.py index 3fbdf7abe6d14..de5025e8c4c05 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fill_diagonal_tensor_op_xpu.py +++ b/test/xpu/test_fill_diagonal_tensor_op_xpu.py @@ -15,18 +15,18 @@ import sys import unittest -import numpy as np - -import paddle +sys.path.append('../../python/paddle/fluid/tests/unittests') -sys.path.append("..") +import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_op_xpu.py b/test/xpu/test_fill_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_fill_op_xpu.py rename to test/xpu/test_fill_op_xpu.py index 4cb43a2ba430c..99ca677ce4200 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fill_op_xpu.py +++ b/test/xpu/test_fill_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py b/test/xpu/test_flatten2_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py rename to test/xpu/test_flatten2_op_xpu.py index 380da7b62d0b6..9595b9877bc5a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py +++ b/test/xpu/test_flatten2_op_xpu.py @@ -12,17 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest -sys.path.append("..") import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py b/test/xpu/test_flatten_contiguous_range_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py rename to test/xpu/test_flatten_contiguous_range_op_xpu.py index af6f2095fc97d..05ad91958374b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py +++ b/test/xpu/test_flatten_contiguous_range_op_xpu.py @@ -12,21 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py b/test/xpu/test_flatten_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py rename to test/xpu/test_flatten_op_xpu.py index 9876b6c381540..7673ec9ba3d6d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py +++ b/test/xpu/test_flatten_op_xpu.py @@ -12,17 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py b/test/xpu/test_fleet_exe_dist_model_run_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py rename to test/xpu/test_fleet_exe_dist_model_run_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_attention_op_xpu.py b/test/xpu/test_fused_attention_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_fused_attention_op_xpu.py rename to test/xpu/test_fused_attention_op_xpu.py index 3cdb5094f21d4..9db584f278e7f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_attention_op_xpu.py +++ b/test/xpu/test_fused_attention_op_xpu.py @@ -12,20 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -import numpy as np - -sys.path.append("..") - import unittest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +import numpy as np +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle import paddle.incubate.nn.functional as incubate_f diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_feedforward_op_xpu.py b/test/xpu/test_fused_feedforward_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_fused_feedforward_op_xpu.py rename to test/xpu/test_fused_feedforward_op_xpu.py index feb7549a33e50..11f7148e188d0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_feedforward_op_xpu.py +++ b/test/xpu/test_fused_feedforward_op_xpu.py @@ -11,16 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import sys - -import numpy as np - -sys.path.append("..") - import unittest +import numpy as np +from get_test_cover_info import XPUOpTestWrapper, create_test_class from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import XPUOpTestWrapper, create_test_class import paddle import paddle.incubate.nn.functional as incubate_f diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_grad_op_xpu.py b/test/xpu/test_fused_gemm_epilogue_grad_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_grad_op_xpu.py rename to test/xpu/test_fused_gemm_epilogue_grad_op_xpu.py index 35b943a3f4c77..394fe515554f3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_grad_op_xpu.py +++ b/test/xpu/test_fused_gemm_epilogue_grad_op_xpu.py @@ -13,19 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") - import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py b/test/xpu/test_fused_gemm_epilogue_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py rename to test/xpu/test_fused_gemm_epilogue_op_xpu.py index 590276f58e4dc..37b1271963faf 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py +++ b/test/xpu/test_fused_gemm_epilogue_op_xpu.py @@ -13,18 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import _legacy_C_ops diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/test/xpu/test_fused_resnet_basic_block_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py rename to test/xpu/test_fused_resnet_basic_block_op_xpu.py index 9c3156997035a..060217a6a1082 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py +++ b/test/xpu/test_fused_resnet_basic_block_op_xpu.py @@ -13,13 +13,13 @@ # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import OpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py b/test/xpu/test_gather_nd_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py rename to test/xpu/test_gather_nd_op_xpu.py index a22c10e0fec56..e642afffb44cf 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py +++ b/test/xpu/test_gather_nd_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py b/test/xpu/test_gather_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py rename to test/xpu/test_gather_op_xpu.py index a57af602f9712..0d132e7185e64 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py +++ b/test/xpu/test_gather_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") - import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py b/test/xpu/test_gaussian_random_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py rename to test/xpu/test_gaussian_random_op_xpu.py index 9d5b5e747f445..f30b994dcd18b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py +++ b/test/xpu/test_gaussian_random_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py b/test/xpu/test_gen_bkcl_id_op.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py rename to test/xpu/test_gen_bkcl_id_op.py index e13efff36e484..7c7ae3511a252 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py +++ b/test/xpu/test_gen_bkcl_id_op.py @@ -13,10 +13,7 @@ # limitations under the License. import os -import sys import unittest - -sys.path.append("..") from multiprocessing import Process from launch_function_helper import _find_free_port, wait diff --git a/python/paddle/fluid/tests/unittests/xpu/test_generate_proposals_v2_op_xpu.py b/test/xpu/test_generate_proposals_v2_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_generate_proposals_v2_op_xpu.py rename to test/xpu/test_generate_proposals_v2_op_xpu.py index 3a97b28267d92..b7e22032f5f0c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_generate_proposals_v2_op_xpu.py +++ b/test/xpu/test_generate_proposals_v2_op_xpu.py @@ -12,22 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys -import unittest - -import numpy as np - -sys.path.append("..") - import copy import math +import unittest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +import numpy as np +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py b/test/xpu/test_grid_sampler_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py rename to test/xpu/test_grid_sampler_op_xpu.py index c92ddc9531b21..1e171f2349392 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py +++ b/test/xpu/test_grid_sampler_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_group_norm_op_xpu.py b/test/xpu/test_group_norm_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_group_norm_op_xpu.py rename to test/xpu/test_group_norm_op_xpu.py index 67161776f81c2..16cec44287df8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_group_norm_op_xpu.py +++ b/test/xpu/test_group_norm_op_xpu.py @@ -15,17 +15,16 @@ import sys import unittest -import numpy as np - -sys.path.append("..") +sys.path.append('../../python/paddle/fluid/tests/unittests') +import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py b/test/xpu/test_huber_loss_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py rename to test/xpu/test_huber_loss_op_xpu.py index 2a51e6ea95014..fa1e0b4b2ce87 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py +++ b/test/xpu/test_huber_loss_op_xpu.py @@ -15,17 +15,16 @@ import sys import unittest -import numpy as np - -sys.path.append("..") +sys.path.append('../../python/paddle/fluid/tests/unittests') +import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_increment_op_xpu.py b/test/xpu/test_increment_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_increment_op_xpu.py rename to test/xpu/test_increment_op_xpu.py index 8ebbeae9654a6..5ef28f30b44a9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_increment_op_xpu.py +++ b/test/xpu/test_increment_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_index_sample_op_xpu.py b/test/xpu/test_index_sample_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_index_sample_op_xpu.py rename to test/xpu/test_index_sample_op_xpu.py index c9701af3e5786..e5204a1247f46 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_index_sample_op_xpu.py +++ b/test/xpu/test_index_sample_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py b/test/xpu/test_index_select_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py rename to test/xpu/test_index_select_op_xpu.py index 03e7debb59acf..62b9dd54c2e8b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py +++ b/test/xpu/test_index_select_op_xpu.py @@ -12,23 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -from paddle import fluid -from paddle.fluid import Program, program_guard - -sys.path.append("..") - import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle +from paddle import fluid +from paddle.fluid import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_instance_norm_op_xpu.py b/test/xpu/test_instance_norm_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_instance_norm_op_xpu.py rename to test/xpu/test_instance_norm_op_xpu.py index 8e0b777ea852c..5eb3e955deddf 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_instance_norm_op_xpu.py +++ b/test/xpu/test_instance_norm_op_xpu.py @@ -12,22 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle -from paddle import fluid -from paddle.fluid import Program, program_guard - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle +from paddle import fluid +from paddle.fluid import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py b/test/xpu/test_iou_similarity_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py rename to test/xpu/test_iou_similarity_op_xpu.py index 9d8873666a3ab..301d5fb07b99a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py +++ b/test/xpu/test_iou_similarity_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest -from numpy import random - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from numpy import random +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_isfinite_op_xpu.py b/test/xpu/test_isfinite_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_isfinite_op_xpu.py rename to test/xpu/test_isfinite_op_xpu.py index c5253bb90cbaa..93e6cf3533eca 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_isfinite_op_xpu.py +++ b/test/xpu/test_isfinite_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_kldiv_loss_op_xpu.py b/test/xpu/test_kldiv_loss_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_kldiv_loss_op_xpu.py rename to test/xpu/test_kldiv_loss_op_xpu.py index b3e3d7e5a058d..d3dd09b6c3c28 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_kldiv_loss_op_xpu.py +++ b/test/xpu/test_kldiv_loss_op_xpu.py @@ -11,18 +11,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.nn.functional import kl_div diff --git a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py b/test/xpu/test_label_smooth_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py rename to test/xpu/test_label_smooth_op_xpu.py index b83a32a313ad4..4ad7b3dc6a871 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py +++ b/test/xpu/test_label_smooth_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py b/test/xpu/test_lamb_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py rename to test/xpu/test_lamb_op_xpu.py index 70794de507f5e..c82bb5cd4e166 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py +++ b/test/xpu/test_lamb_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py b/test/xpu/test_layer_norm_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py rename to test/xpu/test_layer_norm_op_xpu.py index 12e6b49424093..1b98c4fe081b4 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py +++ b/test/xpu/test_layer_norm_op_xpu.py @@ -12,23 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest from functools import reduce - -import numpy as np - -import paddle - -sys.path.append("..") from operator import mul -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +import numpy as np +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_linspace_op_xpu.py b/test/xpu/test_linspace_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_linspace_op_xpu.py rename to test/xpu/test_linspace_op_xpu.py index 65247c5bec50d..70fdb01b92159 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_linspace_op_xpu.py +++ b/test/xpu/test_linspace_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest, convert_np_dtype_to_dtype_ -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest, convert_np_dtype_to_dtype_ import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py b/test/xpu/test_log_loss_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py rename to test/xpu/test_log_loss_op_xpu.py index 42a59da6d0dde..920c6c1f46931 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py +++ b/test/xpu/test_log_loss_op_xpu.py @@ -13,10 +13,10 @@ # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from eager_op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/xpu/test_log_softmax_op_xpu.py b/test/xpu/test_log_softmax_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_log_softmax_op_xpu.py rename to test/xpu/test_log_softmax_op_xpu.py index fdaaadcae81e1..269d3e76bca4a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_log_softmax_op_xpu.py +++ b/test/xpu/test_log_softmax_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py b/test/xpu/test_logical_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py rename to test/xpu/test_logical_op_xpu.py index b07327283746d..44f891d2e3f65 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py +++ b/test/xpu/test_logical_op_xpu.py @@ -15,17 +15,16 @@ import sys import unittest -import numpy as np - -sys.path.append("..") +sys.path.append('../../python/paddle/fluid/tests/unittests') +import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py b/test/xpu/test_logsumexp_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py rename to test/xpu/test_logsumexp_op_xpu.py index 46515eb6b1cfd..1d871797bb60c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py +++ b/test/xpu/test_logsumexp_op_xpu.py @@ -12,15 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -import paddle - -sys.path.append("..") import numpy as np from op_test_xpu import XPUOpTest +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py b/test/xpu/test_lookup_table_v2_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py rename to test/xpu/test_lookup_table_v2_op_xpu.py index 8cb36afb2e490..7af995692a7a3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py +++ b/test/xpu/test_lookup_table_v2_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py b/test/xpu/test_masked_select_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py rename to test/xpu/test_masked_select_op_xpu.py index d526dae396dde..4ed6cd0a06e37 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py +++ b/test/xpu/test_masked_select_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys
 import unittest
 
 import numpy as np
-
-sys.path.append("..")
-
-from op_test_xpu import XPUOpTest
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from op_test_xpu import XPUOpTest
 
 import paddle
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py b/test/xpu/test_matmul_op_xpu.py
similarity index 99%
rename from python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
rename to test/xpu/test_matmul_op_xpu.py
index 3484264cff6dd..07cea1b943c91 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
+++ b/test/xpu/test_matmul_op_xpu.py
@@ -12,18 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
-
-sys.path.append("..")
 import unittest
 
 import numpy as np
-from op_test_xpu import XPUOpTest
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from op_test_xpu import XPUOpTest
 
 import paddle
 from paddle import fluid
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py b/test/xpu/test_matmul_v2_op_xpu.py
similarity index 99%
rename from python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
rename to test/xpu/test_matmul_v2_op_xpu.py
index 4149af1226852..eb10d1462e466 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
+++ b/test/xpu/test_matmul_v2_op_xpu.py
@@ -12,18 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
-
-sys.path.append("..")
 import unittest
 
 import numpy as np
-from op_test_xpu import XPUOpTest
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from op_test_xpu import XPUOpTest
 
 import paddle
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/test/xpu/test_mean_op_xpu.py
similarity index 97%
rename from python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py
rename to test/xpu/test_mean_op_xpu.py
index a13bea88b6a80..66ed8d7edbce3 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py
+++ b/test/xpu/test_mean_op_xpu.py
@@ -12,21 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
 import unittest
 
 import numpy as np
-
-import paddle
-
-sys.path.append("..")
 from op_test_xpu import XPUOpTest
 
+import paddle
 from paddle.fluid import Program, program_guard
 
 np.random.seed(10)
 
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu.py b/test/xpu/test_merged_momentum_op_xpu.py
similarity index 96%
rename from python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu.py
rename to test/xpu/test_merged_momentum_op_xpu.py
index 1a6455a2a712e..8f3afc5a32697 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu.py
+++ b/test/xpu/test_merged_momentum_op_xpu.py
@@ -12,17 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
 import unittest
 
-sys.path.append("..")
-
-from test_merged_momentum_op_xpu_base import TestMergedMomentumBase
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from test_merged_momentum_op_xpu_base import TestMergedMomentumBase
 
 import paddle
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu_base.py b/test/xpu/test_merged_momentum_op_xpu_base.py
similarity index 100%
rename from python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu_base.py
rename to test/xpu/test_merged_momentum_op_xpu_base.py
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_meshgrid_op_xpu.py b/test/xpu/test_meshgrid_op_xpu.py
similarity index 97%
rename from python/paddle/fluid/tests/unittests/xpu/test_meshgrid_op_xpu.py
rename to test/xpu/test_meshgrid_op_xpu.py
index dfb70604d65e1..6c00fa39d71bf 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_meshgrid_op_xpu.py
+++ b/test/xpu/test_meshgrid_op_xpu.py
@@ -12,18 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
 import unittest
 
 import numpy as np
-
-sys.path.append("..")
-from op_test_xpu import XPUOpTest
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from op_test_xpu import XPUOpTest
 
 import paddle
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py b/test/xpu/test_momentum_op_xpu.py
similarity index 98%
rename from python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py
rename to test/xpu/test_momentum_op_xpu.py
index 73d39c17ed072..50854cdeb9fae 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py
+++ b/test/xpu/test_momentum_op_xpu.py
@@ -12,19 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys
 import unittest
 
 import numpy as np
-
-sys.path.append("..")
-
-from op_test_xpu import XPUOpTest
-from xpu.get_test_cover_info import (
+from get_test_cover_info import (
     XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
 )
+from op_test_xpu import XPUOpTest
 
 import paddle
 from paddle.fluid import core
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py b/test/xpu/test_mul_op_xpu.py
similarity index 98%
rename from python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py
rename to test/xpu/test_mul_op_xpu.py
index 760f88bea0f25..a924cf42e84a3 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py
+++ b/test/xpu/test_mul_op_xpu.py
@@ -12,19 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys import unittest import numpy as np +from op_test_xpu import XPUOpTest import paddle -sys.path.append("..") -from op_test_xpu import XPUOpTest - paddle.enable_static() -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py b/test/xpu/test_nearest_interp_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py rename to test/xpu/test_nearest_interp_op_xpu.py index 441439838cbcd..235ccbdd2de9c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py +++ b/test/xpu/test_nearest_interp_op_xpu.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import paddle -sys.path.append("..") - paddle.enable_static() ''' def nearest_neighbor_interp_np(X, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py b/test/xpu/test_nearest_interp_v2_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py rename to test/xpu/test_nearest_interp_v2_op_xpu.py index 35c362b9a9aba..9caac459a9451 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py +++ b/test/xpu/test_nearest_interp_v2_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py b/test/xpu/test_one_hot_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py rename to test/xpu/test_one_hot_op_xpu.py index 4a24e3e2028e5..941387b3eb1fb 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py +++ b/test/xpu/test_one_hot_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py b/test/xpu/test_one_hot_v2_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py rename to test/xpu/test_one_hot_v2_op_xpu.py index 7fca3ab6827ab..80a60eed539c0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py +++ b/test/xpu/test_one_hot_v2_op_xpu.py @@ -12,22 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -import paddle -from paddle import fluid -from paddle.fluid import core - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle +from paddle import fluid +from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_p_norm_op_xpu.py b/test/xpu/test_p_norm_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_p_norm_op_xpu.py rename to test/xpu/test_p_norm_op_xpu.py index 959ae77ca0117..3f09c8eeda772 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_p_norm_op_xpu.py +++ b/test/xpu/test_p_norm_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py b/test/xpu/test_pad3d_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py rename to test/xpu/test_pad3d_op_xpu.py index 7c4db207c8ef9..2757ed1e3e70a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py +++ b/test/xpu/test_pad3d_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py b/test/xpu/test_parallel_dygraph_dataparallel.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py rename to test/xpu/test_parallel_dygraph_dataparallel.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pixel_shuffle_op_xpu.py b/test/xpu/test_pixel_shuffle_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_pixel_shuffle_op_xpu.py rename to test/xpu/test_pixel_shuffle_op_xpu.py index 6674cf33ebb04..444066ffbc548 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pixel_shuffle_op_xpu.py +++ b/test/xpu/test_pixel_shuffle_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py b/test/xpu/test_pool2d_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py rename to test/xpu/test_pool2d_op_xpu.py index f5a7bb398d63b..5c4233ee36e78 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py +++ b/test/xpu/test_pool2d_op_xpu.py @@ -12,19 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from test_pool2d_op import adaptive_end_index, adaptive_start_index -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest +from test_pool2d_op import adaptive_end_index, adaptive_start_index import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pool3d_op_xpu.py b/test/xpu/test_pool3d_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_pool3d_op_xpu.py rename to test/xpu/test_pool3d_op_xpu.py index 06161a14054c8..43b3675563e64 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pool3d_op_xpu.py +++ b/test/xpu/test_pool3d_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pool_max_op_xpu.py b/test/xpu/test_pool_max_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_pool_max_op_xpu.py rename to test/xpu/test_pool_max_op_xpu.py index 57f09ab1f7410..0eb11bb83b70d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pool_max_op_xpu.py +++ b/test/xpu/test_pool_max_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py b/test/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py rename to test/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py index 78ca6933181aa..71da7768cc12f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py +++ b/test/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py @@ -15,14 +15,14 @@ import sys import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + +from get_test_cover_info import record_op_test + import paddle from paddle.fluid.contrib.layers.nn import pow2_decay_with_linear_warmup from paddle.optimizer.lr import LinearWarmup, PolynomialDecay -sys.path.append("..") - -from xpu.get_test_cover_info import record_op_test - def gen_pow2_warmup_op_lr(warmup_steps, total_steps, base_lr, end_lr, place): main = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py b/test/xpu/test_prelu_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py rename to test/xpu/test_prelu_op_xpu.py index 6bd4fcf8d5c56..0a0ea28269722 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py +++ b/test/xpu/test_prelu_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py b/test/xpu/test_prior_box_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py rename to test/xpu/test_prior_box_op_xpu.py index 52d3ca875efdf..3b69cbaba341e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py +++ b/test/xpu/test_prior_box_op_xpu.py @@ -13,19 +13,15 @@ # limitations under the License. 
import math -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_prod_op_xpu.py b/test/xpu/test_prod_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_prod_op_xpu.py rename to test/xpu/test_prod_op_xpu.py index 1fb907f9f0924..a873fa8ecaf31 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_prod_op_xpu.py +++ b/test/xpu/test_prod_op_xpu.py @@ -15,9 +15,9 @@ import sys import unittest -import numpy as np +sys.path.append('../../python/paddle/fluid/tests/unittests') -sys.path.append("..") +import numpy as np from test_sum_op import TestReduceOPTensorAxisBase import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_randint_op_xpu.py b/test/xpu/test_randint_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_randint_op_xpu.py rename to test/xpu/test_randint_op_xpu.py index baeff8a10a640..e697109a1baea 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_randint_op_xpu.py +++ b/test/xpu/test_randint_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_randperm_op_xpu.py b/test/xpu/test_randperm_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_randperm_op_xpu.py rename to test/xpu/test_randperm_op_xpu.py index 0e285f6b03c0e..f28944e0009a2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_randperm_op_xpu.py +++ b/test/xpu/test_randperm_op_xpu.py @@ -12,22 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle -from paddle.fluid import core -from paddle.static import Program, program_guard - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle +from paddle.fluid import core +from paddle.static import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py b/test/xpu/test_range_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py rename to test/xpu/test_range_xpu.py index 2870cbb7a7cc6..f202a08c0f364 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py +++ b/test/xpu/test_range_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py b/test/xpu/test_recompute_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_recompute_op_xpu.py rename to test/xpu/test_recompute_op_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py b/test/xpu/test_reduce_all_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py rename to test/xpu/test_reduce_all_op_xpu.py index 987b968b0a691..313d8297a1705 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py +++ b/test/xpu/test_reduce_all_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amax_op_xpu.py b/test/xpu/test_reduce_amax_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_amax_op_xpu.py rename to test/xpu/test_reduce_amax_op_xpu.py index 49ffef884d3db..0de9b6c6e7306 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amax_op_xpu.py +++ b/test/xpu/test_reduce_amax_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amin_op_xpu.py b/test/xpu/test_reduce_amin_op_xpu.py similarity index 96% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_amin_op_xpu.py rename to test/xpu/test_reduce_amin_op_xpu.py index 4f2ca6fea3ff8..ad1d643bb9703 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amin_op_xpu.py +++ b/test/xpu/test_reduce_amin_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_any_op_xpu.py b/test/xpu/test_reduce_any_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_any_op_xpu.py rename to test/xpu/test_reduce_any_op_xpu.py index a255dc390bcc0..5b4e0740cfacc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_any_op_xpu.py +++ b/test/xpu/test_reduce_any_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py b/test/xpu/test_reduce_max_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py rename to test/xpu/test_reduce_max_op_xpu.py index dd00a711f85ac..1b76f78d09ac7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py +++ b/test/xpu/test_reduce_max_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py b/test/xpu/test_reduce_mean_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py rename to test/xpu/test_reduce_mean_op_xpu.py index ed3d51ff4fd62..d2447debaa479 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py +++ b/test/xpu/test_reduce_mean_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py b/test/xpu/test_reduce_min_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py rename to test/xpu/test_reduce_min_op_xpu.py index 87ab399863596..692d06df6a6d2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py +++ b/test/xpu/test_reduce_min_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py b/test/xpu/test_reduce_prod_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py rename to test/xpu/test_reduce_prod_op_xpu.py index 1e9c259f0e580..ab44b1be351e9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py +++ b/test/xpu/test_reduce_prod_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py b/test/xpu/test_reduce_sum_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py rename to test/xpu/test_reduce_sum_op_xpu.py index 4137b2b18cbc2..e6ed19365c65e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py +++ b/test/xpu/test_reduce_sum_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py b/test/xpu/test_refactor_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py rename to test/xpu/test_refactor_op_xpu.py index 55f32c876c74a..976a33244209a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py +++ b/test/xpu/test_refactor_op_xpu.py @@ -15,17 +15,16 @@ import sys import unittest -import numpy as np - -sys.path.append("..") +sys.path.append('../../python/paddle/fluid/tests/unittests') +import numpy as np from eager_op_test import OpTest -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py b/test/xpu/test_reshape2_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py rename to test/xpu/test_reshape2_op_xpu.py index 41415a574b589..ce825d89c0957 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py +++ b/test/xpu/test_reshape2_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py b/test/xpu/test_rmsprop_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py rename to test/xpu/test_rmsprop_op_xpu.py index c905eb9a57974..604f9e4bb00bd 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py +++ b/test/xpu/test_rmsprop_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py b/test/xpu/test_rnn_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py rename to test/xpu/test_rnn_op_xpu.py index e28a7ff9c10d5..2b6100247e379 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py +++ b/test/xpu/test_rnn_op_xpu.py @@ -10,26 +10,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import random +import sys import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core -sys.path.append("../rnn") +sys.path.append('../../python/paddle/fluid/tests/unittests/rnn') from convert import get_params_for_net -from rnn_numpy import LSTM -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from rnn_numpy import LSTM random.seed(2) np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py b/test/xpu/test_roi_align_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py rename to test/xpu/test_roi_align_op_xpu.py index 1c3ad0af30c9c..d65f78be1a488 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py +++ b/test/xpu/test_roi_align_op_xpu.py @@ -12,19 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import math import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_roll_op_xpu.py b/test/xpu/test_roll_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_roll_op_xpu.py rename to test/xpu/test_roll_op_xpu.py index 25b156a280a64..8c3a9c6fcb164 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_roll_op_xpu.py +++ b/test/xpu/test_roll_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py b/test/xpu/test_scale_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py rename to test/xpu/test_scale_op_xpu.py index 1dd41e90a1700..fbc3b7f820856 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py +++ b/test/xpu/test_scale_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scatter_nd_add_op_xpu.py b/test/xpu/test_scatter_nd_add_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_scatter_nd_add_op_xpu.py rename to test/xpu/test_scatter_nd_add_op_xpu.py index 12e159706ea1e..f303cd9ce5150 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_scatter_nd_add_op_xpu.py +++ b/test/xpu/test_scatter_nd_add_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py b/test/xpu/test_scatter_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py rename to test/xpu/test_scatter_op_xpu.py index 565549f0f16bb..50c860bdd8673 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py +++ b/test/xpu/test_scatter_op_xpu.py @@ -12,20 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, type_dict_str_to_numpy, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py b/test/xpu/test_sequence_conv_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py rename to test/xpu/test_sequence_conv_op_xpu.py index a4f960fc9e31b..4a52ea54f4aff 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py +++ b/test/xpu/test_sequence_conv_op_xpu.py @@ -21,12 +21,12 @@ import paddle sys.path.append("../") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest paddle.enable_static() np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sequence_unpad_op_xpu.py b/test/xpu/test_sequence_unpad_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_sequence_unpad_op_xpu.py rename to test/xpu/test_sequence_unpad_op_xpu.py index 65f52bcfc0b1d..15215fcb0c614 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sequence_unpad_op_xpu.py +++ b/test/xpu/test_sequence_unpad_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py b/test/xpu/test_set_value_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py rename to test/xpu/test_set_value_op_xpu.py index 90277c7f484d6..e749eb8bc1b11 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py +++ b/test/xpu/test_set_value_op_xpu.py @@ -21,12 +21,12 @@ import numpy as np sys.path.append("../") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid.layer_helper import LayerHelper diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/test/xpu/test_sgd_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py rename to test/xpu/test_sgd_op_xpu.py index 42cdfd0c82d2a..6c57c19438ad6 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py +++ b/test/xpu/test_sgd_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py b/test/xpu/test_shape_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py rename to test/xpu/test_shape_op_xpu.py index 2f8d7ec830077..a812369ea526e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py +++ b/test/xpu/test_shape_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py b/test/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py rename to test/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py index 8c0b3e4c73384..30369e9f22d85 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py +++ b/test/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py @@ -12,19 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from scipy.special import expit, logit -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest +from scipy.special import expit, logit import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py b/test/xpu/test_sign_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py rename to test/xpu/test_sign_op_xpu.py index 8743310a9c697..e6b2334f9b7f3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py +++ b/test/xpu/test_sign_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py b/test/xpu/test_slice_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py rename to test/xpu/test_slice_op_xpu.py index 09368723a1f48..f19c3d37e283e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py +++ b/test/xpu/test_slice_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py b/test/xpu/test_softmax_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py rename to test/xpu/test_softmax_op_xpu.py index 24c25bbe1a88e..9b849832bd984 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py +++ b/test/xpu/test_softmax_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() np.random.seed(10) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py b/test/xpu/test_softmax_with_cross_entropy_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py rename to test/xpu/test_softmax_with_cross_entropy_op_xpu.py index 1ecc1eb4934ca..cb623e900d42b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py +++ b/test/xpu/test_softmax_with_cross_entropy_op_xpu.py @@ -12,20 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") - import unittest import numpy as np -from op_test_xpu import XPUOpTest -from test_softmax_op import stable_softmax -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest +from test_softmax_op import stable_softmax import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py b/test/xpu/test_split_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py rename to test/xpu/test_split_op_xpu.py index dca61b4b129a1..8bc7ee9af1b04 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py +++ b/test/xpu/test_split_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py b/test/xpu/test_squeeze2_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py rename to test/xpu/test_squeeze2_op_xpu.py index b9598bc3ca08a..4e26152551c57 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py +++ b/test/xpu/test_squeeze2_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") - import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py b/test/xpu/test_squeeze_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py rename to test/xpu/test_squeeze_op_xpu.py index 85339b9eb8b8a..5aae366c85635 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py +++ b/test/xpu/test_squeeze_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") - import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py b/test/xpu/test_stack_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py rename to test/xpu/test_stack_op_xpu.py index b13e1b9b300aa..3732de7dc33f5 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py +++ b/test/xpu/test_stack_op_xpu.py @@ -13,18 +13,18 @@ # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append("../../python/paddle/fluid/tests/unittests") + import numpy as np from eager_op_test import skip_check_grad_ci -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_strided_slice_op_xpu.py b/test/xpu/test_strided_slice_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_strided_slice_op_xpu.py rename to test/xpu/test_strided_slice_op_xpu.py index 7659ffd4ae0c6..63954dfd7859c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_strided_slice_op_xpu.py +++ b/test/xpu/test_strided_slice_op_xpu.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -import paddle - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py b/test/xpu/test_sum_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py rename to test/xpu/test_sum_op_xpu.py index 77d934e478cb5..3b51b0adb76d0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py +++ b/test/xpu/test_sum_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_temporal_shift_op_xpu.py b/test/xpu/test_temporal_shift_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_temporal_shift_op_xpu.py rename to test/xpu/test_temporal_shift_op_xpu.py index 4a1967326504f..71904903fc145 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_temporal_shift_op_xpu.py +++ b/test/xpu/test_temporal_shift_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py b/test/xpu/test_tile_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py rename to test/xpu/test_tile_op_xpu.py index c6f9c79be4d6f..dc2b0d7f0edcd 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py +++ b/test/xpu/test_tile_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py b/test/xpu/test_top_k_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py rename to test/xpu/test_top_k_op_xpu.py index 8dfbddbb1cf59..131bb0c1d0711 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py +++ b/test/xpu/test_top_k_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py b/test/xpu/test_top_k_v2_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py rename to test/xpu/test_top_k_v2_op_xpu.py index eaad7001928fa..8230aa0ff5d22 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py +++ b/test/xpu/test_top_k_v2_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py b/test/xpu/test_transpose_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py rename to test/xpu/test_transpose_op_xpu.py index 458cf8a667421..f314eb6e4dc77 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py +++ b/test/xpu/test_transpose_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py b/test/xpu/test_tril_triu_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py rename to test/xpu/test_tril_triu_op_xpu.py index 010cf6fb6102e..15371d894fa8d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py +++ b/test/xpu/test_tril_triu_op_xpu.py @@ -10,19 +10,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -sys.path.append("..") - import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import tensor diff --git a/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py b/test/xpu/test_truncated_gaussian_random_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py rename to test/xpu/test_truncated_gaussian_random_op_xpu.py index 7355acdfcee48..c217a2641d160 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py +++ b/test/xpu/test_truncated_gaussian_random_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unbind_op_xpu.py b/test/xpu/test_unbind_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_unbind_op_xpu.py rename to test/xpu/test_unbind_op_xpu.py index fd0f36677f8fc..dc8ea7ae6bc14 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unbind_op_xpu.py +++ b/test/xpu/test_unbind_op_xpu.py @@ -12,17 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid, tensor diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unfold_op_xpu.py b/test/xpu/test_unfold_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_unfold_op_xpu.py rename to test/xpu/test_unfold_op_xpu.py index e1034d6363628..c6e80469f7d0d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unfold_op_xpu.py +++ b/test/xpu/test_unfold_op_xpu.py @@ -12,21 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -import paddle -from paddle import fluid - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest + +import paddle +from paddle import fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py b/test/xpu/test_uniform_random_op_xpu.py similarity index 95% rename from python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py rename to test/xpu/test_uniform_random_op_xpu.py index 3dff72b5d680c..f5fd57bd36696 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py +++ b/test/xpu/test_uniform_random_op_xpu.py @@ -13,10 +13,10 @@ # limitations under the License. import sys - -sys.path.append("..") import unittest +sys.path.append('../../python/paddle/fluid/tests/unittests') + import numpy as np from test_uniform_random_op import ( TestUniformRandomOp, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py b/test/xpu/test_unsqueeze2_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py rename to test/xpu/test_unsqueeze2_op_xpu.py index 56862299074f3..d8cb02e64f993 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py +++ b/test/xpu/test_unsqueeze2_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest -sys.path.append("..") - import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py b/test/xpu/test_unsqueeze_op_xpu.py similarity index 97% rename from python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py rename to test/xpu/test_unsqueeze_op_xpu.py index 4f2b1d2b5a8ad..333633031bdfd 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py +++ b/test/xpu/test_unsqueeze_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") - import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py b/test/xpu/test_unstack_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py rename to test/xpu/test_unstack_op_xpu.py index 6195ec55abd41..9d305a312b74b 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py +++ b/test/xpu/test_unstack_op_xpu.py @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py b/test/xpu/test_update_loss_scaling_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py rename to test/xpu/test_update_loss_scaling_op_xpu.py index e2b7263fed26e..86e6aac6badb5 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py +++ b/test/xpu/test_update_loss_scaling_op_xpu.py @@ -12,17 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest -sys.path.append("..") import numpy as np -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py b/test/xpu/test_warpctc_op_xpu.py similarity index 99% rename from python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py rename to test/xpu/test_warpctc_op_xpu.py index d09db48cffc51..95cf65075472f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py +++ b/test/xpu/test_warpctc_op_xpu.py @@ -13,18 +13,16 @@ # limitations under the License. 
import sys - -sys.path.append("..") import unittest import numpy as np -from op_test_xpu import XPUOpTest -from test_softmax_op import stable_softmax -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest +from test_softmax_op import stable_softmax import paddle import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py b/test/xpu/test_where_index_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py rename to test/xpu/test_where_index_xpu.py index 1a8e7aa96453c..cca29f5737336 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py +++ b/test/xpu/test_where_index_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py b/test/xpu/test_where_op_xpu.py similarity index 98% rename from python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py rename to test/xpu/test_where_op_xpu.py index a7a26f32b02cb..8dd7500517aed 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py +++ b/test/xpu/test_where_op_xpu.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import unittest import numpy as np - -sys.path.append("..") - -from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import ( +from get_test_cover_info import ( XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, ) +from op_test_xpu import XPUOpTest import paddle from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py b/test/xpu/test_while_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py rename to test/xpu/test_while_op_xpu.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py b/test/xpu/test_xpu_place.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py rename to test/xpu/test_xpu_place.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_xpu_stream_event.py b/test/xpu/test_xpu_stream_event.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_xpu_stream_event.py rename to test/xpu/test_xpu_stream_event.py diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/test/xpu/test_zero_dim_tensor_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py rename to test/xpu/test_zero_dim_tensor_xpu.py diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py index c6f190189a67f..14c4e5075f6d8 100644 --- a/tools/get_pr_ut.py +++ b/tools/get_pr_ut.py @@ -406,7 +406,7 @@ def get_pr_ut(self): ut_list.append('md_placeholder') onlyCommentsFilesOrXpu.append(f_judge) elif ( - 'tests/unittests/xpu' in f_judge + 'test/xpu' in f_judge or 'tests/unittests/npu' in f_judge or 'op_npu.cc' in f_judge ): From b835d958e53ab7f18f77cbb9797e607f83db4447 Mon Sep 17 00:00:00 2001 From: 
Yuanle Liu Date: Wed, 12 Apr 2023 10:30:03 +0800 Subject: [PATCH 32/59] fix convert_to_mixed_precision api save model bug (#52767) * update save model * update --- .../passes/convert_to_mixed_precision.cc | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc index 2589a20eb284d..963197850c9fd 100644 --- a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc +++ b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc @@ -102,32 +102,53 @@ void ConvertToMixedPrecisionPass::SaveMixedModel() { framework::ProgramDesc mixed_program_desc; framework::ir::GraphToProgram(*main_graph_, &mixed_program_desc); - auto parameters = scope_.LocalVarNames(); - std::sort(parameters.begin(), parameters.end()); - - auto SerializeParams = [&]() -> std::string { - std::ostringstream os; - phi::CPUContext ctx; - for (const auto& param : parameters) { - PADDLE_ENFORCE_NOT_NULL( - scope_.FindVar(param), - platform::errors::NotFound( - "Block should already have a '%s' variable", param)); - auto* tensor = scope_.FindVar(param)->GetMutable(); - framework::SerializeToStream(os, *tensor, ctx); + auto SerializeParams = [&](const std::string& path) { + auto IsPersistable = [](const framework::VarDesc* var) { + if (var->Persistable() && + var->GetType() != framework::proto::VarType::FEED_MINIBATCH && + var->GetType() != framework::proto::VarType::FETCH_LIST && + var->GetType() != framework::proto::VarType::RAW) { + return true; + } + return false; + }; + framework::ProgramDesc save_program; + auto* save_block = save_program.MutableBlock(0); + + const auto& global_block = mixed_program_desc.Block(0); + std::vector save_var_list; + for (framework::VarDesc* var : global_block.AllVars()) { + if (IsPersistable(var)) { + framework::VarDesc* new_var = save_block->Var(var->Name()); + new_var->SetShape(var->GetShape()); + new_var->SetDataType(var->GetDataType()); + new_var->SetType(var->GetType()); + new_var->SetLoDLevel(var->GetLoDLevel()); + new_var->SetPersistable(true); + + save_var_list.push_back(new_var->Name()); + } } - return os.str(); + std::sort(save_var_list.begin(), save_var_list.end()); + auto* op = save_block->AppendOp(); + op->SetType("save_combine"); + op->SetInput("X", save_var_list); + op->SetAttr("file_path", path); + op->CheckAttrs(); + + framework::Executor exe(platform::CPUPlace{}); + exe.Run(save_program, &scope_, 0, true, true); }; - auto StrToBinary = [](const std::string& path, const std::string& str) { + auto SerializeProg = [&](const std::string& path) { + auto str = mixed_program_desc.Proto()->SerializeAsString(); std::ofstream file(path.c_str(), std::ios::binary); file.write(str.c_str(), str.size()); file.close(); }; - StrToBinary(mixed_model_file_, - mixed_program_desc.Proto()->SerializeAsString()); - StrToBinary(mixed_params_file_, SerializeParams()); + SerializeProg(mixed_model_file_); + SerializeParams(mixed_params_file_); } bool OpSupportPrecision(const std::string& op_type, From d12b1ffa4bca06c10ed9b70a2675285cfaae818b Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Wed, 12 Apr 2023 10:32:00 +0800 Subject: [PATCH 33/59] move delete_cast_op_pass (#52788) --- paddle/fluid/framework/ir/CMakeLists.txt | 11 +++-- .../ir/{xpu => }/delete_cast_op_pass.cc | 49 +++++++++---------- .../ir/{xpu => }/delete_cast_op_pass.h | 0 .../ir/{xpu => }/delete_cast_op_pass_test.cc | 0 
.../inference/api/paddle_pass_builder.cc | 1 + 5 files changed, 31 insertions(+), 30 deletions(-) rename paddle/fluid/framework/ir/{xpu => }/delete_cast_op_pass.cc (93%) rename paddle/fluid/framework/ir/{xpu => }/delete_cast_op_pass.h (100%) rename paddle/fluid/framework/ir/{xpu => }/delete_cast_op_pass_test.cc (100%) diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 91c3ba6d608b4..b1db3dd0a43cb 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -126,6 +126,7 @@ pass_library(matmul_scale_fuse_pass inference) pass_library(gpu_cpu_map_matmul_to_mul_pass inference) pass_library(dense_fc_to_sparse_pass inference) pass_library(dense_multihead_matmul_to_sparse_pass inference) +pass_library(delete_cast_op_pass inference) pass_library(generate_pass DEPS pass_desc_proto) target_link_libraries(generate_pass pass_desc_proto) @@ -242,7 +243,6 @@ if(WITH_XPU) pass_library(fused_multi_transformer_xpu_quant_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) pass_library(stack_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) - pass_library(delete_cast_op_pass inference DIR xpu DEPS ${XPU_PASS_DEPS}) endif() cc_library( @@ -407,6 +407,11 @@ cc_test( test_delete_dequant_weight_linear_op_pass SRCS delete_weight_dequant_linear_op_pass_tester.cc DEPS delete_weight_dequant_linear_op_pass) +cc_test( + test_delete_cast_op_pass + SRCS delete_cast_op_pass_test.cc + DEPS delete_cast_op_pass) + if(WITH_GPU OR WITH_ROCM) cc_test( test_embedding_eltwise_layernorm_fuse_pass @@ -521,8 +526,4 @@ if(WITH_XPU) test_stack_fuse_pass SRCS xpu/stack_fuse_pass_test.cc DEPS stack_fuse_pass) - cc_test( - test_delete_cast_op_pass - SRCS xpu/delete_cast_op_pass_test.cc - DEPS delete_cast_op_pass) endif() diff --git a/paddle/fluid/framework/ir/xpu/delete_cast_op_pass.cc b/paddle/fluid/framework/ir/delete_cast_op_pass.cc similarity index 93% rename from paddle/fluid/framework/ir/xpu/delete_cast_op_pass.cc rename to paddle/fluid/framework/ir/delete_cast_op_pass.cc index fb417322476b2..bfda0f3238010 100644 --- a/paddle/fluid/framework/ir/xpu/delete_cast_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_cast_op_pass.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/xpu/delete_cast_op_pass.h" -#include +#include "paddle/fluid/framework/ir/delete_cast_op_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/xpu/pass_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -127,11 +126,11 @@ int DeleteCastOpPass::ApplyCastWriteReadPass(ir::Graph* graph) const { auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { VLOG(4) << "handle ApplyCastWriteReadPass fuse"; - GET_IR_NODE(cast0); - GET_IR_NODE(write_to_array); - GET_IR_NODE(cast0_in); - GET_IR_NODE(cast0_out); - GET_IR_NODE(write_to_array_out); + GET_IR_NODE_FROM_SUBGRAPH(cast0, cast0, pattern); + GET_IR_NODE_FROM_SUBGRAPH(write_to_array, write_to_array, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast0_in, cast0_in, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast0_out, cast0_out, pattern); + GET_IR_NODE_FROM_SUBGRAPH(write_to_array_out, write_to_array_out, pattern); // write_to_array_out(in graph1) may not link to any op nodes, so we fine // read_from_array by write_to_array_out name. 
@@ -281,13 +280,13 @@ int DeleteCastOpPass::ApplyCastLodResetWriteReadPass(ir::Graph* graph) const { auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { VLOG(4) << "handle ApplyCastLodResetWriteReadPass fuse"; - GET_IR_NODE(cast0); - GET_IR_NODE(lod_reset); - GET_IR_NODE(write_to_array); - GET_IR_NODE(cast0_in); - GET_IR_NODE(cast0_out); - GET_IR_NODE(lod_reset_out); - GET_IR_NODE(write_to_array_out); + GET_IR_NODE_FROM_SUBGRAPH(cast0, cast0, pattern); + GET_IR_NODE_FROM_SUBGRAPH(lod_reset, lod_reset, pattern); + GET_IR_NODE_FROM_SUBGRAPH(write_to_array, write_to_array, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast0_in, cast0_in, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast0_out, cast0_out, pattern); + GET_IR_NODE_FROM_SUBGRAPH(lod_reset_out, lod_reset_out, pattern); + GET_IR_NODE_FROM_SUBGRAPH(write_to_array_out, write_to_array_out, pattern); // write_to_array_out(in graph1) may not link to any op nodes, so we fine // read_from_array by write_to_array_out name. @@ -482,13 +481,13 @@ int DeleteCastOpPass::ApplyCastIndexSamplePass(ir::Graph* graph) const { auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { VLOG(4) << "handle ApplyCastIndexSamplePass fuse"; - GET_IR_NODE(cast0); - GET_IR_NODE(index_sample); - GET_IR_NODE(cast1); - GET_IR_NODE(cast0_in); - GET_IR_NODE(cast0_out); - GET_IR_NODE(index_sample_out); - GET_IR_NODE(cast1_out); + GET_IR_NODE_FROM_SUBGRAPH(cast0, cast0, pattern); + GET_IR_NODE_FROM_SUBGRAPH(index_sample, index_sample, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast1, cast1, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast0_in, cast0_in, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast0_out, cast0_out, pattern); + GET_IR_NODE_FROM_SUBGRAPH(index_sample_out, index_sample_out, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast1_out, cast1_out, pattern); index_sample->Op()->RenameInput(cast0_out->Name(), cast0_in->Name()); index_sample->Op()->RenameOutput(index_sample_out->Name(), @@ -545,9 +544,9 @@ int DeleteCastOpPass::ApplyCastPass(ir::Graph* graph) const { auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { VLOG(4) << "handle ApplyCastPass fuse"; - GET_IR_NODE(cast); - GET_IR_NODE(cast_in); - GET_IR_NODE(cast_out); + GET_IR_NODE_FROM_SUBGRAPH(cast, cast, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast_in, cast_in, pattern); + GET_IR_NODE_FROM_SUBGRAPH(cast_out, cast_out, pattern); for (auto* out_op_node : cast_out->outputs) { out_op_node->Op()->RenameInput(cast_out->Name(), cast_in->Name()); IR_NODE_LINK_TO(cast_in, out_op_node); diff --git a/paddle/fluid/framework/ir/xpu/delete_cast_op_pass.h b/paddle/fluid/framework/ir/delete_cast_op_pass.h similarity index 100% rename from paddle/fluid/framework/ir/xpu/delete_cast_op_pass.h rename to paddle/fluid/framework/ir/delete_cast_op_pass.h diff --git a/paddle/fluid/framework/ir/xpu/delete_cast_op_pass_test.cc b/paddle/fluid/framework/ir/delete_cast_op_pass_test.cc similarity index 100% rename from paddle/fluid/framework/ir/xpu/delete_cast_op_pass_test.cc rename to paddle/fluid/framework/ir/delete_cast_op_pass_test.cc diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 3cc8b077ad7e6..a1fe08b081eeb 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -276,6 +276,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { "transpose_flatten_concat_fuse_pass", // "conv2d_fusion_layout_transfer_pass", // 
"auto_mixed_precision_pass", // + "delete_cast_op_pass", // "inplace_op_var_pass", // should be the last pass. }); From 8d7c15a7b04f833f97dc09abf4f62ca411b5728e Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Wed, 12 Apr 2023 10:46:37 +0800 Subject: [PATCH 34/59] [CINN] add cinn sub-graph save into graphviz flag (#52766) --- .../framework/paddle2cinn/cinn_compiler.cc | 23 ++++++++++++++++++- paddle/phi/core/flags.cc | 14 +++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 359bab844303f..4c1538a28fedb 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -31,6 +31,7 @@ #include "cinn/frontend/syntax.h" #include "cinn/hlir/framework/graph.h" #include "cinn/hlir/framework/graph_compiler.h" +#include "cinn/hlir/framework/visualize_helper.h" #include "gflags/gflags.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/ir/graph.h" @@ -49,6 +50,7 @@ DECLARE_bool(enable_pe_launch_cinn); DECLARE_bool(enable_cinn_auto_tune); +DECLARE_string(cinn_subgraph_graphviz_dir); namespace paddle { namespace framework { namespace paddle2cinn { @@ -73,7 +75,6 @@ const CinnCompiledObject &CinnCompiler::Compile( const std::map &input_tensors, const Target &target, void *stream) { - VLOG(4) << "-- The graph to be compiled is:\n" << VizGraph(graph); CinnCacheKeyByAddress cur_key_by_address( graph, input_tensors, target.arch_str()); CinnCacheKeyByStructure cur_key_by_struct; @@ -85,6 +86,26 @@ const CinnCompiledObject &CinnCompiler::Compile( if (!cache_by_struct_.count(cur_key_by_struct)) { VLOG(4) << "Not found CinnCompiledObject in cache_by_struct_."; std::int64_t compiled_num = real_compiled_num_.fetch_add(1); + + if (!FLAGS_cinn_subgraph_graphviz_dir.empty()) { + const std::string &viz_path = FLAGS_cinn_subgraph_graphviz_dir + + "/fusion_groups_" + + std::to_string(compiled_num) + "/"; + if (!::cinn::hlir::framework::MakeDirectory( + viz_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) { + LOG_IF(WARNING, compiled_num == 0) + << "Failed to make directory: \"" << viz_path + << "\", the CINN subgraph's graphviz dot file will not print."; + } else { + LOG_IF(INFO, compiled_num == 0) + << "The CINN subgraph's graphviz dot file will writing into " + "path: \"" + << FLAGS_cinn_subgraph_graphviz_dir << "\""; + ::cinn::hlir::framework::WriteToFile(viz_path + "cinn_subgraph.dot", + VizGraph(graph)); + } + } + auto compiled_res = CompileGraph(graph, input_tensors, target, compiled_num, stream); std::unique_lock guard(lock_); diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 9cff3acccbd41..ad2e38b70d0ed 100644 --- a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -1001,6 +1001,20 @@ PADDLE_DEFINE_EXPORTED_bool(enable_cinn_auto_tune, "It controls whether to use cinn with " "its auto-tune feature enabled"); +/* + * CINN related FLAG + * Name: FLAGS_cinn_subgraph_graphviz_dir + * Since Version: 2.3 + * Value Range: string, default="" + * Example: FLAGS_cinn_subgraph_graphviz_dir="./cinn_graph/" will save the + * CINN sub-graph into "./cinn_graph/", and each sub-graph will save into + * "fusion_groups_*"" directory + */ +PADDLE_DEFINE_EXPORTED_string(cinn_subgraph_graphviz_dir, + "", + "Specify the directory path of dot file of " + "graph, which is used for debug."); + #endif /* From 189e0d44eaa3ef7833d1f7ed351ebcbc3113b83a Mon Sep 17 00:00:00 2001 From: 
wangzhen38 <41941775+wangzhen38@users.noreply.github.com> Date: Wed, 12 Apr 2023 11:01:45 +0800 Subject: [PATCH 35/59] Patch del (#52754) * [DO NOT MERGE] adadelta lr support * [DO NOT MERGE] gpu support * [test] follow torch * fix acc update order * for ci * [bug fix] update master para * [bug fix] update test * [bug fix] for ci test * for ci * fix xpu * [adadelta fix] del fluid head file * for ci * del notes --- .../phi/kernels/impl/adadelta_kernel_impl.h | 34 +++++++------------ 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/paddle/phi/kernels/impl/adadelta_kernel_impl.h b/paddle/phi/kernels/impl/adadelta_kernel_impl.h index c432c72d832c6..18fcd953d6532 100644 --- a/paddle/phi/kernels/impl/adadelta_kernel_impl.h +++ b/paddle/phi/kernels/impl/adadelta_kernel_impl.h @@ -13,10 +13,6 @@ // limitations under the License. #pragma once -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" - #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/kernels/adadelta_kernel.h" #include "paddle/phi/kernels/funcs/eigen/common.h" @@ -67,26 +63,20 @@ void AdadeltaKernel(const Context& dev_ctx, -(((eigen_avg_squared_update + epsilon_).sqrt()) / ((eigen_avg_squared_grad_out + epsilon_).sqrt()) * eigen_grad_cast); Eigen::DSizes m_dsize(avg_squared_update_out->numel()); - if (paddle::platform::is_cpu_place(dev_ctx.GetPlace())) { - auto* lr = learning_rate.data(); + auto lr = EigenVector::Flatten(learning_rate); + if (multi_precision) { + auto eigen_master_param_out = + EigenVector::Flatten(*master_param_outs); + auto eigen_master_param = EigenVector::Flatten(*master_param); + + eigen_master_param_out.device(place) = + eigen_master_param + lr.broadcast(m_dsize) * update; eigen_param_out.device(place) = - eigen_param + lr[0] * update.template cast(); + (eigen_param.template cast() + lr.broadcast(m_dsize) * update) + .template cast(); } else { - auto lr = EigenVector::Flatten(learning_rate); - if (multi_precision) { - auto eigen_master_param_out = - EigenVector::Flatten(*master_param_outs); - auto eigen_master_param = EigenVector::Flatten(*master_param); - - eigen_master_param_out.device(place) = - eigen_master_param + lr.broadcast(m_dsize) * update; - eigen_param_out.device(place) = (eigen_param.template cast() + - lr.broadcast(m_dsize) * update) - .template cast(); - } else { - eigen_param_out.device(place) = - eigen_param + (lr.broadcast(m_dsize) * update).template cast(); - } + eigen_param_out.device(place) = + eigen_param + (lr.broadcast(m_dsize) * update).template cast(); } eigen_avg_squared_update_out.device(place) = rho_ * eigen_avg_squared_update + (1 - rho_) * update.square(); From a482f6bfff60de3c689dd6208bf2b3b2bc030da1 Mon Sep 17 00:00:00 2001 From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com> Date: Wed, 12 Apr 2023 11:05:16 +0800 Subject: [PATCH 36/59] remove [-Wimplicit-fallthrough=] warning (#52783) * test, test=develop * test, test=develop * test, test=develop --- paddle/fluid/imperative/layout_transformer.h | 8 ++------ .../composite_backward_api.h | 5 +---- paddle/utils/string/tinyformat/tinyformat.h | 19 +++++++------------ 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/imperative/layout_transformer.h b/paddle/fluid/imperative/layout_transformer.h index 93c924a095c9e..2bdbead6aae0d 100644 --- a/paddle/fluid/imperative/layout_transformer.h +++ b/paddle/fluid/imperative/layout_transformer.h @@ -402,16 +402,12 @@ class 
ArgmaxOpTransformer case paddle::framework::proto::AttrType::INT: { auto axis = PADDLE_GET_CONST(int, (*attrs)["axis"]); (*attrs)["axis"] = static_cast(perm[axis]); -#ifdef LINUX - __attribute__((fallthrough)); -#endif + break; } case paddle::framework::proto::AttrType::LONG: { auto axis = PADDLE_GET_CONST(int64_t, (*attrs)["axis"]); (*attrs)["axis"] = static_cast(perm[axis]); -#ifdef LINUX - __attribute__((fallthrough)); -#endif + break; } default: VLOG(4) << "The data_type of axis is Error, axis must be int or " diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h index 6697f1a614c38..c0830b2a75428 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h +++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h @@ -1483,11 +1483,8 @@ void batch_norm_grad(const Tensor& x, if (bias_grad) { set_output(out_grad_data_sum, bias_grad); } - break; } -#ifdef LINUX - __attribute__((fallthrough)); -#endif + break; } default: diff --git a/paddle/utils/string/tinyformat/tinyformat.h b/paddle/utils/string/tinyformat/tinyformat.h index bd8d47849db96..41319c391455e 100644 --- a/paddle/utils/string/tinyformat/tinyformat.h +++ b/paddle/utils/string/tinyformat/tinyformat.h @@ -691,9 +691,8 @@ inline const char *streamStateFromFormat(std::ostream &out, // NOLINT break; case 'X': out.setf(std::ios::uppercase); -#ifdef LINUX - __attribute__((fallthrough)); -#endif + break; + case 'x': case 'p': out.setf(std::ios::hex, std::ios::basefield); @@ -701,26 +700,22 @@ inline const char *streamStateFromFormat(std::ostream &out, // NOLINT break; case 'E': out.setf(std::ios::uppercase); -#ifdef LINUX - __attribute__((fallthrough)); -#endif + break; case 'e': out.setf(std::ios::scientific, std::ios::floatfield); out.setf(std::ios::dec, std::ios::basefield); break; case 'F': out.setf(std::ios::uppercase); -#ifdef LINUX - __attribute__((fallthrough)); -#endif + + break; + case 'f': out.setf(std::ios::fixed, std::ios::floatfield); break; case 'G': out.setf(std::ios::uppercase); -#ifdef LINUX - __attribute__((fallthrough)); -#endif + break; case 'g': out.setf(std::ios::dec, std::ios::basefield); From 3a7980f27c1f46c55628e236ea57aca3cc6b1569 Mon Sep 17 00:00:00 2001 From: xiongkun Date: Wed, 12 Apr 2023 11:07:00 +0800 Subject: [PATCH 37/59] bugfix for sparse tensor reduce. 
(#52785) --- paddle/fluid/distributed/collective/reducer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc index 63071139a5f40..defc84fbe3d9c 100644 --- a/paddle/fluid/distributed/collective/reducer.cc +++ b/paddle/fluid/distributed/collective/reducer.cc @@ -821,9 +821,9 @@ void EagerReducer::MarkVarReady(const size_t var_index, auto &group = groups_[group_index]; auto &group_tensor = group.dense_tensors_[inside_group_index]; - const auto length = group.length_[inside_group_index]; if (!group.is_sparse_) { + const auto length = group.length_[inside_group_index]; if (is_used_var) { auto *autograd_meta = tensors_[var_index].get_autograd_meta(); auto &grad_tensor = From f9b155f98956368c9d248f8a9cce598c90b204c5 Mon Sep 17 00:00:00 2001 From: Wei Shengyu Date: Wed, 12 Apr 2023 11:18:13 +0800 Subject: [PATCH 38/59] [AMP OP&Test] add fp16/bf16 unittest for pool2d op (#52288) * add bf16 support and bf16/fp16 unittest for pool2d * add include files * dbg * reformat * reformat * modify code according to review comment * remove duplicate code * remove dup code * remove useless include * dbg --- paddle/phi/kernels/gpu/pool_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/pool_kernel.cu | 3 +- .../fluid/tests/unittests/test_pool2d_op.py | 114 +++++++++++++++--- 3 files changed, 103 insertions(+), 17 deletions(-) diff --git a/paddle/phi/kernels/gpu/pool_grad_kernel.cu b/paddle/phi/kernels/gpu/pool_grad_kernel.cu index 598a48f802891..e4cfcb23b730e 100644 --- a/paddle/phi/kernels/gpu/pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/pool_grad_kernel.cu @@ -25,7 +25,8 @@ PD_REGISTER_KERNEL(pool2d_grad, phi::Pool2dGradKernel, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(pool2d_double_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/gpu/pool_kernel.cu b/paddle/phi/kernels/gpu/pool_kernel.cu index 6323909c9d0dc..65d0ef4bdc916 100644 --- a/paddle/phi/kernels/gpu/pool_kernel.cu +++ b/paddle/phi/kernels/gpu/pool_kernel.cu @@ -25,7 +25,8 @@ PD_REGISTER_KERNEL(pool2d, phi::Pool2dKernel, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(max_pool2d_with_index, GPU, ALL_LAYOUT, diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_op.py b/python/paddle/fluid/tests/unittests/test_pool2d_op.py index 5ab2bad28e3c3..aae7ba87697ce 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np +from eager_op_test import convert_float_to_uint16 import paddle from paddle.fluid import core @@ -366,7 +367,11 @@ def setUp(self): self.init_data_format() self.init_shape() - input = np.random.random(self.shape).astype(self.dtype) + if self.is_bfloat16_op(): + input = np.random.random(self.shape).astype(np.float32) + else: + input = np.random.random(self.shape).astype(self.dtype) + output = pool2D_forward_naive( input, self.ksize, @@ -379,8 +384,14 @@ def setUp(self): self.data_format, self.pool_type, self.padding_algorithm, - ).astype(self.dtype) - self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} + ) + + if self.is_bfloat16_op(): + output = convert_float_to_uint16(output) + self.inputs = {'X': convert_float_to_uint16(input)} + else: + output = output.astype(self.dtype) + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.attrs = { 'strides': 
self.strides, @@ -427,7 +438,6 @@ def test_check_grad(self): place, {'X'}, 'Out', - max_relative_error=0.07, check_dygraph=(not self.use_mkldnn), ) elif self.pool_type != "max": @@ -577,7 +587,6 @@ def test_check_output(self): if core.is_float16_supported(place): self.check_output_with_place( place, - atol=1e-3, check_dygraph=(not self.use_mkldnn), ) @@ -593,7 +602,6 @@ def test_check_grad(self): place, {'X'}, 'Out', - max_relative_error=0.07, check_dygraph=(not self.use_mkldnn), ) @@ -618,7 +626,6 @@ def test_check_output(self): if core.is_float16_supported(place): self.check_output_with_place( place, - atol=1e-3, check_dygraph=(not self.use_mkldnn), ) @@ -634,7 +641,6 @@ def test_check_grad(self): place, {'X'}, 'Out', - max_relative_error=0.07, check_dygraph=(not self.use_mkldnn), ) @@ -643,20 +649,58 @@ def test_check_grad(self): globals()[cls_name] = TestFp16Case +def create_test_bf16_class(parent, check_grad=True): + @unittest.skipIf( + not core.is_compiled_with_cuda(), "core is not compiled with CUDA" + ) + class TestBf16Case(parent): + def init_kernel_type(self): + self.use_cuda = True + self.dtype = np.uint16 + + def test_check_output(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + self.check_output_with_place( + place, + check_dygraph=(not self.use_mkldnn), + ) + + def test_check_grad(self): + place = core.CUDAPlace(0) + if self.pool_type != "max" and check_grad: + self.check_grad_with_place( + place, + {'X'}, + 'Out', + check_dygraph=(not self.use_mkldnn), + ) + + cls_name = "{}_{}".format(parent.__name__, "Bf16Op") + TestBf16Case.__name__ = cls_name + globals()[cls_name] = TestBf16Case + + create_test_cudnn_fp16_class(TestPool2D_Op) -create_test_cudnn_fp16_class(TestCase1, check_grad=False) +create_test_cudnn_fp16_class(TestCase1) create_test_cudnn_fp16_class(TestCase2) create_test_cudnn_fp16_class(TestCase3) create_test_cudnn_fp16_class(TestCase4) create_test_cudnn_fp16_class(TestCase5) create_test_fp16_class(TestPool2D_Op) -create_test_fp16_class(TestCase1, check_grad=False) +create_test_fp16_class(TestCase1) create_test_fp16_class(TestCase2) create_test_fp16_class(TestCase3) create_test_fp16_class(TestCase4) create_test_fp16_class(TestCase5) +create_test_bf16_class(TestPool2D_Op) +create_test_bf16_class(TestCase1) +create_test_bf16_class(TestCase2) +create_test_bf16_class(TestCase3) +create_test_bf16_class(TestCase4) +create_test_bf16_class(TestCase5) # --------------------test pool2d use ceil mode-------------------- @@ -796,12 +840,26 @@ def init_shape(self): create_test_cudnn_class(TestCase5_AsyPadding) create_test_cudnn_fp16_class(TestPool2D_AsyPadding) -create_test_cudnn_fp16_class(TestCase1_AsyPadding, check_grad=False) +create_test_cudnn_fp16_class(TestCase1_AsyPadding) create_test_cudnn_fp16_class(TestCase2_AsyPadding) create_test_cudnn_fp16_class(TestCase3_AsyPadding) create_test_cudnn_fp16_class(TestCase4_AsyPadding) create_test_cudnn_fp16_class(TestCase5_AsyPadding) +create_test_fp16_class(TestPool2D_AsyPadding) +create_test_fp16_class(TestCase1_AsyPadding) +create_test_fp16_class(TestCase2_AsyPadding) +create_test_fp16_class(TestCase3_AsyPadding) +create_test_fp16_class(TestCase4_AsyPadding) +create_test_fp16_class(TestCase5_AsyPadding) + +create_test_bf16_class(TestPool2D_AsyPadding) +create_test_bf16_class(TestCase1_AsyPadding) +create_test_bf16_class(TestCase2_AsyPadding) +create_test_bf16_class(TestCase3_AsyPadding) +create_test_bf16_class(TestCase4_AsyPadding) +create_test_bf16_class(TestCase5_AsyPadding) + 
create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding) create_test_cudnn_use_ceil_class(TestCase1_AsyPadding) @@ -908,12 +966,26 @@ def init_shape(self): create_test_cudnn_class(TestCase5_channel_last) create_test_cudnn_fp16_class(TestPool2D_channel_last) -create_test_cudnn_fp16_class(TestCase1_channel_last, check_grad=False) +create_test_cudnn_fp16_class(TestCase1_channel_last) create_test_cudnn_fp16_class(TestCase2_channel_last) create_test_cudnn_fp16_class(TestCase3_channel_last) create_test_cudnn_fp16_class(TestCase4_channel_last) create_test_cudnn_fp16_class(TestCase5_channel_last) +create_test_fp16_class(TestPool2D_channel_last) +create_test_fp16_class(TestCase1_channel_last) +create_test_fp16_class(TestCase2_channel_last) +create_test_fp16_class(TestCase3_channel_last) +create_test_fp16_class(TestCase4_channel_last) +create_test_fp16_class(TestCase5_channel_last) + +create_test_bf16_class(TestPool2D_channel_last) +create_test_bf16_class(TestCase1_channel_last) +create_test_bf16_class(TestCase2_channel_last) +create_test_bf16_class(TestCase3_channel_last) +create_test_bf16_class(TestCase4_channel_last) +create_test_bf16_class(TestCase5_channel_last) + create_test_cudnn_use_ceil_class(TestPool2D_channel_last) create_test_cudnn_use_ceil_class(TestCase1_channel_last) @@ -1023,14 +1095,26 @@ def init_shape(self): create_test_cudnn_class(TestCase5_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestPool2D_AsyPadding_channel_last) -create_test_cudnn_fp16_class( - TestCase1_AsyPadding_channel_last, check_grad=False -) +create_test_cudnn_fp16_class(TestCase1_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestCase2_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestCase3_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestCase4_AsyPadding_channel_last) create_test_cudnn_fp16_class(TestCase5_AsyPadding_channel_last) +create_test_fp16_class(TestPool2D_AsyPadding_channel_last) +create_test_fp16_class(TestCase1_AsyPadding_channel_last) +create_test_fp16_class(TestCase2_AsyPadding_channel_last) +create_test_fp16_class(TestCase3_AsyPadding_channel_last) +create_test_fp16_class(TestCase4_AsyPadding_channel_last) +create_test_fp16_class(TestCase5_AsyPadding_channel_last) + +create_test_bf16_class(TestPool2D_AsyPadding_channel_last) +create_test_bf16_class(TestCase1_AsyPadding_channel_last) +create_test_bf16_class(TestCase2_AsyPadding_channel_last) +create_test_bf16_class(TestCase3_AsyPadding_channel_last) +create_test_bf16_class(TestCase4_AsyPadding_channel_last) +create_test_bf16_class(TestCase5_AsyPadding_channel_last) + create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding_channel_last) create_test_cudnn_use_ceil_class(TestCase1_AsyPadding_channel_last) From 0baacc694d35769dbfc6ccf790e42b80ba3e70a6 Mon Sep 17 00:00:00 2001 From: WangZhen <23097963+0x45f@users.noreply.github.com> Date: Wed, 12 Apr 2023 11:33:24 +0800 Subject: [PATCH 39/59] Fix backend typo in ut (#52757) --- test/dygraph_to_static/test_cinn_prim.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/dygraph_to_static/test_cinn_prim.py b/test/dygraph_to_static/test_cinn_prim.py index c5527e85238b6..388cb67c66f43 100644 --- a/test/dygraph_to_static/test_cinn_prim.py +++ b/test/dygraph_to_static/test_cinn_prim.py @@ -170,10 +170,10 @@ def test_backend(self): out2 = self.forward(x, None) np.testing.assert_allclose(out1, out2, rtol=1e-6) - def forward(self, x, beckend=None): + def forward(self, x, backend=None): paddle.seed(2022) net = PrimeNet() - net = paddle.jit.to_static(net, 
backend=beckend) + net = paddle.jit.to_static(net, backend=backend) out = net(x) return out From 2309aa585cd9a4d5f35a8ea936b388d9a58e8645 Mon Sep 17 00:00:00 2001 From: gaoziyuan <88373061+gzy19990617@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:04:17 +0800 Subject: [PATCH 40/59] =?UTF-8?q?=E3=80=90Hackathon=2078=E3=80=91=E4=B8=BA?= =?UTF-8?q?Paddle-TRT=E5=A2=9E=E5=8A=A0cumsum=E7=AE=97=E5=AD=90=20(#52518)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/CMakeLists.txt | 1 + .../inference/tensorrt/convert/cumsum_op.cc | 157 ++++++++++++++++ .../inference/tensorrt/convert/op_converter.h | 46 +++++ paddle/fluid/inference/tensorrt/op_teller.cc | 25 ++- .../ir/inference/test_trt_convert_cumsum.py | 176 ++++++++++++++++++ 6 files changed, 404 insertions(+), 2 deletions(-) create mode 100644 paddle/fluid/inference/tensorrt/convert/cumsum_op.cc create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cumsum.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 38222b797f14f..6523e5cfced3e 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2688,6 +2688,7 @@ USE_TRT_CONVERTER(expand_v2) USE_TRT_CONVERTER(take_along_axis) USE_TRT_CONVERTER(skip_groupnorm_act) USE_TRT_CONVERTER(preln_groupnorm_act) +USE_TRT_CONVERTER(cumsum) #if IS_TRT_VERSION_GE(8522) USE_TRT_CONVERTER(flash_multihead_matmul) USE_TRT_CONVERTER(cross_multihead_matmul) diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index cbe26a3d31e4d..1793e1207771e 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -106,6 +106,7 @@ list( skip_groupnorm_act_op.cc preln_groupnorm_act_op.cc expand_v2_op.cc + cumsum_op.cc temporal_shift_op.cc) if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) diff --git a/paddle/fluid/inference/tensorrt/convert/cumsum_op.cc b/paddle/fluid/inference/tensorrt/convert/cumsum_op.cc new file mode 100644 index 0000000000000..a46bf1efa171b --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/cumsum_op.cc @@ -0,0 +1,157 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * Cumsum Op + */ +class CumsumOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) override { +#if IS_TRT_VERSION_GE(7220) + VLOG(3) << "convert a cumsum op to tensorrt layer"; + framework::OpDesc op_desc(op, nullptr); + std::string input_x_name = op_desc.Input("X").front(); + std::string output_name = op_desc.Output("Out").front(); + auto* input_x_tensor = engine_->GetITensor(input_x_name); + auto dims = input_x_tensor->getDimensions(); + auto rank = dims.nbDims; + int axis = 0; + if (op_desc.HasAttr("axis")) { + axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis")); + if (axis < 0) { + axis += rank; + } + } + + // getAxisLength default is a scalar + auto getAxisLength = + [&](nvinfer1::ITensor* inpTensor, int axis, bool scalar = true) { + auto dims = inpTensor->getDimensions(); + int d = dims.d[axis]; + if (d >= 0) { + return Add1DConstantLayer(d, "", scalar); + } else { + nvinfer1::ITensor* inpShape = Shape(inpTensor); + return GetEleTensorOfShape(inpShape, d, scalar); + } + }; + + // Create "inputSliced" tensor that is sliced on dimension[axis] to length 1 + nvinfer1::Dims start; + start.nbDims = rank; + std::vector start_vec(rank, 0); + std::fill(start.d, start.d + rank, 0); + + nvinfer1::Dims size; + size.nbDims = rank; + nvinfer1::Dims stride; + stride.nbDims = rank; + auto axisLength = getAxisLength(input_x_tensor, axis, false); + + auto starts_tensor = + Add1DConstantLayer(start_vec, output_name + "_start_tensor_"); + auto sizes_tensor = axis == 0 ? Add1DConstantLayer(1) + : getAxisLength(input_x_tensor, 0, false); + auto strides_tensor = axis == 0 ? 
axisLength : Add1DConstantLayer(1); + + for (int i = 1; i < rank; i++) { + if (i == axis) { + std::vector strides_itensors = {strides_tensor, + axisLength}; + strides_tensor = Concat(strides_itensors); + std::vector sizes_itensors = { + sizes_tensor, Add1DConstantLayer(1)}; + sizes_tensor = Concat(sizes_itensors); + } else { + auto currLength = getAxisLength(input_x_tensor, i, false); + std::vector strides_itensors = { + strides_tensor, Add1DConstantLayer(1)}; + strides_tensor = Concat(strides_itensors); + std::vector sizes_itensors = {sizes_tensor, + currLength}; + sizes_tensor = Concat(sizes_itensors); + } + } + + auto inputSliced = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *input_x_tensor, start, size, stride); + inputSliced->setInput(1, *starts_tensor); + inputSliced->setInput(2, *sizes_tensor); + inputSliced->setInput(3, *strides_tensor); + auto inputSliced_output = inputSliced->getOutput(0); + + // Scan through each slice across axis and add it to the running sum + auto loop = TRT_ENGINE_ADD_LAYER(engine_, Loop); + nvinfer1::ITensor* tripLimit = getAxisLength(input_x_tensor, axis); + loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT); + auto iterator = loop->addIterator(*input_x_tensor, axis); + auto data = iterator->getOutput(0); + + // Squeeze inputSliced down to same shape as `data` + auto sliced_dims = inputSliced_output->getDimensions(); + std::vector subscripts(sliced_dims.nbDims); + std::iota(subscripts.begin(), subscripts.end(), 0); + auto p = std::remove_if(subscripts.begin(), + subscripts.end(), + [axis](int x) { return x == axis; }); + subscripts.resize(p - subscripts.begin()); + auto newDims = Gather(Shape(inputSliced_output), subscripts); + inputSliced_output = Reshape(inputSliced_output, newDims); + + // creat ZeroTensor + std::vector zero_vec{0.f}; + auto zero = Add1DConstantLayer(zero_vec); + auto cast = TRT_ENGINE_ADD_LAYER(engine_, Identity, *zero); + cast->setOutputType(0, inputSliced_output->getType()); + + zero = TRT_ENGINE_ADD_LAYER( + engine_, + ElementWise, + *inputSliced_output, + *BroadcastTensors(cast->getOutput(0), inputSliced_output), + nvinfer1::ElementWiseOperation::kPROD) + ->getOutput(0); + + auto runningSum = loop->addRecurrence(*zero); + auto runningSumTensor = runningSum->getOutput(0); + auto curSum = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *data, + *runningSumTensor, + nvinfer1::ElementWiseOperation::kSUM); + runningSum->setInput(1, *curSum->getOutput(0)); + auto reverseFlag = nvinfer1::LoopOutput::kCONCATENATE; + nvinfer1::ILoopOutputLayer* loopOut = + loop->addLoopOutput(*curSum->getOutput(0), reverseFlag, axis); + loopOut->setInput(1, *tripLimit); + RreplenishLayerAndOutput(loopOut, "cumsum", {output_name}, test_mode); +#else + VLOG(3) << "Cumsum is not supported when TensorRT < 7.2.2"; +#endif + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(cumsum, CumsumOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index db19e5c45d3de..e2dfe4d5ba304 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -416,6 +416,52 @@ class OpConverter { return TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0); } + nvinfer1::ITensor* Reshape(nvinfer1::ITensor* input, + nvinfer1::ITensor* newShape) { + nvinfer1::ITensor* oldShape = Shape(input); + if (oldShape == newShape) { + return input; + } + auto* shuffle = 
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + shuffle->setInput(1, *newShape); + return shuffle->getOutput(0); + } + + nvinfer1::ITensor* BroadcastTensor(nvinfer1::ITensor* input, + const int nbDims) { + auto oldShape = Shape(input); + auto oldShapeDims = oldShape->getDimensions(); + const int rank = oldShapeDims.nbDims; + if (rank > nbDims) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Cannot broadcast a higher rank tensor to a lower rank tensor.")); + } + if (rank < nbDims) { + nvinfer1::ITensor* concat_shape_tensor; + auto* one_rank_tensor = + Add1DConstantLayer(std::vector(nbDims - rank, 1)); + std::vector itensors; + itensors.push_back(one_rank_tensor); + itensors.push_back(oldShape); + concat_shape_tensor = Concat(itensors); + input = Reshape(input, concat_shape_tensor); + } + return input; + } + + nvinfer1::ITensor* BroadcastTensors(nvinfer1::ITensor* a, + nvinfer1::ITensor* b) { + const int aDims = a->getDimensions().nbDims; + const int bDims = b->getDimensions().nbDims; + if (aDims == bDims) { + VLOG(3) << "Broadcast two equal rank tensors"; + } + if (aDims > bDims) { + return BroadcastTensor(b, aDims); + } + return BroadcastTensor(a, bDims); + } + // Concat not make rank changed nvinfer1::ITensor* Concat(const std::vector& inputs, int axis = 0) { diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 24dca82d3fba1..85f5c003746c2 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2705,6 +2705,25 @@ struct SimpleOpTypeSetTeller : public Teller { #endif } + if (op_type == "cumsum") { +#if !IS_TRT_VERSION_GE(7220) + VLOG(3) << "cumsum is not supported when TensorRT < 7.2.2"; + return false; +#endif + if (!with_dynamic_shape) { + VLOG(3) << "the cumsum does not support " + "static shape yet"; + return false; + } + auto* block = desc.Block(); + if (block == nullptr) { + VLOG(3) << "The block desc is nullptr, we can't continue to analyze. " + "Developers need to check whether block_desc is passed in " + "the pass."; + return false; + } + } + if (op_type == "temporal_shift") { #if !IS_TRT_VERSION_GE(8200) VLOG(3) << "temporal_shift is not supported when TensorRT < 8.2"; @@ -2906,7 +2925,8 @@ struct SimpleOpTypeSetTeller : public Teller { "skip_groupnorm_act", "preln_groupnorm_act", "temporal_shift", - "grid_sampler"}; + "grid_sampler", + "cumsum"}; std::unordered_set teller_set{ "mul", @@ -3064,7 +3084,8 @@ struct SimpleOpTypeSetTeller : public Teller { "skip_groupnorm_act", "preln_groupnorm_act", "temporal_shift", - "grid_sampler"}; + "grid_sampler", + "cumsum"}; }; struct GenericPluginTeller : public Teller { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cumsum.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cumsum.py new file mode 100644 index 0000000000000..60dbfa37aab22 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cumsum.py @@ -0,0 +1,176 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from functools import partial +from typing import List + +import numpy as np +from program_config import ProgramConfig, TensorConfig +from trt_layer_auto_scan_test import TrtLayerAutoScanTest + +import paddle.inference as paddle_infer + + +class TrtConvertCumsum(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7220: + return False + return True + + def sample_program_configs(self): + self.trt_param.workspace_size = 1073741824 + + def generate_input1(): + if self.dims == 2: + self.input_shape = [2, 3] + return np.random.random([2, 3]).astype(np.int32) + elif self.dims == 3: + self.input_shape = [2, 3, 4] + return np.random.random([2, 3, 4]).astype(np.int64) + elif self.dims == 4: + self.input_shape = [4, 3, 32, 32] + return np.random.random([4, 3, 32, 32]).astype(np.float32) - 0.5 + + for dims in [2, 3, 4]: + for axis in range(-1, dims): + for type in ["int32", "int64", "float32", "float64"]: + self.dims = dims + ops_config = [ + { + "op_type": "cumsum", + "op_inputs": { + "X": ["input_data"], + }, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": {"axis": axis, "dtype": type}, + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input1) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + # no op_attrs + for dims in [2, 3, 4]: + self.dims = dims + ops_config = [ + { + "op_type": "cumsum", + "op_inputs": { + "X": ["input_data"], + }, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": {}, + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input1) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(): + + if self.dims == 2: + self.dynamic_shape.min_input_shape = { + "input_data": [2, 3], + } + self.dynamic_shape.max_input_shape = { + "input_data": [2, 3], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [2, 3], + } + + elif self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data": [2, 3, 4], + } + self.dynamic_shape.max_input_shape = { + "input_data": [2, 3, 4], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [2, 3, 4], + } + + elif self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data": [4, 3, 32, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data": [4, 3, 32, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [4, 3, 32, 32], + } + + def generate_trt_nodes_num(attrs, dynamic_shape): + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7220: + return 0, 3 + return 1, 2 + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + + # for dynamic_shape + generate_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield 
self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-2 + + def test(self): + self.run_test() + + +if __name__ == "__main__": + unittest.main() From f650e9011cc7018776840378092075344e4ddba5 Mon Sep 17 00:00:00 2001 From: qizhaoaoe <10208099+qizhaoaoe@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:39:39 +0800 Subject: [PATCH 41/59] fix dtype cast in amp for instance_norm. (#52765) * fix dtype cast in amp. * add test case and update docs. * remove set_prim. --- python/paddle/amp/auto_cast.py | 11 ++-- .../unittests/test_instance_norm_op_v2.py | 64 ++++++++++++++++++- python/paddle/static/amp/fp16_utils.py | 2 + 3 files changed, 71 insertions(+), 6 deletions(-) diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index 33c7855d89724..bc76f866d94eb 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -213,6 +213,9 @@ def pure_fp16_initialize(models): paddle.nn.BatchNorm3D, paddle.nn.LayerNorm, paddle.nn.SyncBatchNorm, + paddle.nn.InstanceNorm1D, + paddle.nn.InstanceNorm2D, + paddle.nn.InstanceNorm3D, ), ): continue @@ -522,7 +525,7 @@ def amp_decorate( ): """ Decorate models and optimizers for auto-mixed-precision. When level is O1(amp), the decorate will do nothing. - When level is O2(pure fp16), the decorate will cast all parameters of models to FP16, except BatchNorm and LayerNorm. + When level is O2(pure fp16), the decorate will cast all parameters of models to FP16, except BatchNorm, InstanceNorm and LayerNorm. Commonly, it is used together with `amp_guard` to achieve Pure fp16 in imperative mode. @@ -530,7 +533,7 @@ def amp_decorate( models(Layer|list of Layer, optional): The defined models by user, models must be either a single model or a list of models. Default is None. optimizers(Optimizer|list of Optimizer, optional): The defined optimizers by user, optimizers must be either a single optimizer or a list of optimizers. Default is None. level(str, optional): Auto mixed precision level. Accepted values are "O1" and "O2": O1 represent mixed precision, the decorator will do nothing; - O2 represent Pure fp16/bf16, the decorator will cast all parameters of models to FP16/BF16, except BatchNorm and LayerNorm. Default is O1(amp) + O2 represent Pure fp16/bf16, the decorator will cast all parameters of models to FP16/BF16, except BatchNorm, InstanceNorm and LayerNorm. Default is O1(amp) dtype(str, optional): Whether to use 'float16' or 'bfloat16'. Default is 'float16'. master_weight(bool, optinal): For level='O2', whether to use multi-precision during weight updating. If master_weight is None, in O2 level optimizer will use multi-precision. Default is None. save_dtype(float, optional): The save model parameter dtype when use `paddle.save` or `paddle.jit.save`,it should be float16, bfloat16, float32, float64 or None. @@ -741,7 +744,7 @@ def decorate( ): """ Decorate models and optimizers for auto-mixed-precision. When level is O1(amp), the decorate will do nothing. - When level is O2(pure float16/bfloat16), the decorate will cast all parameters of models to float16/bfloat16, except BatchNorm and LayerNorm. + When level is O2(pure float16/bfloat16), the decorate will cast all parameters of models to float16/bfloat16, except BatchNorm, InstanceNorm and LayerNorm. Commonly, it is used together with `auto_cast` to achieve Pure float16/bfloat16 in imperative mode. 
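As a rough illustration of the O2 behavior this change targets (a minimal sketch, not part of the patch, assuming a CUDA build of Paddle that already contains this fix; the toy model below is only for demonstration), the norm layers keep float32 parameters after decoration while ordinary layers are cast to float16:

    import paddle
    from paddle import nn

    model = nn.Sequential(nn.Conv2D(3, 8, 3), nn.InstanceNorm2D(8))
    model = paddle.amp.decorate(models=model, level='O2')

    # Conv parameters are cast to FP16 under O2 ...
    print(model[0].weight.dtype)  # expected: paddle.float16
    # ... while InstanceNorm, like BatchNorm and LayerNorm, stays FP32.
    print(model[1].scale.dtype)   # expected: paddle.float32
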
@@ -749,7 +752,7 @@ def decorate( models(Layer|list of Layer): The defined models by user, models must be either a single model or a list of models. Default is None. optimizers(Optimizer|list of Optimizer, optional): The defined optimizers by user, optimizers must be either a single optimizer or a list of optimizers. Default is None. level(str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2': O1 represent mixed precision, the decorator will do nothing; - O2 represent Pure float16/bfloat16, the decorator will cast all parameters of models to float16/bfloat16, except BatchNorm and LayerNorm. Default is O1(amp) + O2 represent Pure float16/bfloat16, the decorator will cast all parameters of models to float16/bfloat16, except BatchNorm, InstanceNorm and LayerNorm. Default is O1(amp) dtype(str, optional): Whether to use 'float16' or 'bfloat16'. Default is 'float16'. master_weight(bool, optinal): For level='O2', whether to use multi-precision during weight updating. If master_weight is None, in O2 level optimizer will use multi-precision. Default is None. save_dtype(float, optional): The save model parameter dtype when use `paddle.save` or `paddle.jit.save`,it should be float16, bfloat16, float32, float64 or None. diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py index d214965b2dd6e..ab687aeb034f5 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py @@ -18,8 +18,9 @@ from eager_op_test import OpTest, convert_float_to_uint16 import paddle -from paddle import fluid -from paddle.fluid import Program, core, program_guard +import paddle.nn.functional as F +from paddle import fluid, nn +from paddle.fluid import Program, core, framework, program_guard class TestInstanceNorm(unittest.TestCase): @@ -319,5 +320,64 @@ def test_check_grad(self): ) +class PrimNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.conv = nn.Conv2D(2, 4, (3, 3), bias_attr=False) + self.instance_norm = nn.InstanceNorm2D(4) + + def forward(self, x): + y = self.conv(x) + out = self.instance_norm(y) + res = F.max_pool2d(out, kernel_size=2, stride=2, padding=0) + return res + + +def apply_to_static(net, use_cinn): + build_strategy = paddle.static.BuildStrategy() + build_strategy.build_cinn_pass = use_cinn + return paddle.jit.to_static(net, build_strategy=False) + + +class TestPrimForwardAndBackward(unittest.TestCase): + """ + Test PrimNet with @to_static + amp O2(with fp32) + """ + + def setUp(self): + paddle.seed(2022) + paddle.disable_static() + self.x = paddle.randn([4, 2, 6, 6], dtype="float32") + self.x.stop_gradient = False + + def train(self, use_amp, data_layout="NCHW"): + paddle.seed(2022) + net = PrimNet() + sgd = paddle.optimizer.SGD( + learning_rate=0.1, parameters=net.parameters() + ) + net = apply_to_static(net, False) + if use_amp: + net = paddle.amp.decorate(models=net, level='O2') + with paddle.amp.auto_cast(enable=use_amp, level='O2'): + out = net(self.x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_grad() + return loss + + def test_amp_nchw(self): + if not isinstance(framework._current_expected_place(), core.CPUPlace): + expected = self.train(False) + actual = self.train(True) + np.testing.assert_allclose( + expected, + actual, + rtol=1e-3, + atol=1e-3, + ) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/static/amp/fp16_utils.py 
b/python/paddle/static/amp/fp16_utils.py index ced21f9bb758e..19d287f6fa07d 100644 --- a/python/paddle/static/amp/fp16_utils.py +++ b/python/paddle/static/amp/fp16_utils.py @@ -99,6 +99,8 @@ def _keep_fp32_input(op, in_name): return in_name != 'X' if op_type == 'layer_norm' and _keep_layer_norm_scale_bias_to_fp32(): return in_name != 'X' + if op_type == 'instance_norm': + return in_name != 'X' if op_type == 'fused_bn_add_activation': return in_name not in {'X', 'Z'} if op_type == 'resnet_unit': From f063074f4827d646141f55680a756bc43bd7d036 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Wed, 12 Apr 2023 13:25:20 +0800 Subject: [PATCH 42/59] [API]Fix paddle.arange infershape always -1 (#52764) --- .../paddle/fluid/tests/unittests/test_arange.py | 1 + python/paddle/tensor/creation.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_arange.py b/python/paddle/fluid/tests/unittests/test_arange.py index a0d1ddc8b9eec..b8d9866ebc531 100644 --- a/python/paddle/fluid/tests/unittests/test_arange.py +++ b/python/paddle/fluid/tests/unittests/test_arange.py @@ -151,6 +151,7 @@ def test_out(self): expected_data = np.arange(0, 5, 1).astype(np.float32) self.assertEqual((out == expected_data).all(), True) + self.assertListEqual(list(x1.shape), [5]) class TestArangeImperative(unittest.TestCase): diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 99d9ad594c119..456e83f816865 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -1293,6 +1293,14 @@ def arange(start=0, end=None, step=1, dtype=None, name=None): end = start start = 0 + out_shape = None + if not in_dygraph_mode() and ( + not isinstance(start, Variable) + and not isinstance(end, Variable) + and not isinstance(step, Variable) + ): + out_shape = [int(math.ceil((end - start) / step))] + if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) @@ -1324,13 +1332,6 @@ def arange(start=0, end=None, step=1, dtype=None, name=None): 'range/arange', ) helper = LayerHelper('range', **locals()) - out_shape = None - if ( - not isinstance(start, Variable) - and not isinstance(end, Variable) - and not isinstance(step, Variable) - ): - out_shape = [int(math.ceil((end - start) / step))] out = helper.create_variable_for_type_inference(dtype, shape=out_shape) helper.append_op( type='range', From 05fd6d10e9dbd0601e660794385b895d694b604d Mon Sep 17 00:00:00 2001 From: CHANGer Date: Wed, 12 Apr 2023 14:05:14 +0800 Subject: [PATCH 43/59] [Auto Parallel]Add the single-node topology detection (#52723) --- .../distributed/auto_parallel/topology.py | 351 ++++++++++++++++++ .../unittests/auto_parallel/test_topology.py | 33 ++ 2 files changed, 384 insertions(+) create mode 100644 python/paddle/distributed/auto_parallel/topology.py create mode 100644 python/paddle/fluid/tests/unittests/auto_parallel/test_topology.py diff --git a/python/paddle/distributed/auto_parallel/topology.py b/python/paddle/distributed/auto_parallel/topology.py new file mode 100644 index 0000000000000..9de045bd612a3 --- /dev/null +++ b/python/paddle/distributed/auto_parallel/topology.py @@ -0,0 +1,351 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import subprocess +import warnings + + +def call_cmd(cmd, err_msg, default_value): + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + shell=True, + ) + stdout, stderr = process.communicate() + if stderr: + warnings.warn(err_msg) + stdout = default_value + + return stdout + + +class SingleNodeTopology: + def __init__(self): + self.pcie_latency = 0.0 + self.pcie_bandwidth = float('inf') + self.nvlink_bandwidth = -1.0 + self.nb_devices = 8 + + self.machine = {} + self.devices = [] + self.links = [] + self.json_object = None + + def calculate_cpu_flops(self): + # Get number sockets + cmd = "lscpu | grep 'Socket(s)' | awk '{print $NF}'" + err_msg = "Failed to get number of sockets" + default_value = 4 + nb_sockets = call_cmd(cmd, err_msg, default_value) + + # Get number of cores per socket + cmd = "lscpu | grep 'Core(s) per socket' | awk '{print $NF}'" + err_msg = "Failed to get number of cores per socket" + default_value = 20 + nb_cores_per_socket = call_cmd(cmd, err_msg, default_value) + + # Get clock speed + cmd = "lscpu | grep GHz | awk -F '@' '{print $NF}' | awk -F 'G' '{print $1}'" + err_msg = "Failed to get cpu clock rate" + default_value = 2.4 + clock_rate = call_cmd(cmd, err_msg, default_value) + + # Get number of FMA units + # TODO(changtao02): find a way to detect this value + nb_fmas = 2 + + # Get SIMD width + simd_width_sp = 0 + simd_width_dp = 0 + + cmd = "lscpu | grep sse" + err_msg = "Failed to get cpu vector size" + default_value = "sse" + vector_size = call_cmd(cmd, err_msg, default_value) + + if vector_size: + simd_width_sp = 4 # 128 / 32 + simd_width_dp = 2 # 128 / 64 + + cmd = "lscpu | grep avx2" + err_msg = "Failed to get cpu vector size" + default_value = "avx2" + vector_size = call_cmd(cmd, err_msg, default_value) + + if vector_size: + simd_width_sp = 8 # 256 / 32 + simd_width_dp = 4 # 256 / 64 + + cmd = "lscpu | grep avx512" + err_msg = "Failed to get cpu vector size" + default_value = "avx512" + vector_size = call_cmd(cmd, err_msg, default_value) + + if vector_size: + simd_width_sp = 16 # 512 / 32 + simd_width_dp = 8 # 512 / 64 + + gflops_per_element = ( + int(nb_sockets) + * int(nb_cores_per_socket) + * float(clock_rate) + * nb_fmas + ) + sp_gflops = gflops_per_element * simd_width_sp + dp_gflops = gflops_per_element * simd_width_dp + + self.machine['sp_gflops'] = sp_gflops + self.machine['dp_gflops'] = dp_gflops + + def pcie_gen2bandwidth(self, pcie_generation): + if pcie_generation == 1: + return 0.25 + elif pcie_generation == 2: + return 0.5 + elif pcie_generation == 3: + return 1.0 + elif pcie_generation == 4: + return 2.0 + elif pcie_generation == 5: + return 4.0 + elif pcie_generation == 6: + return 8.0 + + def model2gflops(self, model): + if "H100" in model and "SXM5" in model: + return 60000, 30000 + elif "H100" in model and "PCIe" in model: + return 48000, 24000 + elif "A100" in model: + return 19500, 9700 + elif "V100" in model: + return 15700, 7800 + elif "P100" in model: + return 10600, 5300 + + def get_link_bandwidth(self, source_id, target_id): + # Get link type + 
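+        # `nvidia-smi topo -m` prints a header row first and a row-label column
+        # first, and awk rows/fields are 1-based, so device i sits at row i + 2
+        # and field i + 2 of matrix.txt.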
row_id = 2 + source_id + column_id = 2 + target_id + + cmd = ( + "cat matrix.txt | awk 'FNR==" + + str(row_id) + + " {print $" + + str(column_id) + + "}'" + ) + err_msg = "Failed to get topo matrix" + default_value = "NVL" + link_type = call_cmd(cmd, err_msg, default_value) + + link_bandwidth = self.pcie_bandwidth + + if "NV" in link_type: + if self.nvlink_bandwidth == -1.0: + cmd = "nvidia-smi nvlink -s -i 0 | tail -n 1 | awk '{print $3}'" + err_msg = "Failed to get nvlink bandwidth" + default_value = "25" + self.nvlink_bandwidth = float( + call_cmd(cmd, err_msg, default_value) + ) + + link_bandwidth = int(link_type[2:]) * self.nvlink_bandwidth + link_type = "NVL" + + return link_type, link_bandwidth + + def get_host_info(self): + # Get hostname + cmd = "hostname -s" + err_msg = "Failed to get hostname" + default_value = "localhost" + hostname = call_cmd(cmd, err_msg, default_value).strip() + + # Get ip address + cmd = "hostname -i" + err_msg = "Failed to get host ip address" + default_value = "127.0.0.1" + ip_addr = call_cmd(cmd, err_msg, default_value).strip() + + # Get CPU memory (GB) + cmd = "cat /proc/meminfo | grep 'MemAvailable' | awk -F ':' '{print $NF}' | awk '{print $1}'" + err_msg = "Failed to get cpu memory" + default_value = "41366484" + cpu_memory = int(call_cmd(cmd, err_msg, default_value)) // 1e6 + + # Get single-point flops and double-point flops (GFLOPs) + self.calculate_cpu_flops() + + self.machine['hostname'] = hostname + self.machine['addr'] = ip_addr + self.machine['memory'] = cpu_memory + + def get_device_info(self): + # Get device count + cmd = "nvidia-smi -L | wc -l" + err_msg = "Failed to get device count" + default_value = "8" + self.nb_devices = int(call_cmd(cmd, err_msg, default_value)) + + # Get PCIe latency and bandwidth (ms, GB/s) + for i in range(self.nb_devices): + cmd = ( + "nvidia-smi --id=" + + str(i) + + " --query-gpu=pcie.link.gen.max --format=csv,noheader" + ) + err_msg = "Failed to get max pcie link generation" + default_value = "4" + pcie_generation = int(call_cmd(cmd, err_msg, default_value)) + + cmd = ( + "nvidia-smi --id=" + + str(i) + + " --query-gpu=pcie.link.width.max --format=csv,noheader" + ) + err_msg = "Failed to get max pcie link width" + default_value = "16" + pcie_width = int(call_cmd(cmd, err_msg, default_value)) + + self.pcie_bandwidth = min( + self.pcie_bandwidth, + self.pcie_gen2bandwidth(pcie_generation) * pcie_width, + ) + + dev_global_ids = [] + dev_local_ids = [] + dev_types = [] + dev_models = [] + dev_memories = [] # GiB + dev_sp_gflops = [] # GB/s + dev_dp_gflops = [] # GB/s + + # Get device info + for i in range(self.nb_devices): + dev_global_ids.append(i) + dev_local_ids.append(i) + dev_types.append("GPU") + + cmd = ( + "nvidia-smi --id=" + + str(i) + + " --query-gpu=name --format=csv,noheader" + ) + err_msg = "Failed to get device name" + default_value = "NVIDIA A100-SXM4-40GB" + dev_models.append(call_cmd(cmd, err_msg, default_value).strip()) + + cmd = ( + "nvidia-smi --id=" + + str(i) + + " --query-gpu=memory.free --format=csv,noheader | awk '{print $1}'" + ) + err_msg = "Failed to get device available memory" + default_value = "40536" + dev_memories.append( + int(call_cmd(cmd, err_msg, default_value)) // 1e3 + ) + + sp_gflops, dp_gflops = self.model2gflops(dev_models[i]) + dev_sp_gflops.append(sp_gflops) + dev_dp_gflops.append(dp_gflops) + + for i in range(len(dev_global_ids)): + device = {} + device['global_id'] = dev_global_ids[i] + device['local_id'] = dev_local_ids[i] + device['type'] = dev_types[i] + 
device['model'] = dev_models[i] + device['memory'] = dev_memories[i] + device['sp_gflops'] = dev_sp_gflops[i] + device['dp_gflops'] = dev_dp_gflops[i] + self.devices.append(device) + + self.machine['latency'] = self.pcie_latency + self.machine['bandwidth'] = self.pcie_bandwidth + self.machine['devices'] = self.devices + + def get_link_info(self): + link_source_global_ids = [] + link_target_global_ids = [] + link_types = [] + link_latencies = [] # ms + link_bandwidths = [] # GB/s + + cmd = "nvidia-smi topo -m > matrix.txt" + err_msg = "Failed to get topo matrix" + default_value = "" + call_cmd(cmd, err_msg, default_value) + + # Get link info between devices + for i in range(self.nb_devices): + for j in range(self.nb_devices): + if i == j: + link_types.append("X") + link_bandwidths.append(-1.0) + else: + link_source_global_ids.append(i) + link_target_global_ids.append(j) + link_latencies.append(0.0) + if i > j: + index = j * self.nb_devices + i + link_types.append(link_types[index]) + link_bandwidths.append(link_bandwidths[index]) + elif i < j: + link_type, link_bandwidth = self.get_link_bandwidth( + i, j + ) + link_types.append(link_type) + link_bandwidths.append(link_bandwidth) + + for i in reversed(range(self.nb_devices)): + link_types.pop(i * self.nb_devices + i) + link_bandwidths.pop(i * self.nb_devices + i) + + cmd = "rm matrix.txt" + err_msg = "Failed to delete matrix.txt" + default_value = "" + call_cmd(cmd, err_msg, default_value) + + for i in range(len(link_types)): + link = {} + link['source_global_id'] = link_source_global_ids[i] + link['target_global_id'] = link_target_global_ids[i] + link['type'] = link_types[i] + link['latency'] = link_latencies[i] + link['bandwidth'] = link_bandwidths[i] + self.links.append(link) + + self.machine['links'] = self.links + + def detect(self): + # Get host info + self.get_host_info() + + # Get device info + self.get_device_info() + + # Get link info between devices + self.get_link_info() + + self.json_object = json.dumps(self.machine, indent=4) + print(self.json_object) + + def dump(self, output_path): + with open(output_path, "w") as outfile: + json.dump(self.machine, outfile, indent=4) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_topology.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_topology.py new file mode 100644 index 0000000000000..6807d22ffc3f1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_topology.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
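For reference, a minimal usage sketch of the class added above (an illustration only, assuming a single NVIDIA node where lscpu and nvidia-smi are on PATH; any shell call that fails falls back to the defaults baked into call_cmd):

    from paddle.distributed.auto_parallel.topology import SingleNodeTopology

    topo = SingleNodeTopology()
    topo.detect()                        # fills topo.machine and prints it as JSON
    topo.dump("machine_topology.json")   # writes the same dict to disk

    # topo.machine carries 'hostname', 'addr', 'memory', 'sp_gflops', 'dp_gflops',
    # plus per-device entries under 'devices' and inter-device 'links'.
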
+ +import unittest + +from paddle.distributed.auto_parallel.topo import SingleNodeTopology + + +def check_empty_json_object(json_object): + return json_object is not None + + +class TestSingleNodeTopology(unittest.TestCase): + def test_empty_topology_json_object(self): + topo = SingleNodeTopology() + topo.detect() + + self.assertTrue(check_empty_json_object(topo.json_object)) + + +if __name__ == "__main__": + unittest.main() From 2131ee5c54e2dd9ad4948170a24cc59da0c16eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Wed, 12 Apr 2023 14:12:45 +0800 Subject: [PATCH 44/59] remove *hccl*.cc (#52798) * remove c_comm_init_hccl_op.cc and c_gen_hccl_id_op.cc * remove gen_hccl_id_op.cc --- .../fluid/operators/collective/CMakeLists.txt | 2 - .../collective/c_comm_init_hccl_op.cc | 82 ---- .../operators/collective/c_gen_hccl_id_op.cc | 75 ---- .../operators/collective/gen_hccl_id_op.cc | 83 ---- .../collective/gen_hccl_id_op_helper.cc | 378 ------------------ .../collective/gen_hccl_id_op_helper.h | 52 --- .../fleet/meta_optimizers/common.py | 26 -- python/paddle/fluid/framework.py | 2 - .../unittests/ir/inference/program_config.py | 2 - 9 files changed, 702 deletions(-) delete mode 100644 paddle/fluid/operators/collective/c_comm_init_hccl_op.cc delete mode 100644 paddle/fluid/operators/collective/c_gen_hccl_id_op.cc delete mode 100644 paddle/fluid/operators/collective/gen_hccl_id_op.cc delete mode 100644 paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc delete mode 100644 paddle/fluid/operators/collective/gen_hccl_id_op_helper.h diff --git a/paddle/fluid/operators/collective/CMakeLists.txt b/paddle/fluid/operators/collective/CMakeLists.txt index 3855733a98271..b356497962689 100644 --- a/paddle/fluid/operators/collective/CMakeLists.txt +++ b/paddle/fluid/operators/collective/CMakeLists.txt @@ -27,8 +27,6 @@ register_operators( gen_bkcl_id_op c_gen_nccl_id_op gen_nccl_id_op - c_gen_hccl_id_op - gen_hccl_id_op c_gen_cncl_id_op DEPS ${COLLECTIVE_DEPS}) diff --git a/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc b/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc deleted file mode 100644 index 98bcd78b9dadc..0000000000000 --- a/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace framework { -class Scope; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { - -class CCommInitOpAscend : public framework::OperatorBase { - public: - CCommInitOpAscend(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - void RunImpl(const framework::Scope& scope, - const platform::Place& place) const override { - PADDLE_ENFORCE_EQ(platform::is_npu_place(place), - true, - platform::errors::PreconditionNotMet( - "CCommInitOpAscend can run on npu place only.")); - - auto var = scope.FindVar(Input("X")); - PADDLE_ENFORCE_NOT_NULL( - var, platform::errors::InvalidArgument("Input con not be empty.")); - - PADDLE_THROW(platform::errors::PreconditionNotMet( - "PaddlePaddle should compile with NPU.")); - } -}; - -class CCommInitOpAscendMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "Raw variable contains a NCCL UniqueId instaces."); - AddComment(R"DOC( -CCommInit operator - -Initialize collective communicatoin context within this trainer -)DOC"); - AddAttr("rank_ids", - "(int) The number of ranks of distributed trainers"); - AddAttr("rank", - "(int) The rank of the trainer in distributed training."); - AddAttr("device_id", - "(int) The deivce_id on which to initialize the communicator." - "Now, you only have to set this attr manually for pipeline " - "training. Otherwise, make it as default.") - .SetDefault(-1); - AddAttr("ring_id", "(int default 0) user specified ring id") - .SetDefault(0); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR(c_comm_init_hccl, - ops::CCommInitOpAscend, - ops::CCommInitOpAscendMaker); diff --git a/paddle/fluid/operators/collective/c_gen_hccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_hccl_id_op.cc deleted file mode 100644 index 130c45dfaad50..0000000000000 --- a/paddle/fluid/operators/collective/c_gen_hccl_id_op.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#include - -#include "glog/logging.h" -#include "paddle/fluid/framework/op_proto_maker.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/var_type_traits.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/gen_comm_id_helper.h" -#include "paddle/fluid/platform/place.h" - -namespace paddle { -namespace operators { - -class CGenHCCLIdOp : public framework::OperatorBase { - public: - CGenHCCLIdOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override {} -}; - -class CGenHCCLIdOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - VLOG(3) << "ele"; - AddOutput("Out", "Raw variable contains a HCCL UniqueId instaces."); - AddComment(R"DOC( -CGenHCCLId operator - -For trainer 0: generate a new UniqueId and send it to all the other trainers. -For trainer 1~n: start a gRPC server to get the UniqueId, once got, stop the server. -)DOC"); - AddAttr("endpoint", - "(string), e.g. 127.0.0.1:6175 " - "current listen endpoint"); - AddAttr>( - "other_endpoints", - "['trainer1_ip:port', 'trainer2_ip:port', ...] " - "list of other trainer endpoints") - .SetDefault({}); - AddAttr("rank", - "(int default 0) " - "The rank of the trainer in distributed training.") - .SetDefault(0); - AddAttr("ring_id", "(int default 0) user specified ring id") - .SetDefault(0); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR(c_gen_hccl_id, ops::CGenHCCLIdOp, ops::CGenHCCLIdOpMaker); diff --git a/paddle/fluid/operators/collective/gen_hccl_id_op.cc b/paddle/fluid/operators/collective/gen_hccl_id_op.cc deleted file mode 100644 index d472d589de544..0000000000000 --- a/paddle/fluid/operators/collective/gen_hccl_id_op.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include - -#include "glog/logging.h" -#include "paddle/fluid/framework/op_proto_maker.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/var_type_traits.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/fluid/string/split.h" - -namespace paddle { -namespace operators { - -class GenHCCLIdOp : public framework::OperatorBase { - public: - GenHCCLIdOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override {} -}; - -class GenHCCLIdOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddOutput("HCCLID", "Raw variable contains a HCCL UniqueId instaces."); - AddComment(R"DOC( -GenHCCLId operator - -For trainer 0: generate a new UniqueId and send it to all the other trainers. -For trainer 1~n: start a gRPC server to get the UniqueId, once got, stop the server. -)DOC"); - AddAttr>( - "trainers", - "['trainer0_ip:port', 'trainer1_ip:port', ...] " - "list of all trainer endpoints") - .SetDefault({}); - AddAttr("trainer_id", - "(int) " - "The index of the trainer in distributed training."); - AddAttr("hccl_comm_num", - "(int default 1) " - "The number of nccl communicator num.") - .SetDefault(1); - AddAttr("use_hierarchical_allreduce", - "(bool default false) " - "Wheter to use hierarchical allreduce.") - .SetDefault(false); - AddAttr("hierarchical_allreduce_inter_nranks", - "(int default 1) " - "Wheter to use hierarchical allreduce.") - .SetDefault(-1); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OPERATOR(gen_hccl_id, ops::GenHCCLIdOp, ops::GenHCCLIdOpMaker); diff --git a/paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc b/paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc deleted file mode 100644 index 41367305e2666..0000000000000 --- a/paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc +++ /dev/null @@ -1,378 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "glog/logging.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/var_type_traits.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/string/split.h" - -DECLARE_int32(get_host_by_name_time); - -namespace paddle { -namespace operators { - -constexpr char COMM_HEAD[] = "_pd_gen_comm_id_"; -#define HCCL_UNIQUE_ID_BYTES 1024 - -// Check system calls, such as socket, bind. -#define CHECK_SYS_CALL(call, name) \ - do { \ - int retval; \ - CHECK_SYS_CALL_VAL(call, name, retval); \ - } while (false) - -#define CHECK_SYS_CALL_VAL(call, name, retval) \ - do { \ - RETRY_SYS_CALL_VAL(call, name, retval); \ - if (retval == -1) { \ - PADDLE_THROW(platform::errors::Unavailable( \ - "Call to %s failed: %s", name, strerror(errno))); \ - } \ - } while (false) - -#define RETRY_SYS_CALL_VAL(call, name, retval) \ - do { \ - retval = (call); \ - if (retval == -1 && \ - (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN)) { \ - LOG(WARNING) << "Call " << name << " returned " << strerror(errno) \ - << " retry"; \ - } else { \ - break; \ - } \ - } while (true) - -static int SocketSend(int fd, const char* buffer, int size) { - int offset = 0; - int bytes = 0; - while (offset < size) { - bytes = send(fd, buffer + offset, size - offset, 0); - if (bytes == -1) { - if (errno != EINTR && errno != EWOULDBLOCK && errno != EAGAIN) { - // send failed - return -1; - } else { - bytes = 0; - } - } - offset += bytes; - } - return offset; -} - -static int SocketRecv(int fd, char* buffer, int size) { - int offset = 0; - int bytes = 0; - while (offset < size) { - bytes = recv(fd, buffer + offset, size - offset, 0); - if (bytes == 0) { - // closed by client, maybe probing alive client - return 0; - } - if (bytes == -1) { - if (errno != EINTR && errno != EWOULDBLOCK && errno != EAGAIN) { - return -1; - } else { - bytes = 0; - } - } - offset += bytes; - } - return offset; -} - -static void BindOrConnectFailed(int timeout, - int* try_times, - int* total_time, - const char* op, - const std::string& ep) { - PADDLE_ENFORCE_LT( - *total_time, - timeout, - platform::errors::Unavailable("%s addr=%s timeout, failed reason: %s", - op, - ep.c_str(), - strerror(errno))); - ++(*try_times); - int retry_time = std::min(*try_times * 500, 3000); // max 3 seconds - *total_time += retry_time; - - LOG(WARNING) << op << " addr=" << ep << " failed " << *try_times - << " times with reason: " << strerror(errno) << " retry after " - << retry_time / 1000.0 << " seconds"; - std::this_thread::sleep_for(std::chrono::milliseconds(retry_time)); -} - -int CreateListenSocket(const std::string& ep) { - auto addr = paddle::string::Split(ep, ':'); - PADDLE_ENFORCE_EQ( - addr.size(), - 2UL, - platform::errors::InvalidArgument( - "The endpoint should contain host and port, but got %s.", ep)); - std::string host = addr[0]; - int port = std::stoi(addr[1]); - - // creating socket fd - int server_fd = -1; - CHECK_SYS_CALL_VAL(socket(AF_INET, SOCK_STREAM, 0), "socket", server_fd); - - // NOTE. Solutions to `Address already in use`. - // 1. Reuse addr&port. Otherwise, once the server closes the socket - // before client, the server will enter TIME-WAIT status. If we bind port - // again, the error `Address already in use` will appear. - // 2. Or we can close the client first to ensure that the server does - // not enter the TIME-WAIT state. 
But this is obviously not as convenient - // as the reuse method. - int opt = 1; -#if defined(SO_REUSEPORT) - // since Linux kernel 3.9 - CHECK_SYS_CALL(setsockopt(server_fd, - SOL_SOCKET, - SO_REUSEADDR | SO_REUSEPORT, - &opt, - sizeof(opt)), - "setsockopt"); -#else - CHECK_SYS_CALL( - setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)), - "setsockopt"); -#endif - - struct sockaddr_in address; - address.sin_family = AF_INET; - address.sin_addr.s_addr = INADDR_ANY; - address.sin_port = htons(port); - - // TODO(wangxi) Set from env, default 900s=15min - int timeout = 900 * 1000; - int try_times = 0; - int total_time = 0; - while (true) { - int ret_val = -1; - RETRY_SYS_CALL_VAL( - bind(server_fd, (struct sockaddr*)&address, sizeof(address)), - "bind", - ret_val); - - if (ret_val == -1) { - BindOrConnectFailed(timeout, &try_times, &total_time, "bind", ep); - continue; - } - break; - } - - CHECK_SYS_CALL(listen(server_fd, 3), "listen"); - LOG(INFO) << "Server listening on: " << ep << " successful."; - return server_fd; -} - -void CloseSocket(int fd) { CHECK_SYS_CALL(close(fd), "close"); } - -static int SocketAccept(int server_fd, const char* head) { - struct sockaddr_in client_addr; - socklen_t addr_length = sizeof(client_addr); - char buffer[1024] = {0}; - int conn = -1; - - while (true) { - CHECK_SYS_CALL_VAL(accept(server_fd, - reinterpret_cast(&client_addr), - &addr_length), - "accept", - conn); - - int ret_val = SocketRecv(conn, buffer, strlen(head)); - if (ret_val > 0 && strncmp(buffer, head, strlen(head)) == 0) { - break; // accept client - } else { - VLOG(3) << "socket read failed with ret_val=" << ret_val; - CloseSocket(conn); - } - } - return conn; -} - -static int ConnectAddr(const std::string& ep, const char* head) { - auto addr = paddle::string::Split(ep, ':'); - PADDLE_ENFORCE_EQ( - addr.size(), - 2UL, - platform::errors::InvalidArgument( - "The endpoint should contain host and port, but got %s.", ep)); - std::string host = addr[0]; - int port = std::stoi(addr[1]); - - int sock = -1; - CHECK_SYS_CALL_VAL(socket(AF_INET, SOCK_STREAM, 0), "socket", sock); - - struct sockaddr_in server_addr; - memset(&server_addr, 0, sizeof(server_addr)); - server_addr.sin_family = AF_INET; - server_addr.sin_port = htons(port); - - char* ip = NULL; - struct hostent* hp = NULL; - // sleep for get_host_by_name_time seconds. 
- for (int i = 0; 2 * i < FLAGS_get_host_by_name_time; i++) { - hp = gethostbyname(host.c_str()); - if (hp != NULL) { - break; - } - std::this_thread::sleep_for(std::chrono::seconds(2)); - LOG(WARNING) << "gethostbyname " << host.c_str() << " error!"; - } - PADDLE_ENFORCE_NOT_NULL( - hp, - platform::errors::InvalidArgument("Fail to get host by name %s.", host)); - - int i = 0; - while (hp->h_addr_list[i] != NULL) { - ip = inet_ntoa(*(struct in_addr*)hp->h_addr_list[i]); - VLOG(3) << "gethostbyname host:" << host << " ->ip: " << ip; - break; - } - - PADDLE_ENFORCE_GT(inet_pton(AF_INET, ip, &server_addr.sin_addr), - 0, - platform::errors::Unavailable( - "Open address %s failed: %s", ep, strerror(errno))); - - // TODO(wangxi) Set from env, default 900s=15min - int timeout = 900 * 1000; - int try_times = 0; - int total_time = 0; - while (true) { - int ret_val = -1; - RETRY_SYS_CALL_VAL( - connect(sock, (struct sockaddr*)&server_addr, sizeof(server_addr)), - "connect", - ret_val); - - if (ret_val == -1) { - BindOrConnectFailed(timeout, &try_times, &total_time, "connect", ep); - continue; - } - - CHECK_SYS_CALL(SocketSend(sock, head, strlen(head)), "send"); - break; - } - return sock; -} - -static void RecvHCCLID(int conn, HcclRootInfo* hccl_id) { - char buffer[1024] = {0}; - static_assert(HCCL_UNIQUE_ID_BYTES <= 1024, - "hccl id bytes must <= buffer size"); - - CHECK_SYS_CALL(SocketRecv(conn, buffer, HCCL_UNIQUE_ID_BYTES), - "recv hccl id"); - memcpy(hccl_id, buffer, HCCL_UNIQUE_ID_BYTES); -} - -static void SendHCCLID(int conn, HcclRootInfo* hccl_id) { - char buffer[1024] = {0}; - memcpy(buffer, hccl_id, HCCL_UNIQUE_ID_BYTES); - - CHECK_SYS_CALL(SocketSend(conn, buffer, HCCL_UNIQUE_ID_BYTES), - "send hccl id"); -} - -void SendBroadCastHCCLID(std::vector servers, - int hccl_comm_num, - std::function func, - const framework::Scope& scope) { - // connect with server - std::vector connects; - for (auto server : servers) { - VLOG(3) << "connecting endpoint: " << server; - int conn = ConnectAddr(server, COMM_HEAD); - connects.push_back(conn); - } - VLOG(3) << "connecting completed..."; - - for (int i = 0; i < hccl_comm_num; ++i) { - std::string var_name = func(i); - auto var = scope.FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL( - var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); - auto hccl_id = var->GetMutable(); - PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclGetRootInfo(hccl_id)); - - int j = 0; - for (auto conn : connects) { - VLOG(3) << "sending hccl_id_var: " << var_name << " to " << servers[j] - << " hccl_comm_no: " << i; - SendHCCLID(conn, hccl_id); - ++j; - } - VLOG(3) << "sending completed..."; - } - - // close client - for (auto conn : connects) { - CloseSocket(conn); - } -} - -void RecvBroadCastHCCLID(std::string endpoint, - int hccl_comm_num, - std::function func, - const framework::Scope& scope) { - int server = CreateListenSocket(endpoint); - RecvBroadCastHCCLID(server, endpoint, hccl_comm_num, func, scope); - CloseSocket(server); -} - -void RecvBroadCastHCCLID(int server_fd, - std::string endpoint, - int hccl_comm_num, - std::function func, - const framework::Scope& scope) { - int client = SocketAccept(server_fd, COMM_HEAD); - - for (int i = 0; i < hccl_comm_num; ++i) { - std::string var_name = func(i); - auto var = scope.FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL( - var, - platform::errors::NotFound("Variable with name %s is not found", - var_name.c_str())); - auto hccl_id = var->GetMutable(); - - VLOG(3) << "trainer: " << endpoint << 
" receiving hccl_id_var: " << var_name - << " from trainer 0, hccl_comm_no: " << i; - RecvHCCLID(client, hccl_id); - } - VLOG(3) << "receiving completed..."; - CloseSocket(client); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/collective/gen_hccl_id_op_helper.h b/paddle/fluid/operators/collective/gen_hccl_id_op_helper.h deleted file mode 100644 index a64a44f9f6166..0000000000000 --- a/paddle/fluid/operators/collective/gen_hccl_id_op_helper.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include - -namespace paddle { -namespace framework { -class Scope; -} // namespace framework -} // namespace paddle - -namespace paddle { -namespace operators { - -int CreateListenSocket(const std::string& ep); - -void CloseSocket(int fd); - -void SendBroadCastHCCLID(std::vector servers, - int nccl_comm_num, - std::function func, - const framework::Scope& scope); - -// server listen on endpoint, then recv nccl id -void RecvBroadCastHCCLID(std::string endpoint, - int nccl_comm_num, - std::function func, - const framework::Scope& scope); - -// recv nccl id from socket -void RecvBroadCastHCCLID(int server_fd, - std::string endpoint, - int nccl_comm_num, - std::function func, - const framework::Scope& scope); -} // namespace operators -} // namespace paddle diff --git a/python/paddle/distributed/fleet/meta_optimizers/common.py b/python/paddle/distributed/fleet/meta_optimizers/common.py index c9474d397417a..15bd883e970be 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/common.py +++ b/python/paddle/distributed/fleet/meta_optimizers/common.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import paddle from paddle.framework import core @@ -196,31 +195,6 @@ def _add_sync_by_allreduce(block): OP_ROLE_KEY: OpRole.Forward, }, ) - elif core.is_compiled_with_custom_device('npu'): - block.append_op( - type='c_gen_hccl_id', - inputs={}, - outputs={'Out': comm_id_var}, - attrs={ - 'rank': rank, - 'endpoint': current_endpoint, - 'other_endpoints': other_endpoints, - 'ring_id': ring_id, - OP_ROLE_KEY: OpRole.Forward, - }, - ) - block.append_op( - type='c_comm_init_hccl', - inputs={'X': comm_id_var}, - outputs={}, - attrs={ - 'rank': rank, - 'ring_id': ring_id, - 'device_id': int(os.getenv("FLAGS_selected_npus")), - 'rank_ids': nranks, - OP_ROLE_KEY: OpRole.Forward, - }, - ) else: raise ValueError( "comm_id must be generated in paddlepaddle-xpu or paddlepaddle-xpu." 
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index db17ea368849d..537abbc50a8a2 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -2758,8 +2758,6 @@ class Operator: 'heter_listen_and_serv', 'c_wait_comm', 'c_wait_compute', - 'c_gen_hccl_id', - 'c_comm_init_hccl', 'copy_cross_scope', 'c_gen_cncl_id', } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py index 4591d5512c092..04e804ea135f7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py @@ -131,8 +131,6 @@ def __repr__(self): 'heter_listen_and_serv', 'c_wait_comm', 'c_wait_compute', - 'c_gen_hccl_id', - 'c_comm_init_hccl', 'copy_cross_scope', } From 895b8737f6622de0fe5117b99626cee2ca81bdb0 Mon Sep 17 00:00:00 2001 From: cyber-pioneer <116002591+cyber-pioneer@users.noreply.github.com> Date: Wed, 12 Apr 2023 14:25:14 +0800 Subject: [PATCH 45/59] fix prim resnet cinn value (#52813) --- test/prim/model/test_resnet_prim_cinn.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/prim/model/test_resnet_prim_cinn.py b/test/prim/model/test_resnet_prim_cinn.py index 2012d84546e64..eee27720313de 100644 --- a/test/prim/model/test_resnet_prim_cinn.py +++ b/test/prim/model/test_resnet_prim_cinn.py @@ -94,16 +94,16 @@ ] DY2ST_PRIM_CINN_GT = [ - 5.828784942626953, - 8.34173583984375, - 5.116049289703369, - 8.511833190917969, - 7.9524407386779785, - 7.395752906799316, - 9.666715621948242, - 8.277752876281738, - 8.718518257141113, - 10.199666023254395, + 5.828786849975586, + 8.332868576049805, + 5.038548469543457, + 8.554015159606934, + 8.106254577636719, + 7.493070125579834, + 9.479158401489258, + 8.270158767700195, + 8.324719429016113, + 10.140411376953125, ] if core.is_compiled_with_cuda(): From 523fae593c9babd77745814f90bd28f1ce3c5dcb Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 12 Apr 2023 14:46:34 +0800 Subject: [PATCH 46/59] fix CMakeLists.txt error of incorrect default value (#52780) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9dc6febdfaaa5..ef5d415212eeb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -257,7 +257,7 @@ option(WITH_BOX_PS "Compile with box_ps support" OFF) option(WITH_XBYAK "Compile with xbyak support" ON) option(WITH_CONTRIB "Compile the third-party contributation" OFF) option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE}) -option(WITH_HETERPS "Compile with heterps" OFF}) +option(WITH_HETERPS "Compile with heterps" OFF) option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF) option(WITH_INFERENCE_NVTX "Paddle inference with nvtx for profiler" OFF) From 41e37d4c84b053a386b33ce4a863faf99416010e Mon Sep 17 00:00:00 2001 From: jiangcheng Date: Wed, 12 Apr 2023 14:56:42 +0800 Subject: [PATCH 47/59] [CINN] add python.version.cinn_commit api (#52727) * [CINN] add python.version.cinn_commit api * update cinn version get function * fix cinn_commit in setup.py also need len>0 check bug --- python/env_dict.py.in | 4 ++- python/setup.py.in | 52 +++++++++++++++++++++++++++++- setup.py | 75 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/python/env_dict.py.in b/python/env_dict.py.in index 5b2078c67510c..00ca04dc56cde 100644 --- 
a/python/env_dict.py.in +++ b/python/env_dict.py.in @@ -73,5 +73,7 @@ env_dict={ 'JIT_RELEASE_WHL':'@JIT_RELEASE_WHL@', 'WITH_PSLIB':'@WITH_PSLIB@', 'PYBIND_INCLUDE_DIR':'@PYBIND_INCLUDE_DIR@', - 'WITH_PYTHON':'@WITH_PYTHON@' + 'WITH_PYTHON':'@WITH_PYTHON@', + 'WITH_CINN':'@WITH_CINN@', + 'CINN_SOURCE_DIR':'@CINN_SOURCE_DIR@' } diff --git a/python/setup.py.in b/python/setup.py.in index 650a4449b24c6..fa32dcf13c3d5 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -100,6 +100,32 @@ def is_taged(): else: return False +def get_cinn_version(): + if '@WITH_CINN@' != 'ON': + return "False" + + cinn_git_version = 'Unknown' + try: + cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null'] + cinn_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE, cwd='@CINN_SOURCE_DIR@').communicate()[0].strip() + if len(cinn_tag) > 0: + cinn_git_version = cinn_tag + except: + pass + + if cinn_git_version == 'Unknown': + try: + cmd = ['git', 'rev-parse', 'HEAD'] + cinn_commit = subprocess.Popen(cmd, stdout = subprocess.PIPE, + cwd='@CINN_SOURCE_DIR@').communicate()[0].strip() + if len(cinn_commit) > 0: + cinn_git_version = cinn_commit + except: + pass + + cinn_git_version = cinn_git_version.decode('utf-8') + return str(cinn_git_version) + def write_version_py(filename='paddle/version/__init__.py'): cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY # @@ -115,6 +141,7 @@ xpu_xccl_version = '%(xpu_xccl)s' istaged = %(istaged)s commit = '%(commit)s' with_mkl = '%(with_mkl)s' +cinn_version = '%(cinn)s' __all__ = ['cuda', 'cudnn', 'show', 'xpu', 'xpu_xccl'] @@ -143,6 +170,8 @@ def show(): xpu_xccl: the xpu xccl version of package. It will return `False` if non-XPU version paddle package is installed + cinn: the cinn version of package. It will return `False` if paddle package is not compiled with CINN + Examples: .. code-block:: python @@ -159,6 +188,7 @@ def show(): # cudnn: '7.6.5' # xpu: '20230114' # xpu_xccl: '1.0.7' + # cinn: False # Case 2: paddle is not tagged paddle.version.show() @@ -167,6 +197,7 @@ def show(): # cudnn: '7.6.5' # xpu: '20230114' # xpu_xccl: '1.0.7' + # cinn: False """ if istaged: print('full_version:', full_version) @@ -180,6 +211,7 @@ def show(): print('cudnn:', cudnn_version) print('xpu:', xpu_version) print('xpu_xccl:', xpu_xccl_version) + print('cinn:', cinn_version) def mkl(): return with_mkl @@ -251,6 +283,23 @@ def xpu_xccl(): """ return xpu_xccl_version + +def cinn(): + """Get CINN version of paddle package. + + Returns: + string: Return the version information of CINN. If paddle package is not compiled with CINN, it will return False. + + Examples: + .. 
code-block:: python + + import paddle + + paddle.version.cinn() + # False + + """ + return cinn_version ''' commit = git_commit() @@ -275,7 +324,8 @@ def xpu_xccl(): 'xpu_xccl': get_xpu_xccl_version(), 'commit': commit, 'istaged': is_taged(), - 'with_mkl': '@WITH_MKL@'}) + 'with_mkl': '@WITH_MKL@', + 'cinn': get_cinn_version()}) write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version/__init__.py') diff --git a/setup.py b/setup.py index daa9dbd5cc6e4..288500feba854 100644 --- a/setup.py +++ b/setup.py @@ -427,6 +427,57 @@ def is_taged(): return False +def get_cinn_version(): + if env_dict.get("WITH_CINN") != 'ON': + return "False" + + cinn_git_version = 'Unknown' + # try get cinn tag name + try: + cmd = [ + 'git', + 'describe', + '--exact-match', + '--tags', + 'HEAD', + '2>/dev/null', + ] + cinn_tag = ( + subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + cwd=env_dict.get("CINN_SOURCE_DIR"), + ) + .communicate()[0] + .strip() + ) + if len(cinn_tag) > 0: + cinn_git_version = cinn_tag + except: + pass + + if cinn_git_version == 'Unknown': + # try get cinn commit id + try: + cmd = ['git', 'rev-parse', 'HEAD'] + cinn_commit = ( + subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + cwd=env_dict.get("CINN_SOURCE_DIR"), + ) + .communicate()[0] + .strip() + ) + if len(cinn_commit) > 0: + cinn_git_version = cinn_commit + except: + pass + + cinn_git_version = cinn_git_version.decode('utf-8') + return str(cinn_git_version) + + def write_version_py(filename='paddle/version/__init__.py'): cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY # @@ -442,6 +493,7 @@ def write_version_py(filename='paddle/version/__init__.py'): istaged = %(istaged)s commit = '%(commit)s' with_mkl = '%(with_mkl)s' +cinn_version = '%(cinn)s' __all__ = ['cuda', 'cudnn', 'show', 'xpu', 'xpu_xccl'] @@ -470,6 +522,8 @@ def show(): xpu_xccl: the xpu xccl version of package. It will return `False` if non-XPU version paddle package is installed + cinn: the cinn version of package. It will return `False` if paddle package is not compiled with CINN + Examples: .. code-block:: python @@ -486,6 +540,7 @@ def show(): # cudnn: '7.6.5' # xpu: '20230114' # xpu_xccl: '1.0.7' + # cinn: False # Case 2: paddle is not tagged paddle.version.show() @@ -494,6 +549,7 @@ def show(): # cudnn: '7.6.5' # xpu: '20230114' # xpu_xccl: '1.0.7' + # cinn: False """ if istaged: print('full_version:', full_version) @@ -507,6 +563,7 @@ def show(): print('cudnn:', cudnn_version) print('xpu:', xpu_version) print('xpu_xccl:', xpu_xccl_version) + print('cinn:', cinn_version) def mkl(): return with_mkl @@ -578,6 +635,23 @@ def xpu_xccl(): """ return xpu_xccl_version + +def cinn(): + """Get CINN version of paddle package. + + Returns: + string: Return the version information of CINN. If paddle package is not compiled with CINN, it will return False. + + Examples: + .. 
code-block:: python + + import paddle + + paddle.version.cinn() + # False + + """ + return cinn_version ''' commit = git_commit() @@ -605,6 +679,7 @@ def xpu_xccl(): 'commit': commit, 'istaged': is_taged(), 'with_mkl': env_dict.get("WITH_MKL"), + 'cinn': get_cinn_version(), } ) From 998235e66a9160d127d204743be9d6dd462c0f3c Mon Sep 17 00:00:00 2001 From: YepKong <48173002+YepKong@users.noreply.github.com> Date: Wed, 12 Apr 2023 15:37:42 +0800 Subject: [PATCH 48/59] add autogen code support for squared_l2_norm_op (#52662) * add autogen code support for squared_l2_norm_op * Update ops.yaml --- paddle/fluid/operators/squared_l2_norm_op.cc | 89 -------------------- paddle/phi/api/yaml/backward.yaml | 10 +++ paddle/phi/api/yaml/legacy_backward.yaml | 10 --- paddle/phi/api/yaml/legacy_ops.yaml | 9 -- paddle/phi/api/yaml/op_compat.yaml | 7 ++ paddle/phi/api/yaml/ops.yaml | 9 ++ paddle/phi/ops/compat/squared_l2_norm_sig.cc | 35 -------- 7 files changed, 26 insertions(+), 143 deletions(-) delete mode 100644 paddle/fluid/operators/squared_l2_norm_op.cc delete mode 100644 paddle/phi/ops/compat/squared_l2_norm_sig.cc diff --git a/paddle/fluid/operators/squared_l2_norm_op.cc b/paddle/fluid/operators/squared_l2_norm_op.cc deleted file mode 100644 index 2e97f5b9b0dc2..0000000000000 --- a/paddle/fluid/operators/squared_l2_norm_op.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class SquaredL2NormOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -template -class SquaredL2NormGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("squared_l2_norm_grad"); - - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput("X", this->Input("X")); - - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - - op->SetAttrMap(this->Attrs()); - } -}; - -class SquaredL2NormGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -class SquaredL2NormOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor) The input of squared_l2_norm op."); - AddOutput("Out", "(Scalar) The output of squared_l2_norm op."); - AddComment(R"DOC( -SquaredL2Norm Operator. - -Computes the squared L2 norm of a tensor. 
- -$$Out = \sum_{i} X_{i}^2$$ - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -DECLARE_INFER_SHAPE_FUNCTOR(squared_l2_norm, - SquaredL2NormInferShapeFunctor, - PD_INFER_META(phi::SquaredL2NormInferMeta)); - -DECLARE_INFER_SHAPE_FUNCTOR(squared_l2_norm_grad, - SquaredL2NormGradInferShapeFunctor, - PD_INFER_META(phi::UnchangedInferMeta)); - -REGISTER_OPERATOR(squared_l2_norm, - ops::SquaredL2NormOp, - ops::SquaredL2NormOpMaker, - ops::SquaredL2NormGradOpMaker, - ops::SquaredL2NormGradOpMaker, - SquaredL2NormInferShapeFunctor); - -REGISTER_OPERATOR(squared_l2_norm_grad, - ops::SquaredL2NormGradOp, - SquaredL2NormGradInferShapeFunctor); diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 7bf3b5cd2fcd8..d288f0bf18f6a 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -1718,6 +1718,16 @@ backward : square_double_grad inplace : (out_grad -> x_grad) +- backward_op : squared_l2_norm_grad + forward : squared_l2_norm(Tensor x) -> Tensor(out) + args : (Tensor x, Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : squared_l2_norm_grad + - backward_op : squeeze_double_grad forward : squeeze_grad(Tensor xshape, Tensor grad_out, IntArray axis) -> Tensor(grad_x) args : (Tensor grad_x_grad, IntArray axis) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 4e21865c23b31..3a67b3e4a3e46 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -962,16 +962,6 @@ invoke : concat( out_grad, axis) composite : split_grad(out_grad, axis, x_grad) -- backward_op : squared_l2_norm_grad - forward : squared_l2_norm(Tensor x) -> Tensor(out) - args : (Tensor x, Tensor out_grad) - output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param: [x] - kernel : - func : squared_l2_norm_grad - - backward_op : strided_slice_grad forward : strided_slice (Tensor x, int[] axes, IntArray starts, IntArray ends, IntArray strides) -> Tensor(out) args : (Tensor x, Tensor out_grad, int[] axes, IntArray starts, IntArray ends, IntArray strides) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index b075b1935e1bb..100329f555bea 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -1208,15 +1208,6 @@ func : split_with_num backward : split_with_num_grad -- op : squared_l2_norm - args : (Tensor x) - output : Tensor - infer_meta : - func : SquaredL2NormInferMeta - kernel : - func : squared_l2_norm - backward : squared_l2_norm_grad - - op : strided_slice args : (Tensor x, int[] axes, IntArray starts, IntArray ends, IntArray strides) output : Tensor diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index e53909aa3fdee..bfbab2d52af4e 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -2322,3 +2322,10 @@ {x: X, label: Label} outputs : out : Out + +- op: squared_l2_norm + backward: squared_l2_norm_grad + inputs : + x : X + outputs : + out : Out diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index aed95190bcfe5..980505ddeb2f1 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -1765,6 +1765,15 @@ square_sr {selected_rows -> selected_rows} backward : square_grad +- op : squared_l2_norm + args : (Tensor x) + output : Tensor(out) + 
infer_meta : + func : SquaredL2NormInferMeta + kernel : + func : squared_l2_norm + backward : squared_l2_norm_grad + - op : squeeze args : (Tensor x, IntArray axis={}) output : Tensor(out), Tensor(xshape) diff --git a/paddle/phi/ops/compat/squared_l2_norm_sig.cc b/paddle/phi/ops/compat/squared_l2_norm_sig.cc deleted file mode 100644 index 7b228008f2839..0000000000000 --- a/paddle/phi/ops/compat/squared_l2_norm_sig.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature SquaredL2NormOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("squared_l2_norm", {"X"}, {}, {"Out"}); -} - -KernelSignature SquaredL2NormGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature( - "squared_l2_norm_grad", {"X", "Out@GRAD"}, {}, {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(squared_l2_norm, - phi::SquaredL2NormOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(squared_l2_norm_grad, - phi::SquaredL2NormGradOpArgumentMapping); From a64d50b720b9cf4354a1dd45bdbdfcc639c15b18 Mon Sep 17 00:00:00 2001 From: liuruyan <44316842+liuruyan@users.noreply.github.com> Date: Wed, 12 Apr 2023 15:48:22 +0800 Subject: [PATCH 49/59] Add layer func: float(), half(), bfloat16(). 
(#51635) --- paddle/fluid/pybind/place.cc | 24 +++- paddle/fluid/pybind/pybind.cc | 11 -- python/paddle/amp/__init__.py | 66 +++++++++- python/paddle/nn/layer/layers.py | 176 ++++++++++++++++++++++++++- test/amp/test_layer_convert_dtype.py | 172 ++++++++++++++++++++++++++ 5 files changed, 434 insertions(+), 15 deletions(-) create mode 100644 test/amp/test_layer_convert_dtype.py diff --git a/paddle/fluid/pybind/place.cc b/paddle/fluid/pybind/place.cc index d1d336b5bb009..aec21c6b0f629 100644 --- a/paddle/fluid/pybind/place.cc +++ b/paddle/fluid/pybind/place.cc @@ -373,7 +373,16 @@ void BindPlace(pybind11::module &m) { // NOLINT #endif .def("__repr__", string::to_string) .def("__str__", string::to_string); - +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + m.def("is_float16_supported", [](const platform::CUDAPlace &place) -> bool { + // Only GPUs with Compute Capability >= 53 support float16 + return platform::GetGPUComputeCapability(place.device) >= 53; + }); + m.def("is_bfloat16_supported", [](const platform::CUDAPlace &place) -> bool { + // Only GPUs with Compute Capability >= 80 support bfloat16 + return platform::GetGPUComputeCapability(place.device) >= 80; + }); +#endif py::class_ xpuplace(m, "XPUPlace", R"DOC( **Note**: Examples: @@ -492,7 +501,18 @@ void BindPlace(pybind11::module &m) { // NOLINT &IsSamePlace) .def("__repr__", string::to_string) .def("__str__", string::to_string); - + m.def("is_float16_supported", + [](const platform::CPUPlace &place) -> bool { return false; }); + m.def("is_bfloat16_supported", [](const platform::CPUPlace &place) -> bool { +#ifndef PADDLE_WITH_MKLDNN + return false; +#else + if (phi::backends::cpu::MayIUse(phi::backends::cpu::cpu_isa_t::avx512_core)) + return true; + else + return false; +#endif + }); py::class_ cudapinnedplace( m, "CUDAPinnedPlace", R"DOC( CUDAPinnedPlace is a descriptor of a device. diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 65aa609e34fde..bde6357ccbe2f 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -1960,17 +1960,6 @@ All parameter, weight, gradient are variables in Paddle. 
py::arg("sleep_inter") = 0, py::arg("redirect_stderr") = false); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - m.def("is_float16_supported", [](const platform::CUDAPlace &place) -> bool { - // Only GPUs with Compute Capability >= 53 support float16 - return platform::GetGPUComputeCapability(place.device) >= 53; - }); - m.def("is_bfloat16_supported", [](const platform::CUDAPlace &place) -> bool { - // Only GPUs with Compute Capability >= 80 support bfloat16 - return platform::GetGPUComputeCapability(place.device) >= 80; - }); -#endif - m.def("set_feed_variable", static_cast= 8.0, + "run test when maximum gpu's compute capability is 8.0.", + ) + def test_unsupported_bfloat16(self): + self.verify_trans_dtype( + test_type='bfloat16', + corrected_dtype=paddle.float32, + ) + + @unittest.skipIf( + not core.is_compiled_with_cuda() + or paddle.device.cuda.get_device_capability()[0] < 8.0, + "run test when gpu's compute capability is at least 8.0.", + ) + def test_supported_bfloat16(self): + self.verify_trans_dtype( + test_type='bfloat16', + corrected_dtype=paddle.bfloat16, + ) + + def test_float32(self): + paddle.set_default_dtype('float16') + self.verify_trans_dtype( + test_type='float32', + corrected_dtype=paddle.float32, + ) + paddle.set_default_dtype('float32') + + def test_excluded_layers_type_error(self): + self.assertRaises( + TypeError, self.verify_trans_dtype, excluded_layers=111 + ) + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), "Require compiled with CUDA." +) +class TestSupportedTypeInfo(unittest.TestCase): + def test_cpu(self): + res = paddle.amp.is_float16_supported('cpu') + self.assertEqual(res, False) + res = paddle.amp.is_bfloat16_supported('cpu') + self.assertEqual(res, True) + + def test_gpu_fp16_supported(self): + res = paddle.amp.is_float16_supported() + self.assertEqual(res, True) + res = paddle.amp.is_float16_supported('gpu') + self.assertEqual(res, True) + res = paddle.amp.is_float16_supported('gpu:0') + self.assertEqual(res, True) + + @unittest.skipIf( + not core.is_compiled_with_cuda() + or paddle.device.cuda.get_device_capability()[0] >= 8.0, + "run test when maximum gpu's compute capability is 8.0.", + ) + def test_gpu_bf16_unsupported(self): + res = paddle.amp.is_bfloat16_supported() + self.assertEqual(res, False) + res = paddle.amp.is_bfloat16_supported('gpu') + self.assertEqual(res, False) + + @unittest.skipIf( + not core.is_compiled_with_cuda() + or paddle.device.cuda.get_device_capability()[0] < 8.0, + "run test when gpu's compute capability is at least 8.0.", + ) + def test_gpu_bf16_supported(self): + res = paddle.amp.is_bfloat16_supported() + self.assertEqual(res, True) + res = paddle.amp.is_bfloat16_supported('gpu') + self.assertEqual(res, True) + + def test_device_value_error(self): + self.assertRaises( + ValueError, paddle.amp.is_float16_supported, device='xxx' + ) + self.assertRaises( + ValueError, paddle.amp.is_float16_supported, device=111 + ) + + +if __name__ == '__main__': + unittest.main() From 9f2e30641929eeb35426ff39d1ef41b7c235eb67 Mon Sep 17 00:00:00 2001 From: xiaoguoguo626807 <100397923+xiaoguoguo626807@users.noreply.github.com> Date: Wed, 12 Apr 2023 15:49:26 +0800 Subject: [PATCH 50/59] recover multiply prune (#52713) --- paddle/fluid/eager/auto_code_generator/generator/eager_gen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index e22355d88d329..4e105d138b7e8 100644 --- 
a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -73,7 +73,6 @@ # bacward api's output usually affected by backward api's input special_prune_dict = { "matmul_grad": {"x": "grad_y", "y": "grad_x"}, - "multiply_grad": {"x": "grad_y", "y": "grad_x"}, } From cea62c00248385e1a058dff1d94caa6477c4c031 Mon Sep 17 00:00:00 2001 From: WangZhen <23097963+0x45f@users.noreply.github.com> Date: Wed, 12 Apr 2023 16:04:46 +0800 Subject: [PATCH 51/59] Eval during train for ResNet (#52768) * Eval during train for ResNet --- test/prim/model/test_resnet_prim_cinn.py | 78 ++++++++++++++---------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/test/prim/model/test_resnet_prim_cinn.py b/test/prim/model/test_resnet_prim_cinn.py index eee27720313de..46ea9bfba72a7 100644 --- a/test/prim/model/test_resnet_prim_cinn.py +++ b/test/prim/model/test_resnet_prim_cinn.py @@ -131,31 +131,13 @@ def optimizer_setting(parameter_list=None): return optimizer -def train(to_static, enable_prim, enable_cinn): - if core.is_compiled_with_cuda(): - paddle.set_device('gpu') - else: - paddle.set_device('cpu') - np.random.seed(SEED) - paddle.seed(SEED) - paddle.framework.random._manual_program_seed(SEED) - fluid.core._set_prim_all_enabled(enable_prim) - - train_reader = paddle.batch( - reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), - batch_size=batch_size, - drop_last=True, - ) - data_loader = fluid.io.DataLoader.from_generator(capacity=5, iterable=True) - data_loader.set_sample_list_generator(train_reader) - - resnet = resnet50(False) - if to_static: - build_strategy = paddle.static.BuildStrategy() - if enable_cinn: - build_strategy.build_cinn_pass = True - resnet = paddle.jit.to_static(resnet, build_strategy=build_strategy) - optimizer = optimizer_setting(parameter_list=resnet.parameters()) +def run(model, data_loader, optimizer, mode): + if mode == 'train': + model.train() + end_step = 9 + elif mode == 'eval': + model.eval() + end_step = 1 for epoch in range(epoch_num): total_acc1 = 0.0 @@ -167,7 +149,7 @@ def train(to_static, enable_prim, enable_cinn): start_time = time.time() img, label = data - pred = resnet(img) + pred = model(img) avg_loss = paddle.nn.functional.cross_entropy( input=pred, label=label, @@ -179,9 +161,10 @@ def train(to_static, enable_prim, enable_cinn): acc_top1 = paddle.static.accuracy(input=pred, label=label, k=1) acc_top5 = paddle.static.accuracy(input=pred, label=label, k=5) - avg_loss.backward() - optimizer.minimize(avg_loss) - resnet.clear_gradients() + if mode == 'train': + avg_loss.backward() + optimizer.minimize(avg_loss) + model.clear_gradients() total_acc1 += acc_top1 total_acc5 += acc_top5 @@ -190,8 +173,9 @@ def train(to_static, enable_prim, enable_cinn): end_time = time.time() print( - "epoch %d | batch step %d, loss %0.8f, acc1 %0.3f, acc5 %0.3f, time %f" + "[%s]epoch %d | batch step %d, loss %0.8f, acc1 %0.3f, acc5 %0.3f, time %f" % ( + mode, epoch, batch_id, avg_loss, @@ -200,7 +184,7 @@ def train(to_static, enable_prim, enable_cinn): end_time - start_time, ) ) - if batch_id >= 9: + if batch_id >= end_step: # avoid dataloader throw abort signaal data_loader._reset() break @@ -208,6 +192,38 @@ def train(to_static, enable_prim, enable_cinn): return losses +def train(to_static, enable_prim, enable_cinn): + if core.is_compiled_with_cuda(): + paddle.set_device('gpu') + else: + paddle.set_device('cpu') + np.random.seed(SEED) + paddle.seed(SEED) + 
paddle.framework.random._manual_program_seed(SEED) + fluid.core._set_prim_all_enabled(enable_prim) + + train_reader = paddle.batch( + reader_decorator(paddle.dataset.flowers.train(use_xmap=False)), + batch_size=batch_size, + drop_last=True, + ) + data_loader = fluid.io.DataLoader.from_generator(capacity=5, iterable=True) + data_loader.set_sample_list_generator(train_reader) + + resnet = resnet50(False) + if to_static: + build_strategy = paddle.static.BuildStrategy() + if enable_cinn: + build_strategy.build_cinn_pass = True + resnet = paddle.jit.to_static(resnet, build_strategy=build_strategy) + optimizer = optimizer_setting(parameter_list=resnet.parameters()) + + train_losses = run(resnet, data_loader, optimizer, 'train') + if to_static and enable_prim and enable_cinn: + eval_losses = run(resnet, data_loader, optimizer, 'eval') + return train_losses + + class TestResnet(unittest.TestCase): @unittest.skipIf( not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()), From 8e7c37894f8381d5e9662aa2974c4bbeac9b628e Mon Sep 17 00:00:00 2001 From: zhupengyang Date: Wed, 12 Apr 2023 16:26:20 +0800 Subject: [PATCH 52/59] cache scope in while (#52628) --- .../fluid/operators/controlflow/while_op.cc | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 30fdb90ce1069..4c7578c010473 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -22,6 +22,13 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif + +PADDLE_DEFINE_EXPORTED_bool( + cache_inference_while_scope, + false, + "Cache the scope of the while op to avoid repeated creation of the scope " + "for each iteration and improve inference performance."); + namespace paddle { namespace framework { class InferShapeContext; @@ -257,14 +264,23 @@ class WhileOp : public framework::OperatorBase { scope.FindVar(Input(kCondition))->Get()); } } else { - auto ¤t_scope = scope.NewScope(); - - BuildScopeForControlFlowOp(*core_, *block, ¤t_scope); - core_->reset_scope(¤t_scope); + framework::Scope *current_scope = nullptr; + if (!FLAGS_cache_inference_while_scope) { + current_scope = &(scope.NewScope()); + BuildScopeForControlFlowOp(*core_, *block, current_scope); + core_->reset_scope(current_scope); + } else { + if (cached_inference_scope_ == nullptr) { + cached_inference_scope_ = &(scope.NewScope()); + BuildScopeForControlFlowOp(*core_, *block, cached_inference_scope_); + core_->reset_scope(cached_inference_scope_); + } + current_scope = cached_inference_scope_; + } while (cond_data) { - for (auto &name : current_scope.LocalVarNames()) { - auto *var = current_scope.Var(name); + for (auto &name : current_scope->LocalVarNames()) { + auto *var = current_scope->Var(name); if (var->IsType()) { // Clear all lod information for all lod_tensors. 
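          // Stale LoD kept from the previous iteration could otherwise leak
          // into the ops executed in the next pass over the block.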
auto *t = var->GetMutable(); @@ -283,7 +299,9 @@ class WhileOp : public framework::OperatorBase { scope.FindVar(Input(kCondition))->Get()); } - scope.DeleteScope(¤t_scope); + if (!FLAGS_cache_inference_while_scope) { + scope.DeleteScope(current_scope); + } } } @@ -291,6 +309,7 @@ class WhileOp : public framework::OperatorBase { mutable std::shared_ptr executor_{nullptr}; mutable std::unique_ptr ctx_{nullptr}; mutable std::shared_ptr core_{nullptr}; + mutable framework::Scope *cached_inference_scope_{nullptr}; }; class WhileOpMaker : public framework::OpProtoAndCheckerMaker { From c376a9408d76bfa58869d4c35a54bf4b25c28923 Mon Sep 17 00:00:00 2001 From: gouzil <66515297+gouzil@users.noreply.github.com> Date: Wed, 12 Apr 2023 16:53:35 +0800 Subject: [PATCH 53/59] [phi] mv sequence_pool to phi - Step 1 : sequence_pooling_test (#52782) * [phi] mv sequence_pooling_test * [test] fix include --- paddle/fluid/operators/math/CMakeLists.txt | 4 -- test/cpp/phi/kernels/CMakeLists.txt | 5 +++ .../cpp/phi/kernels}/sequence_pooling_test.cc | 43 ++++++++++--------- 3 files changed, 28 insertions(+), 24 deletions(-) rename {paddle/fluid/operators/math => test/cpp/phi/kernels}/sequence_pooling_test.cc (81%) diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 6975873b13796..42cb92db8625e 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -39,10 +39,6 @@ cc_test( vol2col_test SRCS vol2col_test.cc DEPS vol2col) -cc_test( - sequence_pooling_test - SRCS sequence_pooling_test.cc - DEPS sequence_pooling) cc_test( beam_search_test SRCS beam_search_test.cc diff --git a/test/cpp/phi/kernels/CMakeLists.txt b/test/cpp/phi/kernels/CMakeLists.txt index a9e897eb614dc..3e7f394f186da 100644 --- a/test/cpp/phi/kernels/CMakeLists.txt +++ b/test/cpp/phi/kernels/CMakeLists.txt @@ -105,3 +105,8 @@ cc_test( sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) + +cc_test( + sequence_pooling_test + SRCS sequence_pooling_test.cc + DEPS sequence_pooling) diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/test/cpp/phi/kernels/sequence_pooling_test.cc similarity index 81% rename from paddle/fluid/operators/math/sequence_pooling_test.cc rename to test/cpp/phi/kernels/sequence_pooling_test.cc index dac5eb63bfc13..3c12d55ed360f 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/test/cpp/phi/kernels/sequence_pooling_test.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,13 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include + #include "paddle/fluid/operators/math/sequence_pooling.h" -#include +#include "paddle/phi/backends/context_pool.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/tensor_utils.h" template void TestSequencePoolingSum(const DeviceContext &context, - const paddle::framework::LoD &lod, + const phi::LoD &lod, const int64_t second_dim) { phi::DenseTensor cpu_out_grad; phi::DenseTensor cpu_in_grad; @@ -30,17 +34,17 @@ void TestSequencePoolingSum(const DeviceContext &context, auto out_dims = phi::make_ddim({static_cast(out_first_dim), second_dim}); - cpu_out_grad.mutable_data(out_dims, paddle::platform::CPUPlace()); + cpu_out_grad.mutable_data(out_dims, phi::CPUPlace()); for (int64_t i = 0; i < cpu_out_grad.numel(); ++i) { cpu_out_grad.data()[i] = static_cast(i); } // copy to dst out_grad auto place = context.GetPlace(); - if (paddle::platform::is_cpu_place(place)) { + if (place == phi::CPUPlace()) { out_grad = cpu_out_grad; } else { - paddle::framework::TensorCopySync(cpu_out_grad, place, &out_grad); + phi::Copy(context, cpu_out_grad, place, true, &out_grad); } // construct in_grad @@ -53,7 +57,7 @@ void TestSequencePoolingSum(const DeviceContext &context, PADDLE_ENFORCE_EQ( in_grad.dims().size(), out_grad.dims().size(), - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of input and output shall be same. Expected %ld == " "%ld, but got %ld != %ld. Please check the input value.", in_grad.dims().size(), @@ -64,7 +68,7 @@ void TestSequencePoolingSum(const DeviceContext &context, PADDLE_ENFORCE_EQ( in_grad.dims()[i], out_grad.dims()[i], - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The dimension of input and output shall be same. Expected %ld == " "%ld, but got %ld != %ld. 
Please check the input value.", in_grad.dims()[i], @@ -77,18 +81,17 @@ void TestSequencePoolingSum(const DeviceContext &context, paddle::operators::math::SequencePoolGradFunctor()( context, "SUM", out_grad, &in_grad); - if (paddle::platform::is_cpu_place(place)) { + if (place == phi::CPUPlace()) { cpu_in_grad = in_grad; } else { - paddle::framework::TensorCopySync( - in_grad, paddle::platform::CPUPlace(), &cpu_in_grad); + phi::Copy(context, in_grad, phi::CPUPlace(), true, &cpu_in_grad); cpu_in_grad.set_lod(in_grad.lod()); } EXPECT_EQ(in_grad.numel(), static_cast(lod[0].back() * second_dim)); EXPECT_EQ(in_grad.lod(), lod); - if (paddle::platform::is_cpu_place(place)) { + if (place == phi::CPUPlace()) { for (size_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) { int64_t begin = in_grad.lod()[0][i]; int64_t end = in_grad.lod()[0][i + 1]; @@ -116,30 +119,30 @@ void TestSequencePoolingSum(const DeviceContext &context, } TEST(SequencePoolingGrad, CPU_SUM) { - auto place = paddle::platform::CPUPlace(); + auto place = phi::CPUPlace(); auto *context = static_cast( - paddle::platform::DeviceContextPool::Instance().Get(place)); + phi::DeviceContextPool::Instance().Get(place)); - paddle::framework::LoD lod1; + phi::LoD lod1; lod1.push_back(std::vector{0, 10}); TestSequencePoolingSum(*context, lod1, 128); - paddle::framework::LoD lod2; + phi::LoD lod2; lod2.push_back(std::vector{0, 2, 7, 10}); TestSequencePoolingSum(*context, lod2, 128); } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) TEST(SequencePoolingGrad, CUDA_SUM) { - auto place = paddle::platform::CUDAPlace(0); + auto place = phi::GPUPlace(0); auto *context = static_cast( - paddle::platform::DeviceContextPool::Instance().Get(place)); + phi::DeviceContextPool::Instance().Get(place)); - paddle::framework::LoD lod1; + phi::LoD lod1; lod1.push_back(std::vector{0, 10}); TestSequencePoolingSum(*context, lod1, 128); - paddle::framework::LoD lod2; + phi::LoD lod2; lod2.push_back(std::vector{0, 2, 7, 10}); TestSequencePoolingSum(*context, lod2, 128); } From 8cbeefea9d7cba2de98574eafa12b87daab7af1e Mon Sep 17 00:00:00 2001 From: Zhang Zheng <32410583+ZzSean@users.noreply.github.com> Date: Wed, 12 Apr 2023 17:07:33 +0800 Subject: [PATCH 54/59] Optimize performance of unique kernel (#52736) * Optimize performance of unique kernel * fix ci --- paddle/phi/kernels/gpu/unique_kernel.cu | 141 +++++++----------------- 1 file changed, 41 insertions(+), 100 deletions(-) diff --git a/paddle/phi/kernels/gpu/unique_kernel.cu b/paddle/phi/kernels/gpu/unique_kernel.cu index c073708ed8556..10cf1ea8df534 100644 --- a/paddle/phi/kernels/gpu/unique_kernel.cu +++ b/paddle/phi/kernels/gpu/unique_kernel.cu @@ -30,6 +30,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/unique_functor.h" +#include "paddle/phi/kernels/index_select_kernel.h" namespace phi { @@ -98,76 +99,6 @@ struct BinaryNotEqual { } }; -// index_select() function for DenseTensor -template -void IndexSelect(const Context& context, - const DenseTensor& input, - const DenseTensor& index, - DenseTensor* output, - int dim) { - auto input_dim = input.dims(); - auto input_dim_size = input_dim.size(); - auto output_dim = output->dims(); - - auto slice_size = 1; - for (auto i = dim + 1; i < input_dim_size; i++) { - slice_size *= input_dim[i]; - } - - auto input_width = slice_size * input_dim[dim]; - auto output_width = slice_size * output_dim[dim]; - - auto outer_nums = 1; - for (auto i = 0; i < dim; i++) { - outer_nums *= 
input_dim[i]; - } - - auto index_size = index.dims()[0]; - - std::vector input_vec; - std::vector index_vec; - phi::TensorToVector(input, context, &input_vec); - phi::TensorToVector(index, context, &index_vec); - std::vector out_vec(output->numel()); - - for (int i = 0; i < index_size; i++) { - PADDLE_ENFORCE_GE( - index_vec[i], - 0, - phi::errors::InvalidArgument( - "Variable value (index) of OP(index_select) " - "expected >= 0 and < %ld, but got %ld. Please check input " - "value.", - input_dim[dim], - index_vec[i])); - PADDLE_ENFORCE_LT( - index_vec[i], - input_dim[dim], - phi::errors::InvalidArgument( - "Variable value (index) of OP(index_select) " - "expected >= 0 and < %ld, but got %ld. Please check input " - "value.", - input_dim[dim], - index_vec[i])); - } - - for (auto i = 0; i < outer_nums; i++) { - auto input_start_offset = i * input_width; - auto output_start_offset = i * output_width; - - for (auto j = 0; j < index_size; j++) { - IndexT index_value = index_vec[j]; - for (auto k = 0; k < slice_size; k++) { - out_vec[output_start_offset + j * slice_size + k] = - input_vec[input_start_offset + index_value * slice_size + k]; - } - } - } - context.template Alloc(output); - phi::TensorFromVector(out_vec, context, output); - output->Resize(output_dim); -} - // The core logic of computing Unique for a flattend DenseTensor template [dim1, dim0, dim2] - std::vector permute(in.dims().size()); - std::iota(permute.begin(), permute.end(), 0); - permute[axis] = 0; - permute[0] = axis; - std::vector in_trans_dims_vec(phi::vectorize(in.dims())); - in_trans_dims_vec[axis] = in.dims()[0]; - in_trans_dims_vec[0] = in.dims()[axis]; DenseTensor in_trans; + std::vector in_trans_dims_vec(phi::vectorize(in.dims())); auto in_trans_dims = phi::make_ddim(in_trans_dims_vec); - in_trans.Resize(in_trans_dims); - context.template Alloc(&in_trans); - phi::funcs::TransCompute( - in.dims().size(), // num of dims - context, // device - in, // original DenseTensor - &in_trans, // DenseTensor after reshape - permute); // index of axis - + std::vector permute(in.dims().size()); + bool is_transpose = axis != 0; + if (is_transpose) { + std::iota(permute.begin(), permute.end(), 0); + permute[axis] = 0; + permute[0] = axis; + in_trans_dims_vec[axis] = in.dims()[0]; + in_trans_dims_vec[0] = in.dims()[axis]; + in_trans_dims = phi::make_ddim(in_trans_dims_vec); + in_trans.Resize(in_trans_dims); + context.template Alloc(&in_trans); + phi::funcs::TransCompute( + in.dims().size(), // num of dims + context, // device + in, // original DenseTensor + &in_trans, // DenseTensor after reshape + permute); // index of axis + } else { + in_trans.ShareDataWith(in); + } // Reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2] auto in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1); in_trans.Resize(in_trans_flat_dims); @@ -407,22 +343,27 @@ static void UniqueDimsCUDATensor(const Context& context, row); // 3. 
Select indices and reshape back to get 'out' - DenseTensor out_trans; std::vector out_trans_dims_vec = in_trans_dims_vec; out_trans_dims_vec[0] = indices->numel(); - out_trans.Resize(phi::make_ddim(out_trans_dims_vec)); - context.template Alloc(&out_trans); - - IndexSelect(context, in_trans, *indices, &out_trans, 0); - - std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]); - out->Resize(phi::make_ddim(out_trans_dims_vec)); - context.template Alloc(out); - std::vector out_trans_unbind = phi::funcs::Unbind(out_trans); - phi::funcs::ConcatFunctor concat_functor; - concat_functor(context, out_trans_unbind, 0, &out_trans); - phi::funcs::TransCompute( - out_trans.dims().size(), context, out_trans, out, permute); + if (is_transpose) { + DenseTensor out_trans; + out_trans.Resize(phi::make_ddim(out_trans_dims_vec)); + context.template Alloc(&out_trans); + + phi::IndexSelectKernel( + context, in_trans, *indices, 0, &out_trans); + + std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]); + out->Resize(phi::make_ddim(out_trans_dims_vec)); + context.template Alloc(out); + phi::funcs::TransCompute( + out_trans.dims().size(), context, out_trans, out, permute); + } else { + out->Resize(phi::make_ddim(out_trans_dims_vec)); + context.template Alloc(out); + + phi::IndexSelectKernel(context, in_trans, *indices, 0, out); + } } // functor for processing a flattend DenseTensor From fd97d7d107f043b9389d5f8a424d981d13b820de Mon Sep 17 00:00:00 2001 From: zhangbo9674 <82555433+zhangbo9674@users.noreply.github.com> Date: Wed, 12 Apr 2023 17:11:33 +0800 Subject: [PATCH 55/59] [IR] Value system && Operation (#51992) * add Value OpResult OpOperand class * add Value OpResult OpOperand class * fix bug * fix bug * add utils * refine code * add ptr offset and reset method * add value impl * fix bug * refine comment of ValueImpl * refine code of OpResult * refine code of Value * add some comment * fix cpu compile bug * refine code * add op * add method for op & test value * refine unittest * refine code by comment * refine code * refine code * refine code * refine code --- paddle/ir/builtin_attribute.h | 9 +- paddle/ir/builtin_attribute_storage.cc | 3 +- paddle/ir/builtin_attribute_storage.h | 4 - paddle/ir/builtin_type_storage.h | 26 ++-- paddle/ir/op_base.h | 37 +++++ paddle/ir/operation.cc | 173 ++++++++++++++++++++++ paddle/ir/operation.h | 57 +++++++ paddle/ir/tests/CMakeLists.txt | 1 + paddle/ir/tests/ir_value_test.cc | 98 +++++++++++++ paddle/ir/tests/type_test.cc | 10 +- paddle/ir/utils.cc | 58 ++++++++ paddle/ir/utils.h | 28 ++++ paddle/ir/value.cc | 183 +++++++++++++++++++++++ paddle/ir/value.h | 137 +++++++++++++++++ paddle/ir/value_impl.h | 196 +++++++++++++++++++++++++ 15 files changed, 988 insertions(+), 32 deletions(-) create mode 100644 paddle/ir/op_base.h create mode 100644 paddle/ir/operation.cc create mode 100644 paddle/ir/operation.h create mode 100644 paddle/ir/tests/ir_value_test.cc create mode 100644 paddle/ir/utils.cc create mode 100644 paddle/ir/utils.h create mode 100644 paddle/ir/value.cc create mode 100644 paddle/ir/value.h create mode 100644 paddle/ir/value_impl.h diff --git a/paddle/ir/builtin_attribute.h b/paddle/ir/builtin_attribute.h index 4572617ea57ec..82b5f8eb48aa5 100644 --- a/paddle/ir/builtin_attribute.h +++ b/paddle/ir/builtin_attribute.h @@ -16,6 +16,7 @@ #include "paddle/ir/attribute.h" #include "paddle/ir/builtin_attribute_storage.h" +#include "paddle/ir/utils.h" namespace ir { /// @@ -82,15 +83,11 @@ class DictionaryAttribute : public ir::Attribute { } // namespace ir 
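// For reference: the std::hash specialization below composes the name and
// value hashes with ir::hash_combine (the boost-style mixing step declared in
// paddle/ir/utils.h). A minimal standalone sketch of that pattern; the Pair
// and HashPair names here are hypothetical and not part of the patch.
#include <cstddef>
#include <functional>
#include <string>

// Same mixing step as ir::hash_combine in utils.cc.
static std::size_t hash_combine(std::size_t lhs, std::size_t rhs) {
  return lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2);
}

// Hypothetical two-field key, hashed field by field and then combined.
struct Pair {
  std::string name;
  int value;
};

static std::size_t HashPair(const Pair &p) {
  std::size_t seed = std::hash<std::string>()(p.name);
  return hash_combine(seed, std::hash<int>()(p.value));
}

int main() {
  Pair p{"name", 1};
  return HashPair(p) == HashPair(p) ? 0 : 1;  // same key, same hash
}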
namespace std { -static std::size_t hash_combine(std::size_t lhs, std::size_t rhs) { - return lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); -} - template <> struct hash { std::size_t operator()(const ir::NamedAttribute &obj) const { - return hash_combine(std::hash()(obj.name_), - std::hash()(obj.value_)); + return ir::hash_combine(std::hash()(obj.name_), + std::hash()(obj.value_)); } }; } // namespace std diff --git a/paddle/ir/builtin_attribute_storage.cc b/paddle/ir/builtin_attribute_storage.cc index 961319bc4a94e..c7feacae4d64a 100644 --- a/paddle/ir/builtin_attribute_storage.cc +++ b/paddle/ir/builtin_attribute_storage.cc @@ -14,6 +14,7 @@ #include "paddle/ir/builtin_attribute_storage.h" #include "paddle/ir/builtin_attribute.h" +#include "paddle/ir/utils.h" namespace ir { @@ -32,7 +33,7 @@ DictionaryAttributeStorage::DictionaryAttributeStorage(const ParamKey &key) { std::size_t DictionaryAttributeStorage::HashValue(const ParamKey &key) { std::size_t hash_value = key.size(); for (auto iter = key.begin(); iter != key.end(); ++iter) { - hash_value = hash_combine( + hash_value = ir::hash_combine( hash_value, std::hash()(NamedAttribute(iter->first, iter->second))); } diff --git a/paddle/ir/builtin_attribute_storage.h b/paddle/ir/builtin_attribute_storage.h index a0fdca9f1e10f..a34648fb17e35 100644 --- a/paddle/ir/builtin_attribute_storage.h +++ b/paddle/ir/builtin_attribute_storage.h @@ -83,10 +83,6 @@ struct DictionaryAttributeStorage : public AttributeStorage { uint32_t size() const { return size_; } private: - static std::size_t hash_combine(std::size_t lhs, std::size_t rhs) { - return lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); - } - NamedAttribute *data_; uint32_t size_; }; diff --git a/paddle/ir/builtin_type_storage.h b/paddle/ir/builtin_type_storage.h index 876b6ceeffdce..132a1656a7975 100644 --- a/paddle/ir/builtin_type_storage.h +++ b/paddle/ir/builtin_type_storage.h @@ -17,6 +17,7 @@ #include #include "paddle/ir/type.h" +#include "paddle/ir/utils.h" namespace std { /// @@ -109,20 +110,22 @@ struct DenseTensorTypeStorage : public ir::TypeStorage { std::size_t hash_value = 0; // hash dtype hash_value = - hash_combine(hash_value, std::hash()(std::get<0>(key))); + ir::hash_combine(hash_value, std::hash()(std::get<0>(key))); // hash dims - hash_value = hash_combine(hash_value, std::hash()(std::get<1>(key))); - // hash layout hash_value = - hash_combine(hash_value, - std::hash::type>()( - static_cast::type>( - std::get<2>(key)))); + ir::hash_combine(hash_value, std::hash()(std::get<1>(key))); + // hash layout + hash_value = ir::hash_combine( + hash_value, + std::hash::type>()( + static_cast::type>( + std::get<2>(key)))); // hash lod - hash_value = hash_combine(hash_value, std::hash()(std::get<3>(key))); + hash_value = + ir::hash_combine(hash_value, std::hash()(std::get<3>(key))); // hash offset hash_value = - hash_combine(hash_value, std::hash()(std::get<4>(key))); + ir::hash_combine(hash_value, std::hash()(std::get<4>(key))); return hash_value; } @@ -146,11 +149,6 @@ struct DenseTensorTypeStorage : public ir::TypeStorage { DataLayout layout_; LoD lod_; size_t offset_; - - private: - static std::size_t hash_combine(std::size_t lhs, std::size_t rhs) { - return lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); - } }; } // namespace ir diff --git a/paddle/ir/op_base.h b/paddle/ir/op_base.h new file mode 100644 index 0000000000000..38ff4002c6b2b --- /dev/null +++ b/paddle/ir/op_base.h @@ -0,0 +1,37 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/ir/operation.h" + +namespace ir { +class OpBase { + public: + Operation *operation() { return operation_; } + + explicit operator bool() { return operation() != nullptr; } + + operator Operation *() const { return operation_; } + + Operation *operator->() const { return operation_; } + + protected: + explicit OpBase(Operation *operation) : operation_(operation) {} + + private: + Operation *operation_; +}; + +} // namespace ir diff --git a/paddle/ir/operation.cc b/paddle/ir/operation.cc new file mode 100644 index 0000000000000..e9d727f1b5fb3 --- /dev/null +++ b/paddle/ir/operation.cc @@ -0,0 +1,173 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/ir/operation.h" +#include "paddle/ir/utils.h" + +namespace ir { +// Allocate the required memory based on the size and number of inputs, outputs, +// and operators, and construct it in the order of: OpOutlineResult, +// OpInlineResult, Operation, Operand. +Operation *Operation::create(const std::vector &inputs, + const std::vector &output_types, + ir::DictionaryAttribute attribute) { + // 1. Calculate the required memory size for OpResults + Operation + + // OpOperands. + uint32_t num_results = output_types.size(); + uint32_t num_operands = inputs.size(); + uint32_t max_inline_result_num = + detail::OpResultImpl::GetMaxInlineResultIndex() + 1; + size_t result_mem_size = + num_results > max_inline_result_num + ? sizeof(detail::OpOutlineResultImpl) * + (num_results - max_inline_result_num) + + sizeof(detail::OpInlineResultImpl) * max_inline_result_num + : sizeof(detail::OpInlineResultImpl) * num_results; + size_t operand_mem_size = sizeof(detail::OpOperandImpl) * num_operands; + size_t op_mem_size = sizeof(Operation); + size_t base_size = result_mem_size + op_mem_size + operand_mem_size; + // 2. Malloc memory. + char *base_ptr = reinterpret_cast(aligned_malloc(base_size, 8)); + // 3.1. Construct OpResults. + for (size_t idx = num_results; idx > 0; idx--) { + if (idx > max_inline_result_num) { + new (base_ptr) + detail::OpOutlineResultImpl(output_types[idx - 1], idx - 1); + base_ptr += sizeof(detail::OpOutlineResultImpl); + } else { + new (base_ptr) detail::OpInlineResultImpl(output_types[idx - 1], idx - 1); + base_ptr += sizeof(detail::OpInlineResultImpl); + } + } + // 3.2. Construct Operation. 
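+  // At this point base_ptr has walked past every OpResultImpl, so the block
+  // being assembled is laid out as | OpOutlineResults | OpInlineResults |
+  // Operation | OpOperands |, and the Operation lands right after the inline
+  // results.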
+ Operation *op = + new (base_ptr) Operation(num_results, num_operands, attribute); + base_ptr += sizeof(Operation); + // 3.3. Construct OpOperands. + if ((reinterpret_cast(base_ptr) & 0x7) != 0) { + throw("The address of OpOperandImpl must be divisible by 8."); + } + for (size_t idx = 0; idx < num_operands; idx++) { + new (base_ptr) detail::OpOperandImpl(inputs[idx].impl_, op); + base_ptr += sizeof(detail::OpOperandImpl); + } + VLOG(4) << "Construct an Operation: " << op->print(); + return op; +} + +// Call destructors for OpResults, Operation, and OpOperands in sequence, and +// finally free memory. +void Operation::destroy() { + // 1. Get aligned_ptr by result_num. + uint32_t max_inline_result_num = + detail::OpResultImpl::GetMaxInlineResultIndex() + 1; + size_t result_mem_size = + num_results_ > max_inline_result_num + ? sizeof(detail::OpOutlineResultImpl) * + (num_results_ - max_inline_result_num) + + sizeof(detail::OpInlineResultImpl) * max_inline_result_num + : sizeof(detail::OpInlineResultImpl) * num_results_; + char *aligned_ptr = reinterpret_cast(this) - result_mem_size; + // 2.1. Deconstruct OpResult. + char *base_ptr = aligned_ptr; + for (size_t idx = num_results_; idx > 0; idx--) { + if (!reinterpret_cast(base_ptr)->use_empty()) { + throw("Cannot destroy a value that still has uses!"); + } + if (idx > max_inline_result_num) { + reinterpret_cast(base_ptr) + ->~OpOutlineResultImpl(); + base_ptr += sizeof(detail::OpOutlineResultImpl); + } else { + reinterpret_cast(base_ptr) + ->~OpInlineResultImpl(); + base_ptr += sizeof(detail::OpInlineResultImpl); + } + } + // 2.2. Deconstruct Operation. + if (reinterpret_cast(base_ptr) != + reinterpret_cast(this)) { + throw("Operation address error"); + } + reinterpret_cast(base_ptr)->~Operation(); + base_ptr += sizeof(Operation); + // 2.3. Deconstruct OpOpOerand. + for (size_t idx = 0; idx < num_operands_; idx++) { + reinterpret_cast(base_ptr)->~OpOperandImpl(); + base_ptr += sizeof(detail::OpOperandImpl); + } + // 3. Free memory. 
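+  // The results, the Operation itself and the operands were all carved out of
+  // the single aligned_malloc'ed block obtained in create(), so one
+  // aligned_free on aligned_ptr releases all of them.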
+ VLOG(4) << "Destroy an Operation: {ptr = " + << reinterpret_cast(aligned_ptr) + << ", size = " << result_mem_size << "}"; + aligned_free(reinterpret_cast(aligned_ptr)); +} + +Operation::Operation(uint32_t num_results, + uint32_t num_operands, + ir::DictionaryAttribute attribute) { + if (!attribute) { + throw("unexpected null attribute dictionary"); + } + num_results_ = num_results; + num_operands_ = num_operands; + attribute_ = attribute; +} + +ir::OpResult Operation::GetResultByIndex(uint32_t index) { + if (index >= num_results_) { + throw("index exceeds OP output range."); + } + uint32_t max_inline_idx = detail::OpResultImpl::GetMaxInlineResultIndex(); + char *ptr = nullptr; + if (index > max_inline_idx) { + ptr = reinterpret_cast(this) - + (max_inline_idx + 1) * sizeof(detail::OpInlineResultImpl) - + (index - max_inline_idx) * sizeof(detail::OpOutlineResultImpl); + } else { + ptr = reinterpret_cast(this) - + (index + 1) * sizeof(detail::OpInlineResultImpl); + } + if (index > max_inline_idx) { + detail::OpOutlineResultImpl *result_impl_ptr = + reinterpret_cast(ptr); + return ir::OpResult(result_impl_ptr); + } else { + detail::OpInlineResultImpl *result_impl_ptr = + reinterpret_cast(ptr); + return ir::OpResult(result_impl_ptr); + } +} + +std::string Operation::print() { + std::stringstream result; + result << "{ " << num_results_ << " outputs, " << num_operands_ + << " inputs } : "; + result << "[ "; + for (size_t idx = num_results_; idx > 0; idx--) { + result << GetResultByIndex(idx - 1).impl_ << ", "; + } + result << "] = "; + result << this << "( "; + for (size_t idx = 0; idx < num_operands_; idx++) { + result << reinterpret_cast(reinterpret_cast(this) + + sizeof(Operation) + + idx * sizeof(detail::OpOperandImpl)) + << ", "; + } + result << ")"; + return result.str(); +} + +} // namespace ir diff --git a/paddle/ir/operation.h b/paddle/ir/operation.h new file mode 100644 index 0000000000000..924dcafb73dfc --- /dev/null +++ b/paddle/ir/operation.h @@ -0,0 +1,57 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/ir/builtin_attribute.h" +#include "paddle/ir/type.h" +#include "paddle/ir/value_impl.h" + +namespace ir { + +class alignas(8) Operation final { + public: + /// + /// \brief Malloc memory and construct objects in the following order: + /// OpResultImpls|Operation|OpOperandImpls. 
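+  /// Results are constructed in reverse index order, which lets
+  /// GetResultByIndex locate result i at a fixed negative offset from the
+  /// Operation pointer.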
+ /// + static Operation *create(const std::vector &inputs, + const std::vector &output_types, + ir::DictionaryAttribute attribute); + + void destroy(); + + ir::OpResult GetResultByIndex(uint32_t index); + + std::string print(); + + ir::DictionaryAttribute attribute() { return attribute_; } + + uint32_t num_results() { return num_results_; } + + uint32_t num_operands() { return num_operands_; } + + private: + Operation(uint32_t num_results, + uint32_t num_operands, + ir::DictionaryAttribute attribute); + + ir::DictionaryAttribute attribute_; + + uint32_t num_results_ = 0; + + uint32_t num_operands_ = 0; +}; + +} // namespace ir diff --git a/paddle/ir/tests/CMakeLists.txt b/paddle/ir/tests/CMakeLists.txt index d94789fd05682..e012ec5bd264d 100644 --- a/paddle/ir/tests/CMakeLists.txt +++ b/paddle/ir/tests/CMakeLists.txt @@ -1,2 +1,3 @@ cc_test_old(type_test SRCS type_test.cc DEPS new_ir gtest) cc_test_old(ir_attribute_test SRCS ir_attribute_test.cc DEPS new_ir gtest) +cc_test_old(ir_value_test SRCS ir_value_test.cc DEPS new_ir gtest) diff --git a/paddle/ir/tests/ir_value_test.cc b/paddle/ir/tests/ir_value_test.cc new file mode 100644 index 0000000000000..c04e7c35128f4 --- /dev/null +++ b/paddle/ir/tests/ir_value_test.cc @@ -0,0 +1,98 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "paddle/ir/attribute.h" +#include "paddle/ir/builtin_attribute.h" +#include "paddle/ir/builtin_type.h" +#include "paddle/ir/ir_context.h" +#include "paddle/ir/operation.h" + +// This unittest is used to test the construction interfaces of value class and +// operation. The constructed test scenario is: a = OP1(); b = OP2(); c = OP3(a, +// b); d, e, f, g, h, i, j = OP4(a, c); + +ir::DictionaryAttribute CreateAttribute(std::string attribute_name, + std::string attribute) { + ir::IrContext *ctx = ir::IrContext::Instance(); + ir::StrAttribute attr_name = ir::StrAttribute::get(ctx, attribute_name); + ir::Attribute attr_value = ir::StrAttribute::get(ctx, attribute); + std::map named_attr; + named_attr.insert( + std::pair(attr_name, attr_value)); + return ir::DictionaryAttribute::get(ctx, named_attr); +} + +TEST(value_test, value_test) { + ir::IrContext *ctx = ir::IrContext::Instance(); + // 1. Construct OP1: a = OP1() + std::vector op1_inputs = {}; + std::vector op1_output_types = {ir::Float32Type::get(ctx)}; + ir::Operation *op1 = ir::Operation::create( + op1_inputs, op1_output_types, CreateAttribute("op1_name", "op1_attr")); + std::cout << op1->print() << std::endl; + // 2. Construct OP2: b = OP2(); + std::vector op2_inputs = {}; + std::vector op2_output_types = {ir::Float32Type::get(ctx)}; + ir::Operation *op2 = ir::Operation::create( + op2_inputs, op2_output_types, CreateAttribute("op2_name", "op2_attr")); + std::cout << op2->print() << std::endl; + // 3. 
Construct OP3: c = OP3(a, b); + std::vector op3_inputs = {op1->GetResultByIndex(0), + op2->GetResultByIndex(0)}; + std::vector op3_output_types = {ir::Float32Type::get(ctx)}; + ir::Operation *op3 = ir::Operation::create( + op3_inputs, op3_output_types, CreateAttribute("op3_name", "op3_attr")); + std::cout << op3->print() << std::endl; + // 4. Construct OP4: d, e, f, g, h, i, j = OP4(a, c); + std::vector op4_inputs = {op1->GetResultByIndex(0), + op3->GetResultByIndex(0)}; + std::vector op4_output_types; + for (size_t i = 0; i < 7; i++) { + op4_output_types.push_back(ir::Float32Type::get(ctx)); + } + ir::Operation *op4 = ir::Operation::create( + op4_inputs, op4_output_types, CreateAttribute("op4_name", "op4_attr")); + std::cout << op4->print() << std::endl; + + // Test 1: + EXPECT_EQ(op1->GetResultByIndex(0).GetDefiningOp(), op1); + EXPECT_EQ(op2->GetResultByIndex(0).GetDefiningOp(), op2); + EXPECT_EQ(op3->GetResultByIndex(0).GetDefiningOp(), op3); + EXPECT_EQ(op4->GetResultByIndex(6).GetDefiningOp(), op4); + + // Test 2: op1_first_output -> op4_first_input + ir::OpResult op1_first_output = op1->GetResultByIndex(0); + ir::detail::OpOperandImpl *op4_first_input = + reinterpret_cast( + reinterpret_cast(op4) + sizeof(ir::Operation)); + EXPECT_EQ(static_cast(op1_first_output).impl()->first_use(), + op4_first_input); + ir::detail::OpOperandImpl *op3_first_input = + reinterpret_cast( + reinterpret_cast(op3) + sizeof(ir::Operation)); + EXPECT_EQ(op4_first_input->next_use(), op3_first_input); + EXPECT_EQ(op3_first_input->next_use(), nullptr); + + // destroy + std::cout << op1->GetResultByIndex(0).print_ud_chain() << std::endl; + op4->destroy(); + std::cout << op1->GetResultByIndex(0).print_ud_chain() << std::endl; + op3->destroy(); + std::cout << op1->GetResultByIndex(0).print_ud_chain() << std::endl; + op2->destroy(); + std::cout << op1->GetResultByIndex(0).print_ud_chain() << std::endl; + op1->destroy(); +} diff --git a/paddle/ir/tests/type_test.cc b/paddle/ir/tests/type_test.cc index a11040e3656a5..d21afdcb80a59 100644 --- a/paddle/ir/tests/type_test.cc +++ b/paddle/ir/tests/type_test.cc @@ -21,6 +21,7 @@ #include "paddle/ir/ir_context.h" #include "paddle/ir/type.h" #include "paddle/ir/type_base.h" +#include "paddle/ir/utils.h" TEST(type_test, type_id) { // Define two empty classes, just for testing. @@ -172,8 +173,8 @@ struct IntegerTypeStorage : public ir::TypeStorage { using ParamKey = std::pair; static std::size_t HashValue(const ParamKey &key) { - return hash_combine(std::hash()(std::get<0>(key)), - std::hash()(std::get<1>(key))); + return ir::hash_combine(std::hash()(std::get<0>(key)), + std::hash()(std::get<1>(key))); } bool operator==(const ParamKey &key) const { @@ -188,11 +189,6 @@ struct IntegerTypeStorage : public ir::TypeStorage { unsigned width_ : 30; unsigned signedness_ : 2; - - private: - static std::size_t hash_combine(std::size_t lhs, std::size_t rhs) { - return lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); - } }; // Customize a parameterized type: IntegerType, storage type is diff --git a/paddle/ir/utils.cc b/paddle/ir/utils.cc new file mode 100644 index 0000000000000..9e6f1fcaf5790 --- /dev/null +++ b/paddle/ir/utils.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/ir/utils.h" + +namespace ir { +std::size_t hash_combine(std::size_t lhs, std::size_t rhs) { + return lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); +} + +void *aligned_malloc(size_t size, size_t alignment) { + assert(alignment >= sizeof(void *) && (alignment & (alignment - 1)) == 0); + size = (size + alignment - 1) / alignment * alignment; +#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L + void *aligned_mem = nullptr; + if (posix_memalign(&aligned_mem, alignment, size) != 0) { + aligned_mem = nullptr; + } + return aligned_mem; +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); +#else + void *mem = malloc(size + alignment); + if (mem == nullptr) { + return nullptr; + } + size_t adjust = alignment - reinterpret_cast(mem) % alignment; + void *aligned_mem = reinterpret_cast(mem) + adjust; + *(reinterpret_cast(aligned_mem) - 1) = mem; + assert(reinterpret_cast(aligned_mem) % alignment == 0); + return aligned_mem; +#endif +} + +void aligned_free(void *mem_ptr) { +#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L + free(mem_ptr); +#elif defined(_WIN32) + _aligned_free(mem_ptr); +#else + if (mem_ptr) { + free(*(reinterpret_cast(mem_ptr) - 1)); + } +#endif +} + +} // namespace ir diff --git a/paddle/ir/utils.h b/paddle/ir/utils.h new file mode 100644 index 0000000000000..b4dd00281e159 --- /dev/null +++ b/paddle/ir/utils.h @@ -0,0 +1,28 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace ir { +std::size_t hash_combine(std::size_t lhs, std::size_t rhs); + +void *aligned_malloc(size_t size, size_t alignment); + +void aligned_free(void *mem_ptr); + +} // namespace ir diff --git a/paddle/ir/value.cc b/paddle/ir/value.cc new file mode 100644 index 0000000000000..f5ecc41018bcf --- /dev/null +++ b/paddle/ir/value.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
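+// Implements the Value/OpOperand/OpResult handle classes and their
+// detail::*Impl counterparts; the use-def chain bookkeeping lives in
+// OpOperandImpl.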
+ +#include "paddle/ir/value.h" +#include "paddle/ir/value_impl.h" + +namespace ir { +// Operand +OpOperand::OpOperand(const detail::OpOperandImpl *impl) + : impl_(const_cast(impl)) {} + +OpOperand &OpOperand::operator=(const OpOperand &rhs) { + if (this == &rhs) return *this; + impl_ = rhs.impl_; + return *this; +} + +OpOperand &OpOperand::operator=(const detail::OpOperandImpl *impl) { + if (this->impl_ == impl) return *this; + impl_ = const_cast(impl); + return *this; +} + +bool OpOperand::operator==(OpOperand other) const { + return impl_ == other.impl_; +} + +bool OpOperand::operator!=(OpOperand other) const { + return impl_ != other.impl_; +} + +bool OpOperand::operator!() const { return impl_ == nullptr; } + +OpOperand::operator bool() const { return impl_; } + +detail::OpOperandImpl *OpOperand::impl() const { return impl_; } + +// Value +Value::Value(const detail::ValueImpl *impl) + : impl_(const_cast(impl)) {} + +bool Value::operator==(const Value &other) const { + return impl_ == other.impl_; +} + +bool Value::operator!=(const Value &other) const { + return impl_ != other.impl_; +} + +bool Value::operator!() const { return impl_ == nullptr; } + +Value::operator bool() const { return impl_; } + +detail::ValueImpl *Value::impl() const { return impl_; } + +ir::Type Value::type() const { return impl_->type(); } + +void Value::SetType(ir::Type type) { impl_->SetType(type); } + +Operation *Value::GetDefiningOp() const { + if (auto result = dyn_cast()) return result.owner(); + return nullptr; +} + +std::string Value::print_ud_chain() { return impl_->print_ud_chain(); } + +// OpResult +bool OpResult::classof(Value value) { + return ir::isa(value.impl()); +} + +Operation *OpResult::owner() const { return impl()->owner(); } + +uint32_t OpResult::GetResultIndex() const { return impl()->GetResultIndex(); } + +detail::OpResultImpl *OpResult::impl() const { + return reinterpret_cast(impl_); +} + +uint32_t OpResult::GetValidInlineIndex(uint32_t index) { + uint32_t max_inline_index = + ir::detail::OpResultImpl::GetMaxInlineResultIndex(); + return index <= max_inline_index ? index : max_inline_index; +} + +// details +namespace detail { +ir::Operation *OpOperandImpl::owner() const { return owner_; } + +ir::detail::OpOperandImpl *OpOperandImpl::next_use() { return next_use_; } + +OpOperandImpl::OpOperandImpl(ir::Value source, ir::Operation *owner) + : source_(source), owner_(owner) { + prev_use_addr_ = source.impl()->first_use_addr(); + next_use_ = source.impl()->first_use(); + if (next_use_) { + next_use_->prev_use_addr_ = &next_use_; + } + source.impl()->SetFirstUse(this); +} + +void OpOperandImpl::remove_from_ud_chain() { + if (!prev_use_addr_) return; + if (prev_use_addr_ == source_.impl()->first_use_addr()) { + /// NOTE: In ValueImpl, first_use_offseted_by_index_ use lower three bits + /// storage index information, so need to be updated using the SetFirstUse + /// method here. 
+ source_.impl()->SetFirstUse(next_use_); + } else { + *prev_use_addr_ = next_use_; + } + if (next_use_) { + next_use_->prev_use_addr_ = prev_use_addr_; + } +} + +OpOperandImpl::~OpOperandImpl() { remove_from_ud_chain(); } + +uint32_t ValueImpl::index() const { + uint32_t index = + reinterpret_cast(first_use_offseted_by_index_) & 0x07; + if (index < 6) return index; + return reinterpret_cast(const_cast(this)) + ->GetResultIndex(); +} + +std::string ValueImpl::print_ud_chain() { + std::stringstream result; + result << "Value[" << this << "] -> "; + OpOperandImpl *tmp = first_use(); + if (tmp) { + result << "OpOperand[" << reinterpret_cast(tmp) << "] -> "; + while (tmp->next_use() != nullptr) { + result << "OpOperand[" << reinterpret_cast(tmp->next_use()) + << "] -> "; + tmp = tmp->next_use(); + } + } + result << "nullptr"; + return result.str(); +} + +uint32_t OpResultImpl::GetResultIndex() const { + if (const auto *outline_result = ir::dyn_cast(this)) { + return outline_result->GetResultIndex(); + } + return ir::dyn_cast(this)->GetResultIndex(); +} + +ir::Operation *OpResultImpl::owner() const { + // For inline result, pointer offset index to obtain the address of op. + if (const auto *result = ir::dyn_cast(this)) { + result += result->GetResultIndex() + 1; + return reinterpret_cast( + const_cast(result)); + } + // For outline result, pointer offset outline_index to obtain the address of + // maximum inline result. + const OpOutlineResultImpl *outline_result = + (const OpOutlineResultImpl *)(this); + outline_result += + (outline_result->outline_index_ - GetMaxInlineResultIndex()); + // The offset of the maximum inline result distance op is + // GetMaxInlineResultIndex. + const auto *inline_result = + reinterpret_cast(outline_result); + inline_result += (GetMaxInlineResultIndex() + 1); + return reinterpret_cast( + const_cast(inline_result)); +} +} // namespace detail +} // namespace ir diff --git a/paddle/ir/value.h b/paddle/ir/value.h new file mode 100644 index 0000000000000..3d197182cd6e9 --- /dev/null +++ b/paddle/ir/value.h @@ -0,0 +1,137 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/ir/cast_utils.h" +#include "paddle/ir/type.h" + +namespace ir { +class Operation; + +namespace detail { +class OpOperandImpl; +class ValueImpl; +class OpResultImpl; +} // namespace detail + +/// +/// \brief OpOperand class represents the operand of operation. This class only +/// provides interfaces, for specific implementation, see Impl class. 
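The use-def bookkeeping above leans on one pointer trick that is easy to miss in the diff: because ValueImpl is alignas(8), the low three bits of first_use_offseted_by_index_ are always zero for a real pointer, so the result index (0-5 inline, 6 outline, 7 reserved) rides along in those bits and is stripped off with a mask in first_use() and read back in index(). A minimal standalone illustration of that packing follows; the Use struct and the pack/unpack helper names are made up for the sketch and are not the actual ValueImpl API.

#include <cassert>
#include <cstdint>
#include <iostream>

struct alignas(8) Use {  // stands in for ir::detail::OpOperandImpl
  int dummy = 0;
};

// Since ptr is 8-byte aligned, adding a small index (< 8) only touches the
// three zero bits at the bottom, which is what SetFirstUse relies on.
std::uintptr_t pack(Use *ptr, unsigned index) {
  assert(index <= 7);
  assert((reinterpret_cast<std::uintptr_t>(ptr) & 0x07) == 0);
  return reinterpret_cast<std::uintptr_t>(ptr) + index;
}

// Strip the low bits to recover the pointer (like ValueImpl::first_use()).
Use *unpack_ptr(std::uintptr_t packed) {
  return reinterpret_cast<Use *>(packed & ~static_cast<std::uintptr_t>(0x07));
}

// Read the low bits to recover the index (like ValueImpl::index()).
unsigned unpack_index(std::uintptr_t packed) {
  return static_cast<unsigned>(packed & 0x07);
}

int main() {
  Use use;
  std::uintptr_t packed = pack(&use, 5);  // e.g. inline result index 5
  assert(unpack_ptr(packed) == &use);
  assert(unpack_index(packed) == 5);
  std::cout << "pointer/index round-trip ok" << std::endl;
  return 0;
}

This is also why remove_from_ud_chain has to go through SetFirstUse rather than writing the head pointer directly: a raw store would clobber the index bits.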
+/// +class OpOperand { + public: + OpOperand() = default; + + OpOperand(const OpOperand &other) = default; + + OpOperand(const detail::OpOperandImpl *impl); // NOLINT + + OpOperand &operator=(const OpOperand &rhs); + + OpOperand &operator=(const detail::OpOperandImpl *impl); + + bool operator==(OpOperand other) const; + + bool operator!=(OpOperand other) const; + + bool operator!() const; + + explicit operator bool() const; + + detail::OpOperandImpl *impl() const; + + private: + detail::OpOperandImpl *impl_{nullptr}; +}; + +/// +/// \brief Value class represents the SSA value in the IR system. This class +/// only provides interfaces, for specific implementation, see Impl class. +/// +class Value { + public: + Value() = default; + + Value(const detail::ValueImpl *impl); // NOLINT + + Value(const Value &other) = default; + + bool operator==(const Value &other) const; + + bool operator!=(const Value &other) const; + + bool operator!() const; + + explicit operator bool() const; + + template + bool isa() const { + return ir::isa(*this); + } + + template + U dyn_cast() const { + return ir::dyn_cast(*this); + } + + detail::ValueImpl *impl() const; + + ir::Type type() const; + + void SetType(ir::Type type); + + Operation *GetDefiningOp() const; + + std::string print_ud_chain(); + + friend struct std::hash; + + protected: + detail::ValueImpl *impl_{nullptr}; +}; + +/// +/// \brief OpResult class represents the value defined by a result of operation. +/// This class only provides interfaces, for specific implementation, see Impl +/// class. +/// +class OpResult : public Value { + public: + using Value::Value; + + static bool classof(Value value); + + Operation *owner() const; + + uint32_t GetResultIndex() const; + + friend Operation; + + private: + static uint32_t GetValidInlineIndex(uint32_t index); + + detail::OpResultImpl *impl() const; +}; + +} // namespace ir + +namespace std { +template <> +struct hash { + std::size_t operator()(const ir::Value &obj) const { + return std::hash()(obj.impl_); + } +}; +} // namespace std diff --git a/paddle/ir/value_impl.h b/paddle/ir/value_impl.h new file mode 100644 index 0000000000000..2fa236dddd833 --- /dev/null +++ b/paddle/ir/value_impl.h @@ -0,0 +1,196 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/ir/value.h" + +namespace ir { +static const uint32_t OUTLINE_OP_RESULT_INDEX = 6; + +class Operation; + +namespace detail { +/// +/// \brief OpOperandImpl +/// +class OpOperandImpl { + public: + ir::Operation *owner() const; + + ir::detail::OpOperandImpl *next_use(); + + /// Remove this operand from the current use list. 
+ void remove_from_ud_chain(); + + ~OpOperandImpl(); + + friend ir::Operation; + + private: + OpOperandImpl(ir::Value source, ir::Operation *owner); + + ir::detail::OpOperandImpl *next_use_ = nullptr; + + ir::detail::OpOperandImpl **prev_use_addr_ = nullptr; + + ir::Value source_; + + ir::Operation *owner_ = nullptr; +}; + +/// +/// \brief ValueImpl is the base class of all drived Value classes such as +/// OpResultImpl. This class defines all the information and usage interface in +/// the IR Value. Each Value include three attributes: +/// (1) type: ir::Type; (2) UD-chain of value: OpOperandImpl*, first operand +/// address with offset of this value; (3) index: the position where the output +/// list of the parent operator. +/// +class alignas(8) ValueImpl { + public: + /// + /// \brief Interface functions of "type_" attribute. + /// + ir::Type type() const { return type_; } + + void SetType(ir::Type type) { type_ = type; } + + /// + /// \brief Interface functions of "first_use_offseted_by_index_" attribute. + /// + uint32_t index() const; + + OpOperandImpl *first_use() const { + return reinterpret_cast( + reinterpret_cast(first_use_offseted_by_index_) & (~0x07)); + } + + void SetFirstUse(OpOperandImpl *first_use) { + uint32_t offset = index(); + first_use_offseted_by_index_ = reinterpret_cast( + reinterpret_cast(first_use) + offset); + VLOG(4) << "The index of this value is " << offset + << ". Offset and set first use: " << first_use << " -> " + << first_use_offseted_by_index_ << "."; + } + + OpOperandImpl **first_use_addr() { return &first_use_offseted_by_index_; } + + bool use_empty() const { return first_use() == nullptr; } + + std::string print_ud_chain(); + + protected: + /// + /// \brief Only can be constructed by derived classes such as OpResultImpl. + /// + explicit ValueImpl(ir::Type type, uint32_t index) { + if (index > OUTLINE_OP_RESULT_INDEX) { + throw("The value of index must not exceed 6"); + } + type_ = type; + first_use_offseted_by_index_ = reinterpret_cast( + reinterpret_cast(nullptr) + index); + VLOG(4) << "Construct a ValueImpl whose's index is " << index + << ". The offset first_use address is: " + << first_use_offseted_by_index_; + } + + /// + /// \brief Attribute1: Type of value. + /// + ir::Type type_; + + /// + /// \brief Attribute2/3: Record the UD-chain of value and index. + /// NOTE: The members of the OpOperandImpl include four pointers, so this + /// class is 8-byte aligned, and the lower 3 bits of its address are 0, so the + /// index can be stored in these 3 bits, stipulate: + /// (1) index = 0~5: represent positions 0 to 5 inline + /// output(OpInlineResultImpl); (2) index = 6: represent the position >=6 + /// outline output(OpOutlineResultImpl); (3) index = 7 is reserved. + /// + OpOperandImpl *first_use_offseted_by_index_ = nullptr; +}; + +/// +/// \brief OpResultImpl is the implementation of an operation result. +/// +class alignas(8) OpResultImpl : public ValueImpl { + public: + using ValueImpl::ValueImpl; + + static bool classof(const ValueImpl &value) { return true; } + + /// + /// \brief Get the parent operation of this result.(op_ptr = value_ptr + + /// index) + /// + ir::Operation *owner() const; + + /// + /// \brief Get the result index of the operation result. + /// + uint32_t GetResultIndex() const; + + /// + /// \brief Get the maximum number of results that can be stored inline. 
+ /// + static uint32_t GetMaxInlineResultIndex() { + return OUTLINE_OP_RESULT_INDEX - 1; + } +}; + +/// +/// \brief OpInlineResultImpl is the implementation of an operation result whose +/// index <= 5. +/// +class OpInlineResultImpl : public OpResultImpl { + public: + OpInlineResultImpl(ir::Type type, uint32_t result_index) + : OpResultImpl(type, result_index) { + if (result_index > GetMaxInlineResultIndex()) { + throw("Inline result index should not exceed MaxInlineResultIndex(5)"); + } + } + + static bool classof(const OpResultImpl &value) { + return value.index() < OUTLINE_OP_RESULT_INDEX; + } + + uint32_t GetResultIndex() const { return index(); } +}; + +/// +/// \brief OpOutlineResultImpl is the implementation of an operation result +/// whose index > 5. +/// +class OpOutlineResultImpl : public OpResultImpl { + public: + OpOutlineResultImpl(ir::Type type, uint32_t outline_index) + : OpResultImpl(type, OUTLINE_OP_RESULT_INDEX), + outline_index_(outline_index) {} + + static bool classof(const OpResultImpl &value) { + return value.index() >= OUTLINE_OP_RESULT_INDEX; + } + + uint32_t GetResultIndex() const { return outline_index_; } + + uint32_t outline_index_; +}; + +} // namespace detail +} // namespace ir From cbdba5093302f78b2d5c1af331faf65b3a28bead Mon Sep 17 00:00:00 2001 From: Yulong Ao Date: Wed, 12 Apr 2023 18:25:39 +0800 Subject: [PATCH 56/59] [Auto Parallel] Move some changes or bug fixes from 2.4 to develop (#52721) * [Auto Parallel] Speedup the completion process * [Auto Parallel] Skip the property of dist_context when deepcopying * [Auto Parallel] Remove the unnecessary print * [Auto Parallel] Move some changes from 2.4 branch to develop * Update engine.py * [Auto Parallel] Fix a bug --- .../distributed/auto_parallel/constants.py | 10 + .../auto_parallel/cost/estimate_cost.py | 4 +- .../distributed/auto_parallel/dist_context.py | 49 ++ .../distributed/auto_parallel/dist_op.py | 93 +-- .../distributed/auto_parallel/dist_saver.py | 26 +- .../distributed/auto_parallel/engine.py | 182 ++--- .../distributed/auto_parallel/interface.py | 11 +- .../distributed/auto_parallel/parallelizer.py | 9 +- .../auto_parallel/parallelizer_v2.py | 33 +- .../auto_parallel/process_group.py | 8 +- .../distributed/auto_parallel/reshard.py | 90 ++- .../distributed/auto_parallel/strategy.py | 9 + .../auto_parallel/tuner/profiler.py | 2 +- .../communication/stream/all_reduce.py | 6 +- python/paddle/distributed/passes/__init__.py | 1 + .../passes/auto_parallel_pipeline.py | 626 ++++++++++++++++++ .../generation_pipeline_pass_unittest.py | 177 +++++ .../auto_parallel/test_dist_context.py | 1 + .../unittests/auto_parallel/test_pass_bf16.py | 2 +- .../test_pass_generation_pipeline.py | 57 ++ .../test_auto_parallel_reshard_serial.py | 3 + 21 files changed, 1174 insertions(+), 225 deletions(-) create mode 100644 python/paddle/distributed/passes/auto_parallel_pipeline.py create mode 100644 python/paddle/fluid/tests/unittests/auto_parallel/generation_pipeline_pass_unittest.py create mode 100644 python/paddle/fluid/tests/unittests/auto_parallel/test_pass_generation_pipeline.py diff --git a/python/paddle/distributed/auto_parallel/constants.py b/python/paddle/distributed/auto_parallel/constants.py index 83f5704f29cb0..d2fbadd78b9c5 100644 --- a/python/paddle/distributed/auto_parallel/constants.py +++ b/python/paddle/distributed/auto_parallel/constants.py @@ -102,6 +102,16 @@ def set_field_default_config(category, field, default_value): set_field_default_config(GRADIENT_MERGE, "k_steps", 1) 
set_field_default_config(GRADIENT_MERGE, "avg", True) +######################################### +# pipeline configuration +######################################### +PIPELINE = "pipeline" +set_field_default_config(PIPELINE, "enable", False) +set_field_default_config(PIPELINE, "schedule_mode", "1F1B") +set_field_default_config(PIPELINE, "micro_batch_size", 1) +set_field_default_config(PIPELINE, "accumulate_steps", 1) +set_field_default_config(PIPELINE, "generation_batch_size", 1) + ######################################### # quantization configuration ######################################### diff --git a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py index 6c081f94a2aad..f9c0b3cb15db2 100644 --- a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py @@ -606,8 +606,8 @@ def get_cost_from_engine(engine, mode): ) serial_startup_prog = ( - engine._serial_startup_progs[mode].clone() - if mode in engine._serial_startup_progs + engine._fwd_dist_contexts[mode]._original_serial_main_program.clone() + if mode in engine._fwd_dist_contexts else engine._orig_startup_prog.clone() ) losses = ( diff --git a/python/paddle/distributed/auto_parallel/dist_context.py b/python/paddle/distributed/auto_parallel/dist_context.py index 22a83ae341d62..f3418f271825a 100644 --- a/python/paddle/distributed/auto_parallel/dist_context.py +++ b/python/paddle/distributed/auto_parallel/dist_context.py @@ -130,6 +130,9 @@ def __init__( # A flag indicates whether the used parallelism is data parallel self._data_parallel = False + # record upstream and downstream of cur rank + self._up_down_streams = UpDownStream() + self._json_config = json_config @property @@ -218,6 +221,10 @@ def gradient_scale(self, gs): def data_parallel(self): return self._data_parallel + @property + def up_down_streams(self): + return self._up_down_streams + @data_parallel.setter def data_parallel(self, dp): self._data_parallel = dp @@ -1220,3 +1227,45 @@ def parse_backward_blocks(self, program): self.nblock += 1 assert self.nblock == len(program.blocks) + + +class UpDownStream: + def __init__(self): + self._ups = {} + self._downs = {} + + def add_up_stream(self, rank, up_stream): + ups = self._ups.get(rank, None) + if not ups: + self._ups[rank] = [up_stream] + elif up_stream != -1: + ups = list(filter(lambda a: a != -1, ups)) + ups.append(up_stream) + self._ups[rank] = ups + + def add_down_stream(self, rank, down_stream): + downs = self._downs.get(rank, None) + if not downs: + self._downs[rank] = [down_stream] + elif down_stream != -1: + downs = list(filter(lambda a: a != -1, downs)) + downs.append(down_stream) + self._downs[rank] = downs + + def add_pair_stream(self, up, down): + self.add_up_stream(up, -1) + self.add_up_stream(down, up) + self.add_down_stream(up, down) + self.add_down_stream(down, -1) + + def ups(self, rank): + ups = self._ups.get(rank, None) + if not ups: + return None + return list(set(ups)) + + def downs(self, rank): + downs = self._downs.get(rank, None) + if not downs: + return None + return list(set(downs)) diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/dist_op.py index 7960adafbdfc4..8489d3f3332a6 100644 --- a/python/paddle/distributed/auto_parallel/dist_op.py +++ b/python/paddle/distributed/auto_parallel/dist_op.py @@ -29,8 +29,6 @@ class DistributedOperator: def __init__(self, serial_op, dist_attr=None): 
self._serial_op = serial_op if dist_attr is not None and isinstance(dist_attr, OperatorDistAttr): - pass - # TODO: remove this deepcopy after we fix the issue self._dist_attr = copy.deepcopy(dist_attr) # self._dist_attr = dist_attr @@ -56,21 +54,6 @@ def dist_attr(self, dist_attr): self._dist_attr = dist_attr # TODO: Do we really need to write back to serial op? self._serial_op.dist_attr = dist_attr - # if self._dist_attr is None: - # self._dist_attr = OperatorDistAttr() - # # Create new dist_attr related to current serial_op - # dist_attr = self._filter_dist_attr(dist_attr) - # # Append suffix to mark the inputs or outputs - # if isinstance(dist_attr, dict): - # # Copy the keys since we may add new ones - # for key in list(dist_attr.keys()): - # if isinstance(key, Variable): - # if key.name in self._serial_op.input_arg_names: - # dist_attr[append_op_input_suffix(key.name)] = True - # if key.name in self._serial_op.output_arg_names: - # dist_attr[append_op_output_suffix(key.name)] = True - # self._dist_attr.init(dist_attr) - # self._init_default_dist_attr() def get_serial_input(self, name): if self._serial_op.type == "create_py_reader": @@ -83,81 +66,6 @@ def get_serial_output(self, name): tensor = self._serial_op.block._var_recursive(name) return tensor - # def _init_default_dist_attr(self): - # for tensor_name in self._serial_op.input_arg_names: - # if self._serial_op.type == "create_py_reader": - # tensor = None - # else: - # tensor = self._serial_op.block._var_recursive(tensor_name) - # self._serial_inputs[tensor_name] = tensor - # if tensor is None: - # tensor_shape = [] - # else: - # if tensor.type in __no_shape_var_type__: - # tensor_shape = [] - # else: - # tensor_shape = tensor.shape - # if self._dist_attr.get_input_dims_mapping(tensor_name) is None: - # tensor_dims_mapping = [-1 for _ in range(len(tensor_shape))] - # self._dist_attr.set_input_dims_mapping( - # tensor_name, tensor_dims_mapping - # ) - # for tensor_name in self._serial_op.output_arg_names: - # tensor = self._serial_op.block._var_recursive(tensor_name) - # if tensor.type in __no_shape_var_type__: - # tensor_shape = [] - # else: - # tensor_shape = tensor.shape - # self._serial_outputs[tensor_name] = tensor - # if self._dist_attr.get_output_dims_mapping(tensor_name) is None: - # tensor_dims_mapping = [-1 for _ in range(len(tensor_shape))] - # self._dist_attr.set_output_dims_mapping( - # tensor_name, tensor_dims_mapping - # ) - # if self._dist_attr.op_type is None: - # self._dist_attr.op_type = self.serial_op.type - # if self._dist_attr.impl_type is None: - # self._dist_attr.impl_type = "default" - # if self._dist_attr.impl_idx is None: - # self._dist_attr.impl_idx = 0 - # if self._dist_attr.is_recompute is None: - # self._dist_attr.is_recompute = False - - # def _filter_dist_attr(self, dist_attr): - # if dist_attr is None: - # return None - # new_dist_attr = None - # if isinstance(dist_attr, dict): - # new_dist_attr = {} - # for key, value in dist_attr.items(): - # if isinstance(key, Variable): - # if ( - # key.name in self._serial_op.input_arg_names - # or key.name in self._serial_op.output_arg_names - # ): - # new_dist_attr[key] = value - # else: - # new_dist_attr[key] = value - # elif isinstance(dist_attr, OperatorDistAttr): - # new_dist_attr = copy.deepcopy(dist_attr) - # new_dist_attr._inputs_dist_attrs.clear() - # new_dist_attr._outputs_dist_attrs.clear() - # for tensor_name in self._serial_op.input_arg_names: - # tensor_dist_attr = dist_attr.get_input_dist_attr(tensor_name) - # if tensor_dist_attr: - # 
new_dist_attr.set_input_dist_attr( - # tensor_name, tensor_dist_attr - # ) - # for tensor_name in self._serial_op.output_arg_names: - # tensor_dist_attr = dist_attr.get_output_dist_attr(tensor_name) - # if tensor_dist_attr: - # new_dist_attr.set_output_dist_attr( - # tensor_name, tensor_dist_attr - # ) - # else: - # assert False, "Cannot recognize the {} parameter.".format(dist_attr) - # return new_dist_attr - def validate_dist_attr(self): if "read" in self.serial_op.type or "while" == self.serial_op.type: return True @@ -402,5 +310,6 @@ def __call__(self, *args, **kwargs): if self._process_mesh is not None: dist_op.dist_attr.mark_annotated("process_mesh") default_dist_ctx.add_dist_op_for_program(dist_op) + default_dist_ctx.add_process_mesh(self._process_mesh) return output diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/dist_saver.py index 87a0319204fd3..8772d234ddf99 100644 --- a/python/paddle/distributed/auto_parallel/dist_saver.py +++ b/python/paddle/distributed/auto_parallel/dist_saver.py @@ -192,17 +192,27 @@ def save_inference_model(self, path, feed_vars, fetch_vars, exe, **kwargs): used_inputs += op.input_arg_names used_outputs += op.output_arg_names - for idx, var_name in enumerate(feed_vars_names): - if var_name not in used_inputs: - feed_vars_names.pop(idx) - for idx, var_name in enumerate(fetch_vars_names): - if var_name not in used_outputs: - fetch_vars_names.pop(idx) + # delete duplicated elements and keep order + feed_vars_names = list({}.fromkeys(feed_vars_names).keys()) + used_inputs = list({}.fromkeys(used_inputs).keys()) + fetch_vars_names = list({}.fromkeys(fetch_vars_names).keys()) + used_outputs = list({}.fromkeys(used_outputs).keys()) + + dist_feed_vars_names = [ + var_name for var_name in feed_vars_names if var_name in used_inputs + ] + dist_fetch_vars_names = [ + var_name + for var_name in fetch_vars_names + if var_name in used_outputs + ] dist_feed_vars = list( - reversed([global_block.vars[name] for name in feed_vars_names]) + reversed([global_block.vars[name] for name in dist_feed_vars_names]) ) - dist_fetch_vars = [global_block.vars[name] for name in fetch_vars_names] + dist_fetch_vars = [ + global_block.vars[name] for name in dist_fetch_vars_names + ] dist_filename = filename + "_dist" + str(rank_id) dist_path = os.path.join(dirname, dist_filename) diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index a84bea42d538f..9a4f8611daf42 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -17,7 +17,6 @@ import numbers import os import random -from collections import defaultdict import numpy as np @@ -154,7 +153,6 @@ def __init__( " or `paddle.static.Optimizer`." 
) self._optimizer = auto_utils.validate_opt(optimizer) - self._orig_optimizer = copy.deepcopy(self._optimizer) metrics = metrics or [] for metric in auto_utils.to_list(metrics): @@ -185,6 +183,12 @@ def __init__( ) fleet.init(is_collective=True) + # for compute cost + # TODO: remove _fwd_main_progs and _orig_optimizer + self._fwd_dist_contexts = {} + self._fwd_main_progs = {} + self._orig_optimizer = copy.deepcopy(self._optimizer) + self._executor = None self._cur_rank = paddle.distributed.get_rank() self._nranks = paddle.distributed.get_world_size() @@ -194,14 +198,6 @@ def __init__( self._orig_startup_prog = static.default_startup_program() self._orig_dist_context = get_default_distributed_context() self._dist_contexts = {} - self._fwd_main_progs = {} - self._fwd_dist_contexts = {} - self._serial_main_progs = {} - self._serial_startup_progs = {} - self._dist_main_progs = defaultdict(dict) # dist main programs - self._dist_startup_progs = defaultdict(dict) # dist startup programs - self._feed_vars = {} - self._fetch_vars = {} self._planners = {} self._has_prepared = {"train": False, "eval": False, "predict": False} self._has_prepared_reader = { @@ -334,9 +330,9 @@ def _prepare_data_tensor(self, inputs_spec, labels_spec, inputs, labels): return inputs, labels - def _prepare_reader(self): - dist_main_prog = self._dist_main_progs[self._mode][self._cur_rank] + def _prepare_reader(self, feed_list=[]): dist_context = self._dist_contexts[self._mode] + dist_main_prog = dist_context.dist_main_programs[self._cur_rank] dist_main_block = dist_main_prog.global_block() # NOTE: this list may be changed if Paddle changes the existing rules. @@ -357,10 +353,13 @@ def _prepare_reader(self): if op.type in related_reader_ops: reader_op_indices.append(idx) # Step 2: insert the new reader ops to cpp + # record the read ops' desc to insert to program of forward task_node + read_ops_desc = [] new_reader_ops = [] for idx in reversed(reader_op_indices): new_op_desc = dist_main_block.desc._prepend_op() new_op_desc.copy_from(dist_main_block.ops[idx].desc) + read_ops_desc.append(new_op_desc) new_op = Operator( dist_main_block, new_op_desc, type=new_op_desc.type() ) @@ -379,6 +378,29 @@ def _prepare_reader(self): dist_main_block._sync_with_cpp() self._has_prepared_reader[self._mode] = True + # Insert read op to forward TaskNode if 1F1B pass is setted + if self.main_program._pipeline_opt: + assert "tasks" in self.main_program._pipeline_opt["fleet_opt"] + fleet_opt = self.main_program._pipeline_opt["fleet_opt"] + fwd_task = fleet_opt["tasks"][0] + fwd_prog = fwd_task.get_program() + fwd_block = fwd_prog.global_block() + + for var in feed_list: + if var.name not in fwd_block.vars: + fwd_block._clone_variable(var) + + for op_desc in read_ops_desc: + new_op_desc = fwd_block.desc._prepend_op() + new_op_desc.copy_from(op_desc) + new_op = Operator( + fwd_block, new_op_desc, type=new_op_desc.type() + ) + fwd_block.ops.insert(0, new_op) + + fwd_block._sync_with_cpp() + fwd_task.set_program(fwd_prog) + def _prepare_feed(self, data, user_feeds, mode): feeds = {} if data is not None: @@ -428,14 +450,16 @@ def _process_fetch_group(group_name, var_list): fetch_names.append([]) fetch_indices.append(group_indices) + dist_context = self._dist_contexts[mode] + fetch_vars = dist_context.serial_fetch_vars if mode != "predict": - _process_fetch_group("loss", self._fetch_vars[mode]["loss"]) + _process_fetch_group("loss", fetch_vars["loss"]) if mode != "predict": - metrics = self._fetch_vars[mode]["metrics"] + metrics = fetch_vars["metrics"] 
for i, var_list in enumerate(metrics): _process_fetch_group("metrics_" + str(i), var_list) if mode == "predict": - _process_fetch_group("outputs", self._fetch_vars[mode]["outputs"]) + _process_fetch_group("outputs", fetch_vars["outputs"]) for usr_fetch in user_fetches: var_name = _to_name_str(usr_fetch) fetch(var_name) @@ -472,7 +496,8 @@ def _prepare_logger( logs["loss"] = outs[idx][0] group_idx += 1 # logging metrics - metric_vars = self._fetch_vars[mode]["metrics"] + dist_context = self._dist_contexts[mode] + metric_vars = dist_context.serial_fetch_vars["metrics"] if metric_vars: for metric in self._metrics: metrics_indices = fetch_indices[group_idx] @@ -503,15 +528,18 @@ def _prepare_logger( logs["fetches"] = logs_fetch return logs - def _prepare_program(self, mode): + def _prepare_program(self, mode, init_parameters=True): # Do the build process self._build(mode) # Do the planning process self._plan(mode) # Do the parallel process self._parallel(mode) - # Init comm and startup program - self._initialize(mode) + # Init comm + self._init_comm() + if init_parameters: + # startup program + self._initialize(mode) self._has_prepared[mode] = True def _build(self, mode): @@ -543,9 +571,9 @@ def _build(self, mode): paddle.enable_static() else: - # build program in static graph mode - serial_main_prog = self._serial_main_progs.get(mode, None) - if serial_main_prog is not None: + # build program in static mode + dist_context = self._dist_contexts.get(mode, None) + if dist_context is not None: return outputs = [] @@ -735,42 +763,23 @@ def _init_dist_context(self, mode): ) dist_context.set_op_dist_attr_for_program(op, ref_op_dist_attr) - def _initialize(self, mode): - # Get the current content from the distributed context - self._serial_main_progs[mode] = self._dist_contexts[ - mode - ].serial_main_program - self._serial_startup_progs[mode] = self._dist_contexts[ - mode - ].serial_startup_program - self._dist_main_progs[mode] = self._dist_contexts[ - mode - ].dist_main_programs - self._dist_startup_progs[mode] = self._dist_contexts[ - mode - ].dist_startup_programs - self._feed_vars[mode] = self._dist_contexts[mode].serial_feed_vars - self._fetch_vars[mode] = self._dist_contexts[mode].serial_fetch_vars - self._optimizer = self._dist_contexts[mode]._serial_optimizer - + def _init_comm(self): if self._nranks > 1: # Traverse different rank programs and traverse each op of them, # instantiate communication by process_mapping. all_process_groups = get_all_process_groups() - cur_rank = self._cur_rank - # NOTE: After the implementation of the unified dynamic and static communication group - # initialization mode in the future, the initialization logic of full mode - # will be removed because port occupation error may occur. 
+ if self._strategy.auto_mode == "full": auto_utils.initialize_pg_in_full_mode( - all_process_groups, cur_rank + all_process_groups, self._cur_rank ) else: for process_group in all_process_groups: - if cur_rank not in process_group.ranks: + if self._cur_rank not in process_group.ranks: continue process_group.instantiate() + def _initialize(self, mode): self._place = _get_device() if isinstance(self._place, paddle.framework.CUDAPlace): self._place = paddle.framework.CUDAPlace( @@ -782,9 +791,9 @@ def _initialize(self, mode): np.random.seed(self._strategy.seed + self._dp_ranks[0]) random.seed(self._strategy.seed + self._dp_ranks[0]) + dist_context = self._dist_contexts[mode] if self._dygraph_mode: - dist_context = self._dist_contexts[mode] - dist_main_program = self._dist_main_progs[mode][self._cur_rank] + dist_main_program = dist_context.dist_main_programs[self._cur_rank] self.program_helper.init( dist_main_program, self._place, dist_context ) @@ -792,7 +801,9 @@ def _initialize(self, mode): if self._executor is None: self._executor = paddle.static.Executor(self._place) uninitialized = [] - dist_startup_prog = self._dist_startup_progs[mode][self._cur_rank] + dist_startup_prog = dist_context.dist_startup_programs[ + self._cur_rank + ] for var in dist_startup_prog.list_vars(): scope_var = global_scope().find_var(var.name) if scope_var and scope_var.get_tensor()._is_initialized(): @@ -809,7 +820,9 @@ def _initialize(self, mode): if self._strategy.reinit: self._logger.info("NOTE: parameters will be re-initialized.") - dist_startup_prog = self._dist_startup_progs[mode][self._cur_rank] + dist_startup_prog = dist_context.dist_startup_programs[ + self._cur_rank + ] self._executor.run(dist_startup_prog) def fit( @@ -1282,6 +1295,7 @@ def prepare( main_program=None, startup_program=None, mode=None, + init_parameters=True, ): if mode is not None: self.to_mode(mode) @@ -1324,7 +1338,7 @@ def prepare( self._inputs_spec, self._labels_spec = inputs_spec, labels_spec self._inputs, self._labels = inputs, labels if not self._has_prepared[self._mode]: - self._prepare_program(self._mode) + self._prepare_program(self._mode, init_parameters) else: self._switch_mode(self._mode) @@ -1375,16 +1389,17 @@ def _prepare_dataloader( ) batch_size //= self._k_steps - dist_main_prog = self._dist_main_progs[self._mode][self._cur_rank] - dist_startup_prog = self._dist_startup_progs[self._mode][self._cur_rank] + dist_context = self._dist_contexts[self._mode] + dist_main_prog = dist_context.dist_main_programs[self._cur_rank] + dist_startup_prog = dist_context.dist_startup_programs[self._cur_rank] dist_main_block = dist_main_prog.global_block() # NOTE: Get feed_list, then insert dataloader op with sharded var shape. # Cause predict_program does not contain labels var, # then we will add labels var from serial_program to dist_program, # that maintains the length of feed_list equal to the length of dataset's values. 
- inputs_var = self._feed_vars[self._mode]["inputs"] - labels_var = self._feed_vars[self._mode]["labels"] + inputs_var = dist_context.serial_feed_vars["inputs"] + labels_var = dist_context.serial_feed_vars["labels"] feed_list = [] for var in inputs_var + labels_var: if var.name in dist_main_block.vars: @@ -1443,16 +1458,17 @@ def _prepare_dataloader_from_generator( ) batch_size //= self._k_steps - dist_main_prog = self._dist_main_progs[self._mode][self._cur_rank] - dist_startup_prog = self._dist_startup_progs[self._mode][self._cur_rank] + dist_context = self._dist_contexts[self._mode] + dist_main_prog = dist_context.dist_main_programs[self._cur_rank] + dist_startup_prog = dist_context.dist_startup_programs[self._cur_rank] dist_main_block = dist_main_prog.global_block() # NOTE: Get feed_list, then insert dataloader op with sharded var shape. # Cause predict_program does not contain labels var, # then we will add labels var from serial_program to dist_program, # that maintains the length of feed_list equal to the length of dataset's values. - inputs_var = self._feed_vars[self._mode]["inputs"] - labels_var = self._feed_vars[self._mode]["labels"] + inputs_var = dist_context.serial_feed_vars["inputs"] + labels_var = dist_context.serial_feed_vars["labels"] feed_list = [] for var in inputs_var + labels_var: if var.name in dist_main_block.vars: @@ -1482,7 +1498,7 @@ def _prepare_dataloader_from_generator( data_parallel_world_size=self._dp_world_sizes, data_parallel_rank=self._dp_ranks, ) - self._prepare_reader() + self._prepare_reader(feed_list) return dataloader def _tune(self, tune_data, tune_sample_split=None, batch_size=1): @@ -1542,7 +1558,7 @@ def _metrics_name(self): def _switch_mode(self, mode): assert ( - mode in self._dist_main_progs + mode in self._dist_contexts ), f"{mode} model is not ready, please call `prepare()` first." 
self.to_mode(mode) self._optimizer = self._dist_contexts[mode]._serial_optimizer @@ -1556,8 +1572,8 @@ def to_mode(self, mode): self._mode = mode def _set_state_dict(self, mode, strict, state_dict, dist_attr): - program = self._dist_main_progs[mode][self._cur_rank] dist_context = self._dist_contexts[mode] + program = dist_context.dist_main_programs[self._cur_rank] cur_dist_attr = auto_utils.get_dist_attr(program, dist_context) converter = Converter(state_dict, dist_attr, cur_dist_attr) state_dict = converter.convert(strict=strict) @@ -1622,10 +1638,10 @@ def save(self, path, training=True): """ if training: - assert self._mode in self._serial_main_progs - serial_program = self._serial_main_progs[self._mode] - dist_main_prog = self._dist_main_progs[self._mode][self._cur_rank] + assert self._mode in self._dist_contexts dist_context = self._dist_contexts[self._mode] + serial_program = dist_context.serial_main_program + dist_main_prog = dist_context.dist_main_programs[self._cur_rank] self._saver.save( path, serial_program=serial_program, @@ -1633,10 +1649,11 @@ def save(self, path, training=True): dist_context=dist_context, ) else: - assert "predict" in self._dist_main_progs - feed_vars = self._feed_vars["predict"]['inputs'] - fetch_vars = self._fetch_vars["predict"]['outputs'] - dist_main_prog = self._dist_main_progs["predict"][self._cur_rank] + assert "predict" in self._dist_contexts + dist_context = self._dist_contexts["predict"] + feed_vars = dist_context.serial_feed_vars['inputs'] + fetch_vars = dist_context.serial_fetch_vars['outputs'] + dist_main_prog = dist_context.dist_main_programs[self._cur_rank] if self._strategy.qat.enable and self._strategy.qat.onnx_format: from paddle.static.quantization import QuantWeightPass @@ -1776,11 +1793,13 @@ def cost(self, inputs_spec=None, labels_spec=None, mode=None): @property def main_program(self): - return self._dist_main_progs[self._mode][self._cur_rank] + dist_context = self._dist_contexts[self._mode] + return dist_context.dist_main_programs[self._cur_rank] @property def startup_program(self): - return self._dist_startup_progs[self._mode][self._cur_rank] + dist_context = self._dist_contexts[self._mode] + return dist_context.dist_startup_programs[self._cur_rank] @property def dist_context(self): @@ -1788,15 +1807,30 @@ def dist_context(self): @property def serial_main_program(self): - return self._serial_main_progs[self._mode] + dist_context = self._dist_contexts[self._mode] + return dist_context.serial_main_program @property def serial_startup_program(self): - return self._serial_startup_progs[self._mode] + dist_context = self._dist_contexts[self._mode] + return dist_context.serial_startup_program + + @property + def feed_vars(self): + dist_context = self._dist_contexts[self._mode] + return dist_context.serial_feed_vars @property def fetch_vars(self): - return self._fetch_vars[self._mode] + dist_context = self._dist_contexts[self._mode] + return dist_context.serial_fetch_vars + + @property + def optimizer(self): + dist_context = self._dist_contexts[self._mode] + if dist_context._serial_optimizer: + return dist_context._serial_optimizer + return self._optimizer @property def inputs(self): diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index 9fda85ecef010..76207bc588968 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -79,7 +79,15 @@ def shard_tensor(x, process_mesh=None, shard_spec=None): assert 
isinstance( shard_spec, list ), f"Argument shard_spec {shard_spec} is not an instance of list" - dist_tensor = DistributedTensor(x) + if isinstance(x, str): + x = ( + paddle.static.default_main_program() + .global_block() + ._var_recursive(x) + ) + dist_tensor = DistributedTensor(x) + else: + dist_tensor = DistributedTensor(x) serial_tensor = dist_tensor.serial_tensor dist_tensor.dist_attr.process_mesh = process_mesh if serial_tensor.type in __no_shape_var_type__: @@ -102,6 +110,7 @@ def shard_tensor(x, process_mesh=None, shard_spec=None): default_dist_ctx = get_default_distributed_context() default_dist_ctx.add_dist_tensor_for_program(dist_tensor) dist_tensor = default_dist_ctx.get_dist_tensor_for_program(x) + default_dist_ctx.add_process_mesh(process_mesh) return x diff --git a/python/paddle/distributed/auto_parallel/parallelizer.py b/python/paddle/distributed/auto_parallel/parallelizer.py index d2463f3308637..549f618c6cbc9 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer.py +++ b/python/paddle/distributed/auto_parallel/parallelizer.py @@ -499,12 +499,19 @@ def parallelize( break if is_pipeline: with paddle.static.program_guard(dist_main_prog): - paddle.distributed.barrier() + paddle.distributed.barrier(get_process_group(0)) # Traverse different rank programs and traverse each op of them, # instantiate communication by process_mapping. all_process_groups = get_all_process_groups() for process_group in all_process_groups: + if len(_g_process_group_map) > 0: + tmp = paddle.to_tensor([1], dtype="int32") + paddle.distributed.all_reduce( + tmp, sync_op=True, group=_g_process_group_map[0] + ) + paddle.device.cuda.synchronize() + if rank not in process_group.ranks: continue process_group.instantiate() diff --git a/python/paddle/distributed/auto_parallel/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/parallelizer_v2.py index a76a3f5dcb9ab..c4ef623b17260 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer_v2.py +++ b/python/paddle/distributed/auto_parallel/parallelizer_v2.py @@ -177,10 +177,22 @@ def parallel(self, rank): time.time() - time0, self._mode ) ) + # Apply post optimization passes + time0 = time.time() + self._apply_post_optimization( + dist_main_prog, dist_startup_prog, rank, dist_params_grads + ) + self._logger.debug( + "within parallel apply_post_optimization time: {}, mode {}".format( + time.time() - time0, self._mode + ) + ) # Clone program for test if self._mode != 'train': + pipeline_opt = dist_main_prog._pipeline_opt dist_main_prog = dist_main_prog.clone(for_test=True) dist_startup_prog = dist_startup_prog.clone(for_test=True) + dist_main_prog._pipeline_opt = pipeline_opt # Store the distributed programs for further usages self._dist_context.dist_main_programs[rank] = dist_main_prog @@ -247,7 +259,7 @@ def _apply_pre_optimization( # apply quantization pass # The pass can be applied when mode must be 'train' - if self._strategy.qat.enable: + if self._mode == 'train' and self._strategy.qat.enable: config = copy.deepcopy(self._strategy.qat.to_dict()) config["dist_context"] = self._dist_context config["params_grads"] = params_grads @@ -307,8 +319,8 @@ def _apply_post_optimization( ) params_grads = self._pass_context.get_attr("params_grads") - # GradClip is train-only optimization if self._mode == "train": + # GradClip is train-only optimization config = copy.deepcopy(self._strategy.sharding.to_dict()) config["dist_context"] = self._dist_context config["params_grads"] = params_grads @@ -330,6 +342,13 @@ def _apply_post_optimization( 
[main_program], [startup_program], self._pass_context ) + if self._strategy.pipeline.enable: + self._strategy.gradient_merge.enable = True + self._strategy.gradient_merge.k_steps = ( + self._strategy.pipeline.accumulate_steps + ) + self._strategy.gradient_merge.avg = True + # gradient_merge is then train-only optimization if self._mode == "train" and self._strategy.gradient_merge.enable: config = copy.deepcopy(self._strategy.gradient_merge.to_dict()) @@ -342,6 +361,16 @@ def _apply_post_optimization( [main_program], [startup_program], self._pass_context ) + if self._strategy.pipeline.enable: + config = copy.deepcopy(self._strategy.pipeline.to_dict()) + config["dist_context"] = self._dist_context + auto_parallel_pipeline_pass = new_pass( + "auto_parallel_pipeline", config + ) + auto_parallel_pipeline_pass.apply( + [main_program], [startup_program], self._pass_context + ) + if self._mode == "train" and self._strategy.fused_passes.enable: if len(self._strategy.fused_passes.fused_passes_list) > 0: new_pass_list = [] diff --git a/python/paddle/distributed/auto_parallel/process_group.py b/python/paddle/distributed/auto_parallel/process_group.py index 83e1642ba21bb..8c300cbcd53b6 100644 --- a/python/paddle/distributed/auto_parallel/process_group.py +++ b/python/paddle/distributed/auto_parallel/process_group.py @@ -52,9 +52,9 @@ def new_process_group(ranks, group_id=None, force_new_group=False): global _g_process_group_map if not force_new_group: # A key constructed from ranks is used for avoiding duplication - new_key = ''.join(map(str, sorted(ranks))) + new_key = ''.join(map(str, ranks)) for pg_id, pg in _g_process_group_map.items(): - cur_key = ''.join(map(str, sorted(pg.ranks))) + cur_key = ''.join(map(str, pg.ranks)) if pg_id != 0 and new_key == cur_key: return pg # If not matching the existing one, construct a new process group @@ -82,7 +82,7 @@ def __init__(self, group_id, ranks): group_id != 0 ), "Process group id 0 is reserved for all ranks." 
self._group_id = group_id - self._ranks = sorted(ranks) + self._ranks = ranks # Add the current ranks into group 0 if group_id != 0: global _g_process_group_map @@ -109,7 +109,7 @@ def add_ranks(self, new_ranks): not self.is_instantiate() ), "Cannot add new ranks after instantiating the process group" self._ranks.extend(new_ranks) - self._ranks = sorted(set(self.ranks)) + self._ranks = list(set(self.ranks)) def local_rank(self, global_rank): if global_rank in self.ranks: diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/reshard.py index 7461e85c67248..91e07fc651d20 100644 --- a/python/paddle/distributed/auto_parallel/reshard.py +++ b/python/paddle/distributed/auto_parallel/reshard.py @@ -848,7 +848,8 @@ def remove_no_need_ops(auto_parallel_main_prog, dist_context, rank_id): remove_op_idx.append(idx) for idx in remove_op_idx[::-1]: - block._remove_op(idx) + block._remove_op(idx, sync=False) + block._sync_with_cpp() @staticmethod def remove_no_need_vars( @@ -1000,7 +1001,8 @@ def remove_no_need_in_startup( if is_no_need_op: remove_op_idx.append(idx) for idx in remove_op_idx[::-1]: - startup_block._remove_op(idx) + startup_block._remove_op(idx, sync=False) + startup_block._sync_with_cpp() class Resharder: @@ -1441,6 +1443,8 @@ def find_op_desc_seq(self, dist_tensor, dist_attr, serial=False): target_process_group = target_process_mesh.process_ids target_process_shape = target_process_mesh.shape + op_role = dist_attr[2] + if source_tensor.shape[0] < 0: assert source_tensor.shape[0] == -1 new_shape = list(source_tensor.shape) @@ -1583,6 +1587,10 @@ def find_op_desc_seq(self, dist_tensor, dist_attr, serial=False): Resharder.concat_partitions( partition_index_list, source_partition_index ) + if int(op_role) == int(OpRole.Forward): + self.dist_context.up_down_streams.add_pair_stream( + to_send_process, target_process + ) # append concat op desc op_desc_seq[target_process].append( @@ -2037,13 +2045,6 @@ def parse_op_desc( op_dist_attr.set_input_dims_mapping( new_name, dims_mapping ) - # if ( - # old_name - # in op_dist_attr._inputs_dist_attrs - # ): - # op_dist_attr.del_input_dist_attr( - # old_name - # ) op_dist_attr.set_input_dims_mapping( new_name, dims_mapping ) @@ -2067,7 +2068,6 @@ def parse_op_desc( op_dist_attr.set_input_dims_mapping( new_name, dims_mapping ) - # op_dist_attr.del_input_dist_attr(old_name) op_dist_attr.set_input_dims_mapping( new_name, dims_mapping ) @@ -2095,7 +2095,6 @@ def parse_op_desc( op_dist_attr.set_input_dims_mapping( new_name, dims_mapping ) - # op_dist_attr.del_input_dist_attr(old_name) op_dist_attr.set_input_dims_mapping( new_name, dims_mapping ) @@ -2135,7 +2134,13 @@ def _get_subblock_input_attrs(self, op, var_name): has_exist = True break if not has_exist: - input_attrs.append([process_mesh, input_dims_mapping]) + input_attrs.append( + [ + process_mesh, + input_dims_mapping, + op.attr('op_role'), + ] + ) return input_attrs def _get_subblock_output_attrs(self, op, var_name): @@ -2165,7 +2170,13 @@ def _get_subblock_output_attrs(self, op, var_name): has_exist = True break if not has_exist: - output_attrs.append([process_mesh, output_dims_mapping]) + output_attrs.append( + [ + process_mesh, + output_dims_mapping, + op.attr('op_role'), + ] + ) return output_attrs def _get_common_op_input_attrs(self, op, var_name): @@ -2188,7 +2199,9 @@ def _get_common_op_input_attrs(self, op, var_name): input_dims_mapping = dist_attr.get_input_dims_mapping(var_name) input_attrs = [] for process_mesh in process_meshes: - 
input_attrs.append([process_mesh, input_dims_mapping]) + input_attrs.append( + [process_mesh, input_dims_mapping, op.attr('op_role')] + ) return input_attrs @@ -2207,7 +2220,7 @@ def get_op_input_attrs(self, op, var_name): assert ( op_input_attrs - ), "The input '{}' of op '{}' has no distibution attributes in subblock".format( + ), "The input '{}' of op '{}' has no distributed attributes in subblock".format( op.name, var_name ) @@ -2215,30 +2228,24 @@ def get_op_input_attrs(self, op, var_name): def _remove_global_process_mesh(self): """Remove global process mesh from dist_context.process_meshes""" - processes = set() + process_ids = set() process_mesh_count = len(self.dist_context.process_meshes) if process_mesh_count > 1: - global_process_mesh_idx = None + global_process_mesh_idx = [] + has_sub_process_mesh = False for process_mesh in self.dist_context.process_meshes: - for process in process_mesh.process_ids: - processes.add(process) + for process_id in process_mesh.process_ids: + process_ids.add(process_id) for idx, process_mesh in enumerate( self.dist_context.process_meshes ): - if len(set(process_mesh.process_ids)) == len(processes): - global_process_mesh_idx = idx - break + if len(set(process_mesh.process_ids)) == len(process_ids): + global_process_mesh_idx.append(idx) + elif set(process_mesh.process_ids) < process_ids: + has_sub_process_mesh = True - if global_process_mesh_idx is not None: - is_removed = False - global_mesh = self.dist_context.process_meshes[idx] - for i, mesh in enumerate(self.dist_context.process_meshes): - if i == idx: - continue - if set(mesh.process_ids) < set(global_mesh.process_ids): - is_removed = True - - if is_removed: + if has_sub_process_mesh: + for idx in reversed(global_process_mesh_idx): self.dist_context.process_meshes.pop(idx) def _change_subblock_op_input_and_output(self, block_idx, block): @@ -2278,7 +2285,6 @@ def _change_subblock_op_input_and_output(self, block_idx, block): op_dist_attr.set_input_dist_attr( new_name, op_input_dist_attr ) - # op_dist_attr.del_input_dist_attr(old_name) # the outputs also need to be renamed when the output name is the same with input name in inplace op for var_name in op.output_arg_names: @@ -2302,7 +2308,6 @@ def _change_subblock_op_input_and_output(self, block_idx, block): op_dist_attr.set_output_dist_attr( new_name, op_output_dist_attr ) - # op_dist_attr.del_output_dist_attr(old_name) def _reshard_input(self, block): idx = 0 @@ -2450,7 +2455,7 @@ def _hadnle_recv(self, block, idx, var, op, send_rank, recv_rank): assert set_lod is True # cast int64 to bool - block._insert_op( + cast_op = block._insert_op( idx + 2, type='cast', inputs={ @@ -2465,6 +2470,7 @@ def _hadnle_recv(self, block, idx, var, op, send_rank, recv_rank): 'op_role': op.attr('op_role'), }, ) + cast_op._set_attr('op_namescope', "/auto_parallel/reshard") else: if var.lod_level != 0: recv_out = block.create_var( @@ -2612,6 +2618,10 @@ def _reshard_output(self, block): ] if recv_rank == item: continue + if var.shape[0] == -1: + new_shape = list(var.shape) + new_shape[0] = self.batch_size + var.desc.set_shape(new_shape) if self.rank_id == item: # if send bool data, cast then send self._handle_send( @@ -2640,6 +2650,10 @@ def _reshard_output(self, block): item = output_attr[0].process_ids[index] if recv_rank == item: continue + if var.shape[0] == -1: + new_shape = list(var.shape) + new_shape[0] = self.batch_size + var.desc.set_shape(new_shape) if self.rank_id == item: # if send bool data, cast then send self._handle_send( @@ -2714,7 +2728,11 @@ def 
get_cost(self, op, tensor, cluster): tensor.name ) process_mesh = dist_op.dist_attr.process_mesh - dist_attr = [process_mesh, dims_mapping] + dist_attr = [ + process_mesh, + dims_mapping, + dist_op.serial_op.attr('op_role'), + ] if dist_tensor is not None and self.need_reshard( dist_tensor, dist_attr ): diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py index 58a08586ff5cb..a4dd2c54d2eed 100644 --- a/python/paddle/distributed/auto_parallel/strategy.py +++ b/python/paddle/distributed/auto_parallel/strategy.py @@ -102,6 +102,12 @@ def __init__(self, config_dict=None): super().__init__(category, config_dict) +class PipelineConfig(BaseConfig): + def __init__(self, config_dict=None): + category = constants.PIPELINE + super().__init__(category, config_dict) + + class QATConfig(BaseConfig): def __init__(self, config_dict=None): category = constants.QAT @@ -186,6 +192,9 @@ def __init__(self, config=None): config_dict = self._config_dict.get(constants.GRADIENT_MERGE, None) self.gradient_merge = GradientMergeConfig(config_dict) + config_dict = self._config_dict.get(constants.PIPELINE, None) + self.pipeline = PipelineConfig(config_dict) + config_dict = self._config_dict.get(constants.QAT, None) self.qat = QATConfig(config_dict) diff --git a/python/paddle/distributed/auto_parallel/tuner/profiler.py b/python/paddle/distributed/auto_parallel/tuner/profiler.py index cca53773ebbef..27e0fa4984544 100644 --- a/python/paddle/distributed/auto_parallel/tuner/profiler.py +++ b/python/paddle/distributed/auto_parallel/tuner/profiler.py @@ -91,7 +91,7 @@ def init_process_groups(group_map, rank): # TODO should instantiate global group first all_process_groups = get_all_process_groups() for process_group in all_process_groups: - if rank not in process_group.ranks: + if process_group.id == 0 or rank not in process_group.ranks: continue print(process_group) process_group.instantiate() diff --git a/python/paddle/distributed/communication/stream/all_reduce.py b/python/paddle/distributed/communication/stream/all_reduce.py index 3b870afe6f5c1..6b38bffc0bf3f 100644 --- a/python/paddle/distributed/communication/stream/all_reduce.py +++ b/python/paddle/distributed/communication/stream/all_reduce.py @@ -122,9 +122,9 @@ def all_reduce( tensor, op, group, sync_op, use_calc_stream ) else: - assert ( - group is None - ), "Group can not be used in static graph mode for now." + # assert ( + # group is None + # ), "Group can not be used in static graph mode for now." 
return _all_reduce_in_static_mode( tensor, op, group, sync_op, use_calc_stream ) diff --git a/python/paddle/distributed/passes/__init__.py b/python/paddle/distributed/passes/__init__.py index 8550cb049b11e..8ab110e60c3b9 100644 --- a/python/paddle/distributed/passes/__init__.py +++ b/python/paddle/distributed/passes/__init__.py @@ -23,6 +23,7 @@ from .auto_parallel_data_parallel_optimization import * # noqa: F403 from .auto_parallel_grad_clip import * # noqa: F403 from .auto_parallel_supplement_explicit_dependencies import * # noqa: F403 +from .auto_parallel_pipeline import * # noqa: F403 from .cpp_pass import * # noqa: F403 from .ps_trainer_pass import * # noqa: F403 from .ps_server_pass import * # noqa: F403 diff --git a/python/paddle/distributed/passes/auto_parallel_pipeline.py b/python/paddle/distributed/passes/auto_parallel_pipeline.py new file mode 100644 index 0000000000000..5b707d088bf8b --- /dev/null +++ b/python/paddle/distributed/passes/auto_parallel_pipeline.py @@ -0,0 +1,626 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from paddle.distributed.fleet.fleet_executor_utils import TaskNode +from paddle.fluid import core +from paddle.fluid.framework import Parameter, Program + +from .pass_base import PassBase, register_pass + +__not_shape_var_type__ = [ + core.VarDesc.VarType.READER, + core.VarDesc.VarType.STEP_SCOPES, + core.VarDesc.VarType.LOD_TENSOR_ARRAY, + core.VarDesc.VarType.FEED_MINIBATCH, + core.VarDesc.VarType.FETCH_LIST, +] + + +@register_pass("auto_parallel_pipeline") +class PipelinePass(PassBase): + def __init__(self): + super().__init__() + self.set_attr("dist_context", None) + + def _check_self(self): + if self.get_attr("dist_context") is None: + return False + return True + + def _check_conflict(self, other_pass): + return True + + def _apply_single_impl(self, main_program, startup_program, context): + self._dist_context = self.get_attr("dist_context") + self._acc_steps = self.get_attr("accumulate_steps") + self._mode = self.get_attr("schedule_mode") + self._gen_bsz = self.get_attr("generation_batch_size") + self._program = main_program + + if self._mode == "1F1B": + raise NotImplementedError("1F1B has not been implemented") + elif self._mode == "F-Then-B": + raise NotImplementedError("F-Then-B has not been implemented") + elif self._mode == "stream": + self._insert_sync_ops_for_stream() + self._task_stream() + else: + raise ValueError( + "Now only 'F-then-B', '1F1B' and 'stream' are supported." 
+ "The given value is {}.".format(self._mode) + ) + + def _insert_sync_ops_for_stream(self): + + for block in self._program.blocks: + offset = 0 + send_vars = [] + # insert sync ops + for index, op in enumerate(list(block.ops)): + if op.type == 'send_v2': + # step1: set 'use_calc_stream' False + op._set_attr("use_calc_stream", False) + op_role = op.attr('op_role') + # step2: insert 'c_sync_calc_stream' op before 'send_v2' op + var_name = op.input_arg_names[0] + var = block.var(var_name) + block._insert_op_without_sync( + index=index + offset, + type="c_sync_calc_stream", + inputs={'X': [var]}, + outputs={'Out': [var]}, + attrs={'op_role': op_role}, + ) + offset += 1 + send_vars.append(var_name) + + for var_name in send_vars: + nop_op = block.append_op(type='nop') + nop_op.desc.set_input('X', [var_name]) + nop_op.desc.set_output('Out', [var_name]) + + block._sync_with_cpp() + + def _create_param(self, dst_block, src_var): + copied_kwargs = {} + copied_kwargs['trainable'] = src_var.trainable + copied_kwargs['optimize_attr'] = src_var.optimize_attr + copied_kwargs['regularizer'] = src_var.regularizer + copied_kwargs['do_model_average'] = src_var.do_model_average + copied_kwargs['need_clip'] = src_var.need_clip + + Parameter( + block=dst_block, + type=src_var.type, + name=src_var.name, + shape=src_var.shape, + dtype=src_var.dtype, + lod_level=src_var.lod_level, + error_clip=src_var.error_clip, + stop_gradient=src_var.stop_gradient, + is_data=src_var.is_data, + belong_to_optimizer=src_var.belong_to_optimizer, + **copied_kwargs + ) + + def _create_inter(self, dst_block, src_var): + dst_block.create_var( + type=src_var.type, + name=src_var.name, + shape=src_var.shape, + dtype=src_var.dtype, + lod_level=src_var.lod_level, + persistable=src_var.persistable, + error_clip=src_var.error_clip, + stop_gradient=src_var.stop_gradient, + is_data=src_var.is_data, + belong_to_optimizer=src_var.belong_to_optimizer, + ) + + def _create_var( + self, src_block, dst_block, src_varname, force_create=False + ): + + if not force_create: + src_var = src_block.var(src_varname) + else: + src_var = src_block._var_recursive(src_varname) + if src_var.type in __not_shape_var_type__: + persist = getattr(src_var, 'persistable', False) + dst_block.create_var( + type=src_var.type, + name=src_var.name, + persistable=persist, + error_clip=src_var.error_clip, + stop_gradient=src_var.stop_gradient, + is_data=src_var.is_data, + belong_to_optimizer=src_var.belong_to_optimizer, + ) + else: + if isinstance(src_var, Parameter): + self._create_param(dst_block, src_var) + else: + self._create_inter(dst_block, src_var) + + def _create_program(self, src_block, dst_block, src_op, force_create=False): + dst_op_desc = dst_block.desc.append_op() + dst_op_desc.copy_from(src_op.desc) + for input_varname in src_op.input_arg_names: + if src_block.has_var(input_varname) or ( + force_create and src_block._find_var_recursive(input_varname) + ): + self._create_var( + src_block, dst_block, input_varname, force_create + ) + for output_varname in src_op.output_arg_names: + if src_block.has_var(output_varname) or ( + force_create and src_block._find_var_recursive(output_varname) + ): + self._create_var( + src_block, dst_block, output_varname, force_create + ) + + def _get_pp_stage(self, rank): + pp_idx = None + for idx, process_mesh in enumerate(self._dist_context.process_meshes): + if rank in process_mesh.processes: + pp_idx = idx + break + return pp_idx + + def _task_stream(self): + cur_rank = int(os.getenv("PADDLE_TRAINER_ID", 0)) + trainer_endpoints = 
os.getenv("PADDLE_TRAINER_ENDPOINTS", "").split(',') + nrank = len(trainer_endpoints) + num_of_functionality = 5 + + # compute current pp stage + pp_stages = len(self._dist_context.process_meshes) + cur_pp_stage = self._get_pp_stage(cur_rank) + + start_prog = Program() + cond_prog = Program() + end_prog = Program() + send_prog = Program() + recv_prog = Program() + + cond_var_name = None + send_vars_name = set() + recv_vars_name = {} + for ib, src_block in enumerate(self._program.blocks): + if ib == 0: + strat_block = start_prog.block(0) + end_block = end_prog.block(0) + + is_after_while_op = False + for op in src_block.ops: + if op.type == "while": + assert len(op.input('Condition')) == 1 + cond_var_name = op.input('Condition')[0] + is_after_while_op = True + continue + + if not is_after_while_op: + self._create_program( + src_block, strat_block, op, force_create=True + ) + else: + self._create_program( + src_block, end_block, op, force_create=True + ) + elif ib == 1: + send_block = send_prog.block(0) + recv_block = recv_prog.block(0) + + is_after_send_op = False + is_after_recv_op = False + for op in src_block.ops: + if op.type == "send_v2" and not is_after_send_op: + is_after_send_op = True + if cur_pp_stage == pp_stages - 1: + if op.type in ["c_sync_calc_stream", "nop"]: + continue + if ( + op.type not in ["recv_2", "assign"] + and op.has_attr('op_namescope') + and "/auto_parallel/reshard" + in op.attr('op_namescope') + ): + if ( + len(op.desc.input_arg_names()) > 0 + and "@RESHARD" + not in op.desc.input_arg_names()[0] + ): + send_vars_name.add( + op.desc.input_arg_names()[0] + ) + continue + if op.type == "send_v2": + continue + self._create_program( + src_block, send_block, op, force_create=True + ) + continue + + if ( + is_after_send_op + and not is_after_recv_op + and op.type == "recv_v2" + ): + is_after_recv_op = True + if op.has_attr( + 'op_namescope' + ) and "/auto_parallel/reshard" in op.attr( + 'op_namescope' + ): + var_name = op.desc.output_arg_names()[0] + index = var_name.find("@") + if index > 0: + old_var_name = var_name[:index] + else: + old_var_name = var_name + recv_vars_name[var_name] = old_var_name + if not src_block._find_var_recursive(old_var_name): + src_var = src_block._var_recursive(var_name) + recv_block.create_var( + type=src_var.type, + name=old_var_name, + shape=src_var.shape, + dtype=src_var.dtype, + lod_level=src_var.lod_level, + persistable=src_var.persistable, + error_clip=src_var.error_clip, + stop_gradient=src_var.stop_gradient, + is_data=src_var.is_data, + belong_to_optimizer=src_var.belong_to_optimizer, + ) + continue + + self._create_program( + src_block, recv_block, op, force_create=True + ) + continue + + if not is_after_send_op or not is_after_recv_op: + if cur_pp_stage == pp_stages - 1: + if op.type in ["c_sync_calc_stream", "nop"]: + continue + if ( + op.type not in ["recv_2", "assign"] + and op.has_attr('op_namescope') + and "/auto_parallel/reshard" + in op.attr('op_namescope') + ): + if ( + len(op.desc.input_arg_names()) > 0 + and "@RESHARD" + not in op.desc.input_arg_names()[0] + ): + send_vars_name.add( + op.desc.input_arg_names()[0] + ) + continue + if op.type == "send_v2": + continue + self._create_program( + src_block, send_block, op, force_create=True + ) + + if is_after_send_op and is_after_recv_op: + if op.has_attr( + 'op_namescope' + ) and "/auto_parallel/reshard" in op.attr( + 'op_namescope' + ): + var_name = op.desc.output_arg_names()[0] + index = var_name.find("@") + if index > 0: + old_var_name = var_name[:index] + else: + 
old_var_name = var_name + recv_vars_name[var_name] = old_var_name + if not src_block._find_var_recursive(old_var_name): + src_var = src_block._var_recursive(var_name) + recv_block.create_var( + type=src_var.type, + name=old_var_name, + shape=src_var.shape, + dtype=src_var.dtype, + lod_level=src_var.lod_level, + persistable=src_var.persistable, + error_clip=src_var.error_clip, + stop_gradient=src_var.stop_gradient, + is_data=src_var.is_data, + belong_to_optimizer=src_var.belong_to_optimizer, + ) + continue + + for in_name in op.desc.input_arg_names(): + if in_name in recv_vars_name: + op.desc._rename_input( + in_name, recv_vars_name[in_name] + ) + self._create_program( + src_block, recv_block, op, force_create=True + ) + else: + raise Exception("Only support generation condition.") + + start_prog._sync_with_cpp() + end_prog._sync_with_cpp() + send_prog._sync_with_cpp() + recv_prog._sync_with_cpp() + + assert cond_var_name is not None + + send_task_node_var_dtype = {} + send_task_node_var_shape = {} + recv_task_node_var_dtype = {} + recv_task_node_var_shape = {} + for var_name in list(send_vars_name): + var = send_prog.global_block().vars[var_name] + dtype = str(var.dtype) + send_task_node_var_dtype[var_name] = dtype[ + dtype.find("paddle.") + len("paddle.") : + ] + send_task_node_var_shape[var_name] = var.shape + for var_name in list(set(recv_vars_name.values())): + var = recv_prog.global_block().vars[var_name] + dtype = str(var.dtype) + recv_task_node_var_dtype[var_name] = dtype[ + dtype.find("paddle.") + len("paddle.") : + ] + recv_task_node_var_shape[var_name] = var.shape + + vars_to_dtype = [] + vars_to_shape = [] + if len(send_task_node_var_dtype) > 0: + assert len(recv_task_node_var_dtype) == 0 + vars_to_dtype = send_task_node_var_dtype + vars_to_shape = send_task_node_var_shape + if len(recv_task_node_var_dtype) > 0: + assert len(send_task_node_var_dtype) == 0 + vars_to_dtype = recv_task_node_var_dtype + vars_to_shape = recv_task_node_var_shape + + start_task_node = TaskNode( + rank=cur_rank, + max_run_times=self._acc_steps, + node_type="Start", + task_id=int(cur_rank * num_of_functionality + 0), + program=start_prog, + lazy_initialize=True, + ) + cond_task_node = TaskNode( + rank=cur_rank, + max_run_times=self._acc_steps, + node_type="Cond", + task_id=int(cur_rank * num_of_functionality + 1), + program=cond_prog, + cond_var_name=cond_var_name, + lazy_initialize=True, + ) + send_task_node = TaskNode( + rank=cur_rank, + max_run_times=self._acc_steps, + node_type="Compute", + task_id=int(cur_rank * num_of_functionality + 2), + program=send_prog, + lazy_initialize=True, + ) + recv_task_node = TaskNode( + rank=cur_rank, + max_run_times=self._acc_steps, + node_type="Compute", + task_id=int(cur_rank * num_of_functionality + 3), + program=recv_prog, + lazy_initialize=True, + vars_to_dtype=vars_to_dtype, + vars_to_shape=vars_to_shape, + ) + end_task_node = TaskNode( + rank=cur_rank, + max_run_times=self._acc_steps, + node_type="Compute", + task_id=int(cur_rank * num_of_functionality + 4), + program=end_prog, + lazy_initialize=True, + ) + + # add dependencies for task nodes intra stage + inf = -1 + pp_buff_size = int(pp_stages - cur_pp_stage) + start_task_node.add_downstream_task( + cond_task_node.task_id(), self._gen_bsz + ) + print( + "Task ", + start_task_node.task_id(), + "'s downstream is:", + cond_task_node.task_id(), + ", buffer size is:", + self._gen_bsz, + ) + cond_task_node.add_upstream_task( + start_task_node.task_id(), self._gen_bsz + ) + print( + "Task ", + 
cond_task_node.task_id(), + "'s upstream is:", + start_task_node.task_id(), + ", buffer size is:", + self._gen_bsz, + ) + cond_task_node.add_downstream_task(send_task_node.task_id(), inf) + print( + "Task ", + cond_task_node.task_id(), + "'s downstream is:", + send_task_node.task_id(), + ", buffer size is:", + inf, + ) + send_task_node.add_upstream_task(cond_task_node.task_id(), inf) + print( + "Task ", + send_task_node.task_id(), + "'s upstream is:", + cond_task_node.task_id(), + ", buffer size is:", + inf, + ) + send_task_node.add_downstream_task( + recv_task_node.task_id(), pp_buff_size + ) + print( + "Task ", + send_task_node.task_id(), + "'s downstream is:", + recv_task_node.task_id(), + ", buffer size is:", + pp_buff_size, + ) + recv_task_node.add_upstream_task(send_task_node.task_id(), pp_buff_size) + print( + "Task ", + recv_task_node.task_id(), + "'s upstream is:", + send_task_node.task_id(), + ", buffer size is:", + pp_buff_size, + ) + recv_task_node.add_downstream_task( + cond_task_node.task_id(), inf, core.DependType.LOOP + ) + print( + "Task ", + recv_task_node.task_id(), + "'s downstream is:", + cond_task_node.task_id(), + ", buffer size is:", + inf, + ) + cond_task_node.add_upstream_task( + recv_task_node.task_id(), inf, core.DependType.LOOP + ) + print( + "Task ", + cond_task_node.task_id(), + "'s upstream is:", + recv_task_node.task_id(), + ", buffer size is:", + inf, + ) + cond_task_node.add_downstream_task( + end_task_node.task_id(), inf, core.DependType.STOP_LOOP + ) + print( + "Task ", + cond_task_node.task_id(), + "'s downstream is:", + end_task_node.task_id(), + ", buffer size is:", + inf, + ) + end_task_node.add_upstream_task( + cond_task_node.task_id(), inf, core.DependType.STOP_LOOP + ) + print( + "Task ", + end_task_node.task_id(), + "'s upstream is:", + cond_task_node.task_id(), + ", buffer size is:", + inf, + ) + + # add dependencies for task nodes inter stage + # get upstream ranks and downstream ranks of cur_rank + up_down_streams = self._dist_context.up_down_streams + pp_upstream_ranks = up_down_streams.ups(cur_rank) + pp_downstream_ranks = up_down_streams.downs(cur_rank) + + for upstream_rank in pp_upstream_ranks: + upstream_pp_stage = self._get_pp_stage(upstream_rank) + if upstream_pp_stage < pp_stages - 1: + upstream_task_id = int(upstream_rank * num_of_functionality + 2) + send_task_node.add_upstream_task(upstream_task_id) + print( + "Task ", + send_task_node.task_id(), + "'s upstream is:", + upstream_task_id, + ", buffer size is:", + 2, + ) + else: + upstream_task_id = int(upstream_rank * num_of_functionality + 3) + recv_task_node.add_upstream_task(upstream_task_id) + print( + "Task ", + recv_task_node.task_id(), + "'s upstream is:", + upstream_task_id, + ", buffer size is:", + 2, + ) + for downstream_rank in pp_downstream_ranks: + if cur_pp_stage < pp_stages - 1: + downstream_task_id = int( + downstream_rank * num_of_functionality + 2 + ) + send_task_node.add_downstream_task(downstream_task_id) + print( + "Task ", + send_task_node.task_id(), + "'s downstream is:", + downstream_task_id, + ", buffer size is:", + 2, + ) + else: + downstream_task_id = int( + downstream_rank * num_of_functionality + 3 + ) + recv_task_node.add_downstream_task(downstream_task_id) + print( + "Task ", + recv_task_node.task_id(), + "'s downstream is:", + downstream_task_id, + ", buffer size is:", + 2, + ) + + task_id_to_rank = {} + for i in range(nrank): + for j in range(num_of_functionality): + task_id_to_rank[int(i * num_of_functionality + j)] = i + self._program._pipeline_opt 
= { + "fleet_opt": { + 'tasks': [ + start_task_node, + cond_task_node, + send_task_node, + recv_task_node, + end_task_node, + ], + 'task_id_to_rank': task_id_to_rank, + 'num_micro_batches': self._acc_steps, + 'inference_generation': True, + } + } diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/generation_pipeline_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/generation_pipeline_pass_unittest.py new file mode 100644 index 0000000000000..4a54b99df0dba --- /dev/null +++ b/python/paddle/fluid/tests/unittests/auto_parallel/generation_pipeline_pass_unittest.py @@ -0,0 +1,177 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +import paddle.nn.functional as F +from paddle import nn +from paddle.distributed.fleet import auto + +_g_mesh = auto.ProcessMesh([0, 1]) +PP_MESH_0 = auto.ProcessMesh([0]) +PP_MESH_1 = auto.ProcessMesh([1]) + +image_size = 1024 +class_num = 10 + + +class MyDataset(paddle.io.Dataset): + def __init__(self, num_samples): + super().__init__() + self.num_samples = num_samples + + def __getitem__(self, index): + input = np.random.uniform(size=image_size).astype("float32") + input = np.random.uniform(size=image_size).astype("float32") + return input, input + + def __len__(self): + return self.num_samples + + +class MLPLayer(nn.Layer): + def __init__( + self, + hidden_size=1024, + intermediate_size=4 * 1024, + dropout_ratio=0.1, + initializer_range=0.02, + ): + super().__init__() + d_model = hidden_size + dim_feedforward = intermediate_size + weight_attr = paddle.ParamAttr( + initializer=nn.initializer.Normal(mean=0.0, std=initializer_range) + ) + bias_attr = None + + self.linear0 = nn.Linear( + d_model, dim_feedforward, weight_attr, bias_attr=bias_attr + ) + self.linear1 = nn.Linear( + dim_feedforward, d_model, weight_attr, bias_attr=bias_attr + ) + self.linear2 = nn.Linear(d_model, 1, weight_attr, bias_attr=bias_attr) + self.norm = nn.LayerNorm(d_model, epsilon=1e-5) + self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train") + + def forward(self, input): + out = auto.shard_op(self.norm, PP_MESH_0)(input) + out = self.linear0(out) + out = F.gelu(out, approximate=True) + out = auto.shard_op(self.linear1, PP_MESH_1)(out) + out = self.dropout(out) + out = self.linear2(out) + return out + + +class GEN(nn.Layer): + def __init__(self, mlp): + super().__init__() + self.mlp = mlp + + def forward(self, input): + model_kwargs = {} + + output = self.mlp(input) + + cur_step = paddle.full([1], 0, dtype='int64') + total_step = paddle.full([1], 10, dtype='int64') + + model_kwargs['input'] = input + model_kwargs['output'] = output + + while cur_step < total_step: + + out = self.mlp(model_kwargs['input']) + model_kwargs['res'] = out + paddle.increment(cur_step) + + auto.shard_op(paddle.assign, _g_mesh)(model_kwargs['input'], out) + + output = F.gelu(model_kwargs['input'], approximate=True) + + return output, cur_step + + +def 
get_model(): + + with paddle.LazyGuard(): + mlp = MLPLayer() + gen = GEN(mlp) + return gen + + +class TestGenerationPipeline(unittest.TestCase): + def test_pp2(self): + + model = get_model() + + strategy = auto.Strategy() + pipeline = strategy.pipeline + pipeline.enable = True + pipeline.schedule_mode = "stream" + pipeline.generation_batch_size = 4 + pipeline.accumulate_steps = 4 + engine = auto.Engine(model, strategy=strategy) + + engine.prepare( + inputs_spec=paddle.static.InputSpec( + shape=[2, 1024], name='input', dtype='float32' + ), + labels_spec=paddle.static.InputSpec( + shape=[2, 1024], name='label', dtype='float32' + ), + mode="eval", + ) + + train_data = MyDataset(50 * 2) + train_dataloader = engine._prepare_dataloader_from_generator( + dataset=train_data, + capacity=70, + iterable=False, + batch_size=2, + epochs=1, + steps_per_epoch=100, + ) + engine._prepare_reader() + + fleet_opt = engine.main_program._pipeline_opt['fleet_opt'] + assert len(fleet_opt['tasks']) == 5 + assert fleet_opt['inference_generation'] + assert fleet_opt['num_micro_batches'] == 4 + num_task_in_rank = 5 + for idx, (task_id, rank_id) in enumerate( + fleet_opt['task_id_to_rank'].items() + ): + assert ( + task_id == rank_id * num_task_in_rank + idx % num_task_in_rank + ) + + train_dataloader._inner_dataloader.start() + try: + engine._executor.run( + engine.main_program, use_program_cache=False, return_numpy=False + ) + except paddle.fluid.core.EOFException: + print("test done") + train_dataloader._inner_dataloader.reset() + train_dataloader._inner_dataloader.start() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py index 029f33f8c647e..10f78aedd4fb9 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py @@ -247,6 +247,7 @@ def test_deepcopy(self): "_backup_serial_main_program_stack", "_backup_serial_startup_program_stack", "_pass_context", + "_tensor_nodes_with_same_name", ] for i in range(len(copy_list)): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_bf16.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_bf16.py index c83c098959c13..411cee39eca54 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_bf16.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_bf16.py @@ -203,7 +203,7 @@ def test_bf16_pass(self): bf16_o1_engine.prepare( inputs_spec=inputs_spec, labels_spec=labels_spec, mode="train" ) - self.check_program(bf16_o1_engine._dist_main_progs["train"][0]) + self.check_program(bf16_o1_engine.main_program) print("BF16!check program successfully!") diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_generation_pipeline.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_generation_pipeline.py new file mode 100644 index 0000000000000..598359cd51685 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_generation_pipeline.py @@ -0,0 +1,57 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess +import sys +import tempfile +import unittest + + +class TestGenerationPipeline(unittest.TestCase): + def test_pp2(self): + file_dir = os.path.dirname(os.path.abspath(__file__)) + launch_model_path = os.path.join( + file_dir, "generation_pipeline_pass_unittest.py" + ) + + if os.environ.get("WITH_COVERAGE", "OFF") == "ON": + coverage_args = ["-m", "coverage", "run", "--branch", "-p"] + else: + coverage_args = [] + + tmp_dir = tempfile.TemporaryDirectory() + cmd = ( + [sys.executable, "-u"] + + coverage_args + + [ + "-m", + "paddle.distributed.launch", + "--devices", + "0,1", + "--log_dir", + tmp_dir.name, + launch_model_path, + ] + ) + + process = subprocess.Popen(cmd) + process.wait() + self.assertEqual(process.returncode, 0) + + tmp_dir.cleanup() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py index f7c4fb0e94e89..11c817b9baeea 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py @@ -180,6 +180,9 @@ def check_send_recv_result(dist_main_prog, rank_id): return send_result and recv_result +@unittest.skipIf( + not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA" +) class TestMLPReshard(unittest.TestCase): def test_mlp_serial(self): global _global_parallel_strategy From 3ece0ece6428f54e3e2060299e0a43dc005eb24f Mon Sep 17 00:00:00 2001 From: ShenLiang <1422485404@qq.com> Date: Wed, 12 Apr 2023 06:40:25 -0500 Subject: [PATCH 57/59] fix bug of mp (#52789) --- .../distributed/fleet/layers/mpu/mp_ops.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py index 08093710b3b89..884af3a441431 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py @@ -46,7 +46,15 @@ def _c_identity(tensor, group=None): class c_identity_eager(PyLayer): @staticmethod def forward(ctx, tensor): - return tensor + return _legacy_C_ops.c_identity( + tensor, + 'use_calc_stream', + True, + 'ring_id', + group.id, + 'use_model_parallel', + True, + ) @staticmethod def backward(ctx, dy): @@ -249,7 +257,15 @@ def forward( @staticmethod def backward(ctx, dy): - return dy + return _legacy_C_ops.c_identity( + dy, + 'use_calc_stream', + True, + 'ring_id', + ctx.ring_id, + 'use_model_parallel', + True, + ) return mp_allreduce_eager.apply( tensor, group, use_calc_stream, use_model_parallel From e54e2bc8bcc7f4e75edf449320cbbf8e8047377e Mon Sep 17 00:00:00 2001 From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com> Date: Wed, 12 Apr 2023 20:15:24 +0800 Subject: [PATCH 58/59] Support cuda12+ arch and Hopper arch. Discard 30 and Kepler arch. 
(#52285) * slight modify * support cuda12+ arch, Hopper arch and discard 30 arch * add arch 90 for each paddle_known_gpu_archs12 * for comments --- cmake/cuda.cmake | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 9c1d71914bc21..82c4ec14d9ef8 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -7,28 +7,33 @@ if(WITH_NV_JETSON) set(paddle_known_gpu_archs "53 62 72") set(paddle_known_gpu_archs10 "53 62 72") set(paddle_known_gpu_archs11 "53 62 72 87") + set(paddle_known_gpu_archs12 "53 62 72 87 90") elseif(NEW_RELEASE_ALL) message("Using New Release Strategy - All Arches Packge") add_definitions(-DNEW_RELEASE_ALL) - set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86") - set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75") + set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90") + set(paddle_known_gpu_archs10 "50 52 60 61 70 75") set(paddle_known_gpu_archs11 "50 60 61 70 75 80") + set(paddle_known_gpu_archs12 "50 60 61 70 75 80 90") elseif(NEW_RELEASE_PYPI) message("Using New Release Strategy - Cubin Packge") add_definitions(-DNEW_RELEASE_PYPI) - set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86") + set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90") set(paddle_known_gpu_archs10 "") set(paddle_known_gpu_archs11 "61 70 75 80") + set(paddle_known_gpu_archs12 "61 70 75 80 90") elseif(NEW_RELEASE_JIT) message("Using New Release Strategy - JIT Packge") add_definitions(-DNEW_RELEASE_JIT) - set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86") - set(paddle_known_gpu_archs10 "35 50 60 70 75") - set(paddle_known_gpu_archs11 "35 50 60 70 75 80") + set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90") + set(paddle_known_gpu_archs10 "50 60 70 75") + set(paddle_known_gpu_archs11 "50 60 70 75 80") + set(paddle_known_gpu_archs12 "50 60 70 75 80 90") else() - set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80") + set(paddle_known_gpu_archs "50 52 60 61 70 75 80 90") set(paddle_known_gpu_archs10 "50 52 60 61 70 75") set(paddle_known_gpu_archs11 "52 60 61 70 75 80") + set(paddle_known_gpu_archs12 "52 60 61 70 75 80 90") endif() ###################################################################################### @@ -100,12 +105,12 @@ endfunction() function(select_nvcc_arch_flags out_variable out_arch_bin) # List of arch names set(archs_names - "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" + "Hopper" "All" "Manual") set(archs_name_default "Auto") @@ -144,9 +149,7 @@ function(select_nvcc_arch_flags out_variable out_arch_bin) unset(CUDA_ARCH_PTX CACHE) endif() - if(${CUDA_ARCH_NAME} STREQUAL "Kepler") - set(cuda_arch_bin "30 35") - elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") + if(${CUDA_ARCH_NAME} STREQUAL "Maxwell") if(WITH_NV_JETSON) set(cuda_arch_bin "53") else() @@ -176,6 +179,8 @@ function(select_nvcc_arch_flags out_variable out_arch_bin) set(cuda_arch_bin "80 86") endif() endif() + elseif(${CUDA_ARCH_NAME} STREQUAL "Hopper") + set(cuda_arch_bin "90") elseif(${CUDA_ARCH_NAME} STREQUAL "All") set(cuda_arch_bin ${paddle_known_gpu_archs}) elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") @@ -266,6 +271,11 @@ elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") +elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 13.0) # CUDA 12.0+ + set(paddle_known_gpu_archs "${paddle_known_gpu_archs12} 
86") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") endif() if(NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) From 57201d9d2b5504f41d30c33781f4d8c9c0ff36df Mon Sep 17 00:00:00 2001 From: Wang Xin Date: Wed, 12 Apr 2023 20:23:43 +0800 Subject: [PATCH 59/59] add autogen code for clip_by_norm op (#52743) * add autogen code for clip_by_norm op * bug fixed --- paddle/fluid/operators/clip_by_norm_op.cc | 29 ---------------------- paddle/phi/api/yaml/legacy_ops.yaml | 8 ------ paddle/phi/api/yaml/op_compat.yaml | 6 +++++ paddle/phi/api/yaml/ops.yaml | 9 +++++++ paddle/phi/ops/compat/clip_by_norm_sig.cc | 30 ----------------------- 5 files changed, 15 insertions(+), 67 deletions(-) delete mode 100644 paddle/fluid/operators/clip_by_norm_op.cc delete mode 100644 paddle/phi/ops/compat/clip_by_norm_sig.cc diff --git a/paddle/fluid/operators/clip_by_norm_op.cc b/paddle/fluid/operators/clip_by_norm_op.cc deleted file mode 100644 index 3805e11d752e3..0000000000000 --- a/paddle/fluid/operators/clip_by_norm_op.cc +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/clip_by_norm_op.h" -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace ops = paddle::operators; - -DECLARE_INFER_SHAPE_FUNCTOR(clip_by_norm, - ClipByNormInferShapeFunctor, - PD_INFER_META(phi::ClipByNormInferMeta)); - -REGISTER_OP_WITHOUT_GRADIENT(clip_by_norm, - ops::ClipByNormOp, - ops::ClipByNormOpMaker, - ClipByNormInferShapeFunctor); diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 100329f555bea..abd42601a8fa0 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -222,14 +222,6 @@ kernel : func : class_center_sample -- op : clip_by_norm - args : (Tensor x, float max_norm) - output : Tensor(out) - infer_meta : - func : ClipByNormInferMeta - kernel : - func : clip_by_norm - - op : coalesce_tensor args : (Tensor[] input, DataType dtype, bool copy_data = false, bool set_constant = false, bool persist_output = false, float constant = 0.0, bool use_align = true, int align_size = -1, int size_of_dtype = -1, int64_t[] concated_shapes = {}, int64_t[] concated_ranks = {}) output : Tensor[](output){input.size()}, Tensor(fused_output) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index bfbab2d52af4e..44f065feb7d72 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -356,6 +356,12 @@ extra : attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] +- op : clip_by_norm + inputs : + x : X + outputs : + out : Out + - op : complex backward : complex_grad inputs : diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 980505ddeb2f1..20adbd31aca06 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -355,6 +355,15 @@ data_type : x backward : clip_grad +- op : clip_by_norm + args : (Tensor x, float max_norm) + output : Tensor(out) + infer_meta : + func : ClipByNormInferMeta + kernel : + func : clip_by_norm {dense -> dense} + clip_by_norm_sr {selected_rows -> selected_rows} + - op : complex args : (Tensor real, Tensor imag) output : Tensor diff --git a/paddle/phi/ops/compat/clip_by_norm_sig.cc b/paddle/phi/ops/compat/clip_by_norm_sig.cc deleted file mode 100644 index 8a2cecc0293d3..0000000000000 --- a/paddle/phi/ops/compat/clip_by_norm_sig.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature ClipByNormOpArgumentMapping(const ArgumentMappingContext& ctx) { - if (ctx.IsDenseTensorInput("X")) { - return KernelSignature("clip_by_norm", {"X"}, {"max_norm"}, {"Out"}); - } else if (ctx.IsSelectedRowsInput("X")) { - return KernelSignature("clip_by_norm_sr", {"X"}, {"max_norm"}, {"Out"}); - } - return KernelSignature("unregistered", {}, {}, {}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(clip_by_norm, phi::ClipByNormOpArgumentMapping);
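
For reference, the `clip_by_norm` op migrated to ops.yaml above takes a tensor `x` and a float `max_norm` and computes `x * max_norm / max(||x||_2, max_norm)`, i.e. `x` is returned unchanged when its L2 norm is at most `max_norm` and is rescaled to that norm otherwise. A minimal NumPy sketch of that semantics follows (illustrative only; `clip_by_norm_reference` is a hypothetical helper, not the generated Phi kernel or the public Paddle API):

    import numpy as np

    def clip_by_norm_reference(x: np.ndarray, max_norm: float) -> np.ndarray:
        # Rescale x only when its L2 norm exceeds max_norm.
        norm = float(np.sqrt((x * x).sum()))
        return x if norm <= max_norm else x * (max_norm / norm)

    x = np.array([3.0, 4.0], dtype=np.float32)   # ||x||_2 = 5.0
    print(clip_by_norm_reference(x, 1.0))        # -> [0.6 0.8]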