From d9501622526a815c85c5256921b65ae5f919283e Mon Sep 17 00:00:00 2001 From: 0x45f Date: Sun, 23 Apr 2023 13:05:34 +0000 Subject: [PATCH 01/12] [Dy2St]Fix x grad names when high order gradient --- .../eager/to_static/run_program_op_node.h | 8 ++++++++ python/paddle/jit/dy2static/partial_program.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index b4deb4e4ac306..52ad153490a7c 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -691,6 +691,7 @@ class GradNodeRunProgram : public egr::GradNodeBase { std::vector *x_grad) { auto x_grad_names = PADDLE_GET_CONST(std::vector, attrs_.at("x_grad_names")); +<<<<<<< HEAD PADDLE_ENFORCE_EQ( x.size(), x_grad_names.size(), @@ -699,6 +700,13 @@ class GradNodeRunProgram : public egr::GradNodeBase { "But received x.size() = %d, x_grad_names.size() = %d", x.size(), x_grad_names.size())); +======= + PADDLE_ENFORCE_EQ(x.size(), + x_grad_names.size(), + paddle::platform::errors::InvalidArgument( + "The x.size() and " + "x_grad_names.size() should be equal.")); +>>>>>>> [Dy2St]Fix x grad names when high order gradient // TODO(dev): Need an elegant way to determine inforamtion of grad_tensor, // such as: name, tensor type(DenseTensor or SelectedRows). diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index ad2e62b9e0461..5c0dceaa2a476 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -448,6 +448,18 @@ def get_forward_end_op_idx(self, program): paddle.utils._hash_with_id(program, self) ] + @LazyInitialized + def _out_grad_names(self): + return _out_grad_names( + self._train_program.desc, + self.get_forward_end_op_idx(self._train_program), + len(self._outputs.var_ids), + ) + + @LazyInitialized + def _x_grad_names(self): + return _param_grad_names(self._train_program.desc, self._inputs) + @property def program(self): """ @@ -736,9 +748,15 @@ def _prepare_attributes(self): 'param_grad_names', self._grad_var_names.get('param', []), 'out_grad_names', +<<<<<<< HEAD self._grad_var_names.get('out', []), 'x_grad_names', self._grad_var_names.get('x', []), +======= + self._out_grad_names, + 'x_grad_names', + self._x_grad_names, +>>>>>>> [Dy2St]Fix x grad names when high order gradient ) ) if self._cuda_graph_capture_mode: From cbad143660aa489adb78e663dd59ec1d0516090b Mon Sep 17 00:00:00 2001 From: 0x45f Date: Sun, 23 Apr 2023 13:23:20 +0000 Subject: [PATCH 02/12] Polish error msg --- paddle/fluid/eager/to_static/run_program_op_node.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index 52ad153490a7c..562838032fa18 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -691,7 +691,6 @@ class GradNodeRunProgram : public egr::GradNodeBase { std::vector *x_grad) { auto x_grad_names = PADDLE_GET_CONST(std::vector, attrs_.at("x_grad_names")); -<<<<<<< HEAD PADDLE_ENFORCE_EQ( x.size(), x_grad_names.size(), @@ -700,13 +699,6 @@ class GradNodeRunProgram : public egr::GradNodeBase { "But received x.size() = %d, x_grad_names.size() = %d", x.size(), x_grad_names.size())); -======= - PADDLE_ENFORCE_EQ(x.size(), - x_grad_names.size(), - 
paddle::platform::errors::InvalidArgument( - "The x.size() and " - "x_grad_names.size() should be equal.")); ->>>>>>> [Dy2St]Fix x grad names when high order gradient // TODO(dev): Need an elegant way to determine inforamtion of grad_tensor, // such as: name, tensor type(DenseTensor or SelectedRows). @@ -763,4 +755,4 @@ class GradNodeRunProgram : public egr::GradNodeBase { paddle::framework::AttributeMap attrs_; bool executed_{false}; -}; +}; \ No newline at end of file From 8bd76d7295744a9d6c4e9edbaf6f552e11ab607b Mon Sep 17 00:00:00 2001 From: 0x45f Date: Sun, 23 Apr 2023 14:00:15 +0000 Subject: [PATCH 03/12] Add inputs var to backward in dy2st --- .../paddle/jit/dy2static/partial_program.py | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 5c0dceaa2a476..e0d0395b9480f 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -151,19 +151,16 @@ class PartialProgramLayer: """ PartialProgramLayer wraps all the ops from layers decorated by `@to_static` and execute them as a static subgraph. - .. note:: **1. This is a very low level API. Users should not use this API directly. Please use `partial_program_from(concrete_program)` to create it. **2. LoDTensorArray is not currently supported in the output. - Args: main_program(Program): The main program that contains ops need to be executed. inputs(list[Variable]): The input list of the decorated function by `@to_static`. outputs(list[Variable]): The output list of the decorated function by `@to_static`. parameters(list[Tensor]|None): All trainable parameters included in the program. Default None. - Returns: Layer: A Layer object that run all ops internally in static graph mode. """ @@ -448,18 +445,6 @@ def get_forward_end_op_idx(self, program): paddle.utils._hash_with_id(program, self) ] - @LazyInitialized - def _out_grad_names(self): - return _out_grad_names( - self._train_program.desc, - self.get_forward_end_op_idx(self._train_program), - len(self._outputs.var_ids), - ) - - @LazyInitialized - def _x_grad_names(self): - return _param_grad_names(self._train_program.desc, self._inputs) - @property def program(self): """ @@ -536,9 +521,7 @@ def backward_program(self): Can't just return paddle.static.Program(), because self.backward_program is a property, whenever we call this method, a tmp Program() object is created and is gc immediatly after executed the following line in PartialProgramLayer.__call__. - >>> self.backward_program.desc.block(0), - When we access RunProgramAPI, it's possible to get an invalid backward_program address. """ return self._empty_backward_program_for_eval @@ -565,7 +548,6 @@ def forward(self, in): x = 2 * in # <---- x is a non-leaf node in program. 
y = x + 3 return x, y - loss = forward(in)[0].sum() loss.backward() # <----- x@grad will be overwrited by elementwise_add_grad Op """ @@ -748,15 +730,9 @@ def _prepare_attributes(self): 'param_grad_names', self._grad_var_names.get('param', []), 'out_grad_names', -<<<<<<< HEAD self._grad_var_names.get('out', []), 'x_grad_names', self._grad_var_names.get('x', []), -======= - self._out_grad_names, - 'x_grad_names', - self._x_grad_names, ->>>>>>> [Dy2St]Fix x grad names when high order gradient ) ) if self._cuda_graph_capture_mode: @@ -1142,4 +1118,4 @@ def add_build_strategy_for( builded_program = paddle.static.Program() for var in program.block(0).vars.values(): builded_program.block(0)._clone_variable(var, False) - return builded_program + return builded_program \ No newline at end of file From 573c65e9d609ee9230efd4a50ceb08695cefb75f Mon Sep 17 00:00:00 2001 From: 0x45f Date: Sun, 23 Apr 2023 14:17:44 +0000 Subject: [PATCH 04/12] Fix error --- python/paddle/jit/dy2static/partial_program.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index e0d0395b9480f..65854279e441b 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -151,16 +151,19 @@ class PartialProgramLayer: """ PartialProgramLayer wraps all the ops from layers decorated by `@to_static` and execute them as a static subgraph. + .. note:: **1. This is a very low level API. Users should not use this API directly. Please use `partial_program_from(concrete_program)` to create it. **2. LoDTensorArray is not currently supported in the output. + Args: main_program(Program): The main program that contains ops need to be executed. inputs(list[Variable]): The input list of the decorated function by `@to_static`. outputs(list[Variable]): The output list of the decorated function by `@to_static`. parameters(list[Tensor]|None): All trainable parameters included in the program. Default None. + Returns: Layer: A Layer object that run all ops internally in static graph mode. """ @@ -521,7 +524,9 @@ def backward_program(self): Can't just return paddle.static.Program(), because self.backward_program is a property, whenever we call this method, a tmp Program() object is created and is gc immediatly after executed the following line in PartialProgramLayer.__call__. + >>> self.backward_program.desc.block(0), + When we access RunProgramAPI, it's possible to get an invalid backward_program address. """ return self._empty_backward_program_for_eval @@ -548,6 +553,7 @@ def forward(self, in): x = 2 * in # <---- x is a non-leaf node in program. 
y = x + 3 return x, y + loss = forward(in)[0].sum() loss.backward() # <----- x@grad will be overwrited by elementwise_add_grad Op """ From 65f5cf9b73b389cfd053a522e2dbff45db8a892f Mon Sep 17 00:00:00 2001 From: 0x45f Date: Mon, 24 Apr 2023 07:14:54 +0000 Subject: [PATCH 05/12] Get grad names for backward API --- python/paddle/fluid/backward.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index e6c01f836851a..f07f8e508c9e3 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -2380,11 +2380,19 @@ def _find_op_path_( return op_path +<<<<<<< HEAD def calc_gradient_helper( targets, inputs, target_gradients=None, no_grad_set=None ): ''' Calculate gradient and return grad_info_map +======= +def _calc_and_ret_grad_infp_map( + targets, inputs, target_gradients=None, no_grad_set=None +): + ''' + For Dy2St +>>>>>>> Get grad names for backward API ''' targets = _as_list(targets) inputs = _as_list(inputs) @@ -2499,8 +2507,13 @@ def calc_gradient_helper( _append_backward_vars_(block, fwd_op_num, grad_to_var, grad_info_map) prog._sync_with_cpp() return grad_info_map +<<<<<<< HEAD + + +======= +>>>>>>> Get grad names for backward API def _get_grad_vars(grad_info_map, inputs): inputs = _as_list(inputs) grad_vars = [] @@ -2538,10 +2551,14 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): will be None """ +<<<<<<< HEAD # NOTE: If you want to modify the logic of calc_gradient, please modify # it inside the calc_gradient_helper and _get_grad_vars functions # to ensure the correctness of dy2st mode. grad_info_map = calc_gradient_helper( +======= + grad_info_map = _calc_and_ret_grad_infp_map( +>>>>>>> Get grad names for backward API targets, inputs, target_gradients=target_gradients, From ad699379f0da6b3cda926080b0102050dbb3486c Mon Sep 17 00:00:00 2001 From: 0x45f Date: Mon, 24 Apr 2023 08:40:04 +0000 Subject: [PATCH 06/12] Fix save load --- python/paddle/jit/dy2static/partial_program.py | 8 ++++++++ python/paddle/jit/translated_layer.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 65854279e441b..1af500e23ea1c 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -27,11 +27,15 @@ from paddle.optimizer.lr import LRScheduler from . 
import logging_utils +<<<<<<< HEAD from .utils import ( RETURN_NO_VALUE_MAGIC_NUM, backend_guard, construct_grad_names, ) +======= +from .utils import RETURN_NO_VALUE_MAGIC_NUM, backend_guard +>>>>>>> Fix save load __all__ = [] @@ -643,7 +647,11 @@ def _append_backward_desc(self, main_program): (framework.Variable, list, tuple), 'paddle.static.gradients', ) +<<<<<<< HEAD grad_info_map = backward.calc_gradient_helper( +======= + grad_info_map = backward._calc_and_ret_grad_infp_map( +>>>>>>> Fix save load targets=targets, inputs=[] ) diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py index 06a89ce36401a..d9e3137a06259 100644 --- a/python/paddle/jit/translated_layer.py +++ b/python/paddle/jit/translated_layer.py @@ -1646,4 +1646,4 @@ def _output_spec(self, method_name='forward'): ) output_spec.append(spec) - return output_spec + return output_spec \ No newline at end of file From 86209037e64ae1ca687e83badf0b9af31c0c036a Mon Sep 17 00:00:00 2001 From: 0x45f Date: Mon, 24 Apr 2023 09:05:20 +0000 Subject: [PATCH 07/12] Polish code --- python/paddle/fluid/backward.py | 19 +------------------ .../paddle/jit/dy2static/partial_program.py | 8 -------- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index f07f8e508c9e3..0a956e59fb346 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -2380,19 +2380,11 @@ def _find_op_path_( return op_path -<<<<<<< HEAD def calc_gradient_helper( targets, inputs, target_gradients=None, no_grad_set=None ): ''' Calculate gradient and return grad_info_map -======= -def _calc_and_ret_grad_infp_map( - targets, inputs, target_gradients=None, no_grad_set=None -): - ''' - For Dy2St ->>>>>>> Get grad names for backward API ''' targets = _as_list(targets) inputs = _as_list(inputs) @@ -2507,13 +2499,8 @@ def _calc_and_ret_grad_infp_map( _append_backward_vars_(block, fwd_op_num, grad_to_var, grad_info_map) prog._sync_with_cpp() return grad_info_map -<<<<<<< HEAD - - -======= ->>>>>>> Get grad names for backward API def _get_grad_vars(grad_info_map, inputs): inputs = _as_list(inputs) grad_vars = [] @@ -2551,14 +2538,10 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): will be None """ -<<<<<<< HEAD # NOTE: If you want to modify the logic of calc_gradient, please modify # it inside the calc_gradient_helper and _get_grad_vars functions # to ensure the correctness of dy2st mode. grad_info_map = calc_gradient_helper( -======= - grad_info_map = _calc_and_ret_grad_infp_map( ->>>>>>> Get grad names for backward API targets, inputs, target_gradients=target_gradients, @@ -2710,4 +2693,4 @@ def gradients_with_optimizer(program, optimizer, inputs=None, outputs=None): optimize_ops = optimizer.apply_gradients(pram_grads) - return optimize_ops, pram_grads + return optimize_ops, pram_grads \ No newline at end of file diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 1af500e23ea1c..65854279e441b 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -27,15 +27,11 @@ from paddle.optimizer.lr import LRScheduler from . 
import logging_utils -<<<<<<< HEAD from .utils import ( RETURN_NO_VALUE_MAGIC_NUM, backend_guard, construct_grad_names, ) -======= -from .utils import RETURN_NO_VALUE_MAGIC_NUM, backend_guard ->>>>>>> Fix save load __all__ = [] @@ -647,11 +643,7 @@ def _append_backward_desc(self, main_program): (framework.Variable, list, tuple), 'paddle.static.gradients', ) -<<<<<<< HEAD grad_info_map = backward.calc_gradient_helper( -======= - grad_info_map = backward._calc_and_ret_grad_infp_map( ->>>>>>> Fix save load targets=targets, inputs=[] ) From 56ba3c3cdd73dc085f14a696fb8b01cc12f2eaf0 Mon Sep 17 00:00:00 2001 From: 0x45f Date: Tue, 25 Apr 2023 03:42:10 +0000 Subject: [PATCH 08/12] Add ut --- test/dygraph_to_static/test_gradname_parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/dygraph_to_static/test_gradname_parse.py b/test/dygraph_to_static/test_gradname_parse.py index 743f92f758f4f..6b83ac47f8857 100644 --- a/test/dygraph_to_static/test_gradname_parse.py +++ b/test/dygraph_to_static/test_gradname_parse.py @@ -162,4 +162,4 @@ def setUp(self): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file From 98e36fc399099ad3ba5861b2bb1900aae7ad9e65 Mon Sep 17 00:00:00 2001 From: cxxly Date: Fri, 21 Apr 2023 02:32:17 +0000 Subject: [PATCH 09/12] [prim] fix not support optional grad bugs in higher order autodiff --- paddle/fluid/prim/utils/static/composite_grad_desc_maker.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h index 1ef5161dc0714..5f48211a40cb9 100644 --- a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h +++ b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h @@ -143,8 +143,12 @@ class CompositeGradOpMakerBase { const std::string& name) { paddle::optional output_grad_opt; if (fwd_op_.Outputs().find(name) != fwd_op_.Outputs().end()) { + std::cout << "~~~~~~~~~~~~~~~name: " << name << std::endl; framework::VarDesc* output_grad_desc = this->SingleOutputGrad(name); + std::cout << "~~~~~~~~~~~~~~~output_grad_desc: " << output_grad_desc + << std::endl; if (!output_grad_desc) return output_grad_opt; + std::cout << "~~~~~~~~~~~~~~~not null pointer: " << std::endl; paddle::Tensor output_grad = paddle::Tensor(std::make_shared(output_grad_desc)); output_grad_opt = paddle::make_optional(output_grad); From 5ab7fdd053b21622b15e6d8999c4efbefc70413e Mon Sep 17 00:00:00 2001 From: cxxly Date: Fri, 21 Apr 2023 07:24:28 +0000 Subject: [PATCH 10/12] [prim] remove duplicate fill_any_like caused by infershape_for_composite --- .../eager/to_static/run_program_op_node.h | 2 +- .../utils/static/composite_grad_desc_maker.h | 11 +--- python/paddle/fluid/backward.py | 55 ++++++++++--------- .../tests/unittests/test_calc_gradient.py | 2 + .../paddle/jit/dy2static/partial_program.py | 2 +- python/paddle/jit/translated_layer.py | 2 +- test/dygraph_to_static/test_gradname_parse.py | 2 +- ...test_comp_get_grad_op_desc_prim_enabled.py | 5 ++ test/prim/test_comp_skip_op_set.py | 5 ++ 9 files changed, 46 insertions(+), 40 deletions(-) diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index 562838032fa18..b4deb4e4ac306 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -755,4 +755,4 @@ class GradNodeRunProgram : public egr::GradNodeBase { paddle::framework::AttributeMap attrs_; bool 
executed_{false}; -}; \ No newline at end of file +}; diff --git a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h index 5f48211a40cb9..a8fe8d3e59616 100644 --- a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h +++ b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h @@ -143,12 +143,8 @@ class CompositeGradOpMakerBase { const std::string& name) { paddle::optional output_grad_opt; if (fwd_op_.Outputs().find(name) != fwd_op_.Outputs().end()) { - std::cout << "~~~~~~~~~~~~~~~name: " << name << std::endl; framework::VarDesc* output_grad_desc = this->SingleOutputGrad(name); - std::cout << "~~~~~~~~~~~~~~~output_grad_desc: " << output_grad_desc - << std::endl; if (!output_grad_desc) return output_grad_opt; - std::cout << "~~~~~~~~~~~~~~~not null pointer: " << std::endl; paddle::Tensor output_grad = paddle::Tensor(std::make_shared(output_grad_desc)); output_grad_opt = paddle::make_optional(output_grad); @@ -354,10 +350,7 @@ class CompositeGradOpMakerBase { framework::VarDesc* SingleOutputGrad(const std::string& name) const { auto* var = this->SingleForwardOutput(name); if (!var) { - PADDLE_THROW(platform::errors::InvalidArgument( - "GetSingleOutputGrad for %s_grad faild, if it is Optional input," - "please use GetOptionalSingleOutputGrad replaced. ", - name)); + return nullptr; } auto var_name = var->Name(); auto grad_var_name = framework::GradVarName(var_name); @@ -375,7 +368,7 @@ class CompositeGradOpMakerBase { return StaticCompositeContext::Instance().GetBlock()->FindVar( grad_var_name); } else { - return StaticCompositeContext::Instance().GetBlock()->Var(grad_var_name); + return nullptr; } } diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 0a956e59fb346..a2ea723415683 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1343,15 +1343,17 @@ def update_distop_context( if core._is_bwd_prim_enabled(): composite_block = program.clone().current_block() - # Infer shape for operators whose output haven't been created. + # Create output and infer shape for operators whose output haven't + # been created. for op in composite_block.ops: - if not all( - tuple( - composite_block._find_var_recursive(arg) - for arg in op.output_arg_names - ) - ): - infershape_for_composite(composite_block, op.desc) + for name in op.output_arg_names: + if not ( + composite_block.desc.has_var_recursive(name.encode()) + or name == core.empty_var_name() + ): + composite_block.create_var(name=name) + op.desc.infer_var_type(composite_block.desc) + op.desc.infer_shape(composite_block.desc) # add grad_op_desc by reversed ops for op in reversed(ops): @@ -1492,6 +1494,15 @@ def find_op_index(block_desc, cur_op_desc): or name in input_grad_names_set ) is_append_grad = False + + # NOTE: In primitive mode, the intermediate variable generated by + # decompositing raw grad op are not satisfied the rule of 'XX@GRAD', + # which will cause it be pruned according to current pruning logic. + # For simplicity, we treate all prmitive operators as one raw + # operator, and keep the pruning logic consistent with currently + # logic. The drawback of this solution is may lead to some primitive + # operators are not pruned, which is needed to fixed. + # FIXME: Optimize pruning logic from the perspective of whole graph. 
input_grad_names = [] for op_desc in grad_op_desc: input_grad_names += [ @@ -1499,20 +1510,20 @@ def find_op_index(block_desc, cur_op_desc): for name in op_desc.input_arg_names() if is_grad_name(name) ] + + # some code of gradient ops, like increment, are not very + # standard, there is no @GRAD in these ops' inputs. if len(input_grad_names) == 0: is_append_grad = True - break - - for op_desc in grad_op_desc: - - # some code of gradient ops, like increment, are not very - # standard, there is no @GRAD in these ops' inputs. + continue - if _some_in_set_(input_grad_names, input_grad_names_set): + if _some_in_set_(input_grad_names, input_grad_names_set): + is_append_grad = True + for op_desc in grad_op_desc: grad_op_descs.append(op_desc) - is_append_grad = True for name in op_desc.output_arg_names(): input_grad_names_set.add(name) + if is_append_grad: grad_to_var.update(op_grad_to_var) else: @@ -1774,18 +1785,8 @@ def infershape_for_composite(block, grad_op_desc): op_desc.check_attrs() op_desc.infer_var_type(block.desc) op_desc.infer_shape(block.desc) - for arg in op_desc.output_arg_names(): - if arg in new_vars: - _infer_var_data_type_shape_(arg, block) - grad_op_desc.copy_from(op_desc) - # NOTE: Some operator doesn't infer dtype correctly, this patch set the - # grad_var dtype same with corresponding forward variable. - for arg in grad_op_desc.output_arg_names(): - if arg in new_vars: - _infer_var_data_type_shape_(arg, block) - def _rename_grad_( block, start_op_idx, grad_to_var, target_grad_map, skip_rename_var_list @@ -2693,4 +2694,4 @@ def gradients_with_optimizer(program, optimizer, inputs=None, outputs=None): optimize_ops = optimizer.apply_gradients(pram_grads) - return optimize_ops, pram_grads \ No newline at end of file + return optimize_ops, pram_grads diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index 9a4aa568833f2..4cecbd5273650 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -20,6 +20,8 @@ from paddle import fluid from paddle.fluid.backward import calc_gradient +paddle.enable_static() + class TestCalcGradient(unittest.TestCase): def test_calc_gradient(self): diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 65854279e441b..ad2e62b9e0461 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -1124,4 +1124,4 @@ def add_build_strategy_for( builded_program = paddle.static.Program() for var in program.block(0).vars.values(): builded_program.block(0)._clone_variable(var, False) - return builded_program \ No newline at end of file + return builded_program diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py index d9e3137a06259..06a89ce36401a 100644 --- a/python/paddle/jit/translated_layer.py +++ b/python/paddle/jit/translated_layer.py @@ -1646,4 +1646,4 @@ def _output_spec(self, method_name='forward'): ) output_spec.append(spec) - return output_spec \ No newline at end of file + return output_spec diff --git a/test/dygraph_to_static/test_gradname_parse.py b/test/dygraph_to_static/test_gradname_parse.py index 6b83ac47f8857..743f92f758f4f 100644 --- a/test/dygraph_to_static/test_gradname_parse.py +++ b/test/dygraph_to_static/test_gradname_parse.py @@ -162,4 +162,4 @@ def setUp(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file 
+ unittest.main() diff --git a/test/prim/test_comp_get_grad_op_desc_prim_enabled.py b/test/prim/test_comp_get_grad_op_desc_prim_enabled.py index 0bc173ef4049b..9c89ab72bf18e 100644 --- a/test/prim/test_comp_get_grad_op_desc_prim_enabled.py +++ b/test/prim/test_comp_get_grad_op_desc_prim_enabled.py @@ -67,6 +67,11 @@ def setUpClass(cls): for n, vs in cls.outputs.items() }, ) + + for _, outs in cls.outputs.items(): + for out in outs: + block.create_var(name=out + core.grad_var_suffix()) + cls.fwd = block.ops[0].desc @classmethod diff --git a/test/prim/test_comp_skip_op_set.py b/test/prim/test_comp_skip_op_set.py index bd98f21f235c9..bca3c97ec4d40 100644 --- a/test/prim/test_comp_skip_op_set.py +++ b/test/prim/test_comp_skip_op_set.py @@ -46,6 +46,11 @@ def setUp(self): for n, vs in self.outputs.items() }, ) + + for _, outs in self.outputs.items(): + for out in outs: + block.create_var(name=out + core.grad_var_suffix()) + self.fwd = block.ops[0].desc def tearDown(self): From 1167429a45eec5258fb8f6212488637938bab721 Mon Sep 17 00:00:00 2001 From: cxxly Date: Wed, 10 May 2023 05:06:28 +0000 Subject: [PATCH 11/12] fix _strip_grad_suffix_ bugs in higher-order autodiff --- python/paddle/fluid/backward.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index a2ea723415683..ff57966eefbfb 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -28,6 +28,8 @@ from collections.abc import Sequence +import re + __all__ = [ 'append_backward', 'gradients', @@ -459,10 +461,14 @@ def _strip_grad_suffix_(name): """ Strip the grad suffix from the given variable name e.g. x@GRAD ==> x + x@GRAD@GRAD ==> x y@GRAD@RENAME@1 ==> y + z@GRAD_slice_0@GRAD ==> z@GRAD_slice_0 """ - pos = name.find(core.grad_var_suffix()) - new_name = name[:pos] if pos != -1 else name + pos = re.search(f'{core.grad_var_suffix()}$', name) or re.search( + f'{core.grad_var_suffix()}@', name + ) + new_name = name[: pos.start()] if pos is not None else name new_pos = name.rfind('grad/') return new_name[new_pos + 5 :] if new_pos != -1 else new_name @@ -1787,6 +1793,18 @@ def infershape_for_composite(block, grad_op_desc): op_desc.infer_shape(block.desc) grad_op_desc.copy_from(op_desc) + if not framework.OpProtoHolder.instance().has_op_proto(grad_op_desc.type()): + # NOTE: Some raw fluid grad operators which hadn't been decomposed may not + # implement InferVarType method, such as elementwise_xx_grad, and it will + # cause the dtype or shape of corresponding cotangent incorrect. This + # patch set the cotangent dtype and shape same with corresponding + # forward variable. For primitive operators, we have ensure all + # InferVarType method to be executed correctly in PR#52818, we skip + # this patch for primitive operators. 
+ for arg in grad_op_desc.output_arg_names(): + if arg in new_vars: + _infer_var_data_type_shape_(arg, block) + def _rename_grad_( block, start_op_idx, grad_to_var, target_grad_map, skip_rename_var_list From aa74cfed7f892360b8421e0c29313bf50c49eef6 Mon Sep 17 00:00:00 2001 From: cxxly Date: Wed, 10 May 2023 05:07:02 +0000 Subject: [PATCH 12/12] [prim] create output for test_static_prim.cc --- test/cpp/prim/test_static_prim.cc | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/cpp/prim/test_static_prim.cc b/test/cpp/prim/test_static_prim.cc index 936ae9babfe36..04305e8ffded1 100644 --- a/test/cpp/prim/test_static_prim.cc +++ b/test/cpp/prim/test_static_prim.cc @@ -206,6 +206,11 @@ TEST(StaticPrim, TanhBackwardComposite) { auto* forward_opdesc = target_block->AllOps()[0]; std::unordered_map grad_to_var; std::vector grad_sub_block; + Tensor out_grad = prim::empty( + shape, phi::DataType::FLOAT32, paddle::Place()); + framework::VarDesc* out_grad_desc = + static_cast(out_grad.impl().get())->get_ptr(); + target_block->RenameVar(out_grad_desc->Name(), "b@GRAD"); std::vector> grad_ops = std::move(framework::OpInfoMap::Instance() .Get(forward_opdesc->Type()) @@ -288,6 +293,11 @@ TEST(StaticCompositeGradMaker, TestMutiInputMethod) { auto* forward_opdesc = target_block->AllOps()[0]; std::unordered_map grad_to_var; std::vector grad_sub_block; + Tensor out_grad = prim::empty( + shape, phi::DataType::FLOAT32, paddle::Place()); + framework::VarDesc* out_grad_desc = + static_cast(out_grad.impl().get())->get_ptr(); + target_block->RenameVar(out_grad_desc->Name(), "out@GRAD"); auto test = TestCompositeGradMaker(*forward_opdesc, std::unordered_set(), &grad_to_var, @@ -353,6 +363,19 @@ TEST(StaticCompositeGradMaker, TestMutiOutputMethod) { auto* forward_opdesc = target_block->AllOps()[0]; std::unordered_map grad_to_var; std::vector grad_sub_block; + + Tensor out1_grad = prim::empty( + shape, phi::DataType::FLOAT32, paddle::Place()); + framework::VarDesc* out1_grad_desc = + static_cast(out1_grad.impl().get())->get_ptr(); + target_block->RenameVar(out1_grad_desc->Name(), "out1@GRAD"); + + Tensor out2_grad = prim::empty( + shape, phi::DataType::FLOAT32, paddle::Place()); + framework::VarDesc* out2_grad_desc = + static_cast(out2_grad.impl().get())->get_ptr(); + target_block->RenameVar(out2_grad_desc->Name(), "out2@GRAD"); + auto test = TestCompositeGradMaker(*forward_opdesc, std::unordered_set(), &grad_to_var,
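
For context, below is a minimal sketch (not part of the patches) of the high-order gradient scenario under @to_static that this series targets, roughly the kind of case exercised by test/dygraph_to_static/test_gradname_parse.py. The function name, shapes, and the use of tanh are illustrative only, and the snippet assumes a Paddle build that already includes these patches:

    import paddle

    # Illustrative only: assumes a Paddle build that includes this patch
    # series; the function, shapes, and tanh are arbitrary choices.
    @paddle.jit.to_static
    def func(x):
        return paddle.tanh(x)

    x = paddle.randn([2, 3])
    x.stop_gradient = False

    y = func(x)
    # First-order gradient w.r.t. x; keep the graph so it can be
    # differentiated again.
    (dx,) = paddle.grad(y, x, create_graph=True)
    # Second-order gradient; this is the path that depends on the
    # x@GRAD names wired through RunProgram in these patches.
    (ddx,) = paddle.grad(dx, x)
    print(ddx.shape)

Without correct x_grad_names/out_grad_names attributes on the run_program grad node, the second paddle.grad call cannot associate the dy2st backward program's gradient variables with the eager inputs, which is the failure mode this series addresses.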