diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_base.h b/paddle/fluid/framework/new_executor/instruction/instruction_base.h
index f078da97107e7..b8271a0ea0012 100644
--- a/paddle/fluid/framework/new_executor/instruction/instruction_base.h
+++ b/paddle/fluid/framework/new_executor/instruction/instruction_base.h
@@ -21,7 +21,6 @@
 
 #include "paddle/fluid/framework/new_executor/new_executor_defs.h"
 #include "paddle/fluid/platform/event.h"
-#include "paddle/ir/core/value.h"
 
 namespace ir {
 class Value;
diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
index d8ddc30633be0..dd6aa26a1ae53 100644
--- a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
+++ b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
@@ -12,13 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
+
 #include <map>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
-#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
-
 #include "paddle/fluid/framework/new_executor/new_executor_defs.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/event.h"
@@ -42,7 +42,7 @@ std::vector<int> GetValueIds(
     const std::unordered_map<const paddle::framework::Variable*, std::string>&
         variable_2_var_name) {
   std::vector<int> ids;
-  std::string var_name = value_2_var_name.at(value);
+  auto& var_name = value_2_var_name.at(value);
   ids.push_back(var_name_2_id.at(var_name));
   // NOTE(zhangbo): Value maybe a VariableRefArray
   auto var = inner_scope->FindVar(var_name);
@@ -61,7 +61,7 @@ platform::DeviceContext* ParseDeviceContext(
     const platform::Place& place,
     const std::string& execution_stream,
     const int stream_priority) {
-  auto op_attributes = op->attributes();
+  auto& op_attributes = op->attributes();
   auto op_name =
       op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
   interpreter::ContextManager& ctx_manager =
@@ -149,7 +149,7 @@ OpFuncType AnalyseOpFuncType(::ir::Operation* op,
   // computing. They execute serially in device thread and block CUDA kernel
   // launching in other GPU OPs. To improve performance, set them as kGpuSync
   // and so that they would be dispatched to host thread.
-  auto op_attributes = op->attributes();
+  auto& op_attributes = op->attributes();
   auto op_name =
       op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
   if (op_name == kCoalesceTensor &&
diff --git a/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc
index eadf0c1f806cf..7250d3cd04d9e 100644
--- a/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.cc
@@ -43,7 +43,7 @@ LegacyKernelInstruction::LegacyKernelInstruction(
     const std::unordered_map<const paddle::framework::Variable*, std::string>&
         variable_2_var_name)
     : InstructionBase(id, place) {
-  auto op_attributes = op->attributes();
+  auto& op_attributes = op->attributes();
   auto op_name =
       op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
   ir::OpInfo op_info = ir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
@@ -97,18 +97,20 @@ LegacyKernelInstruction::LegacyKernelInstruction(
       yaml_interface->get_op_info_());
   VLOG(6) << "finish process yaml_info_parser";
 
-  ::ir::BuildPhiContext<
-      phi::InferMetaContext,
-      phi::MetaTensor,
-      phi::MetaTensor,
-      paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-      paddle::small_vector<phi::MetaTensor, phi::kOutputSmallVectorSize>,
-      false>(op,
-             value_2_var_name,
-             scope,
-             local_scope,
-             yaml_info_parser,
-             &infer_meta_context_);
+  if (infer_meta_interface_) {
+    ::ir::BuildPhiContext<
+        phi::InferMetaContext,
+        phi::MetaTensor,
+        phi::MetaTensor,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        paddle::small_vector<phi::MetaTensor, phi::kOutputSmallVectorSize>,
+        false>(op,
+               value_2_var_name,
+               scope,
+               local_scope,
+               yaml_info_parser,
+               &infer_meta_context_);
+  }
   VLOG(6) << "finish process infer meta context";
 
   auto kernel_name =
@@ -123,8 +125,10 @@ LegacyKernelInstruction::LegacyKernelInstruction(
       phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
   VLOG(6) << "finish process select kernel";
 
-  operator_base_ =
-      ir::BuildOperatorBase(op, value_2_var_name, yaml_info_parser);
+  Scope* inner_scope = local_scope == nullptr ? scope : local_scope;
+
+  operator_base_ = ir::BuildOperatorBase(
+      op, value_2_var_name, yaml_info_parser, variable_2_var_name, inner_scope);
   paddle::framework::VariableValueMap in_map;
   paddle::framework::VariableValueMap out_map;
   auto dev_ctx = phi::DeviceContextPool::Instance().Get(
@@ -151,7 +155,6 @@ LegacyKernelInstruction::LegacyKernelInstruction(
       GetStreamPriority()));
   VLOG(6) << "finish process device context";
 
-  Scope* inner_scope = local_scope == nullptr ? scope : local_scope;
   InitInputsOutputsIds(
       op, inner_scope, value_2_var_name, var_name_2_id, variable_2_var_name);
   VLOG(6) << "finish process inputs outputs index";
@@ -169,10 +172,16 @@ LegacyKernelInstruction::~LegacyKernelInstruction() {
   if (kernel_context_ != nullptr) {
     delete kernel_context_;
   }
+
+  if (phi_kernel_ != nullptr) {
+    delete phi_kernel_;
+  }
 }
 
 void LegacyKernelInstruction::Run() {
-  infer_meta_interface_->infer_meta_(&(infer_meta_context_));
+  if (infer_meta_interface_) {
+    infer_meta_interface_->infer_meta_(&(infer_meta_context_));
+  }
   VLOG(6) << "Run op " << legacy_op_name_ << " infer meta.";
   (*(phi_kernel_))((kernel_context_));
   VLOG(6) << "Run op " << legacy_op_name_ << " kernel.";
diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
index 7f5bcff428195..2b909b401de91 100644
--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -1076,36 +1076,17 @@ void BuildOpFuncList(
                           "not found kernel for [%s]",
                           kernel_name);
 
-    if (kernel_name == "fused_softmax_mask_upper_triangle" ||
-        kernel_name == "fused_softmax_mask_upper_triangle_grad") {
-      // builder operator
-      op_func_node.operator_base_ =
-          ir::BuildOperatorBase(op, value_2_name_map, op_yaml_info_parser);
-      paddle::framework::VariableValueMap in_map;
-      paddle::framework::VariableValueMap out_map;
-      op_func_node.runtime_ctx_ =
-          std::make_shared<paddle::framework::RuntimeContext>(
-              paddle::framework::RuntimeContext(in_map, out_map));
-      ir::BuildRuntimeContext(op,
-                              value_2_name_map,
-                              scope,
-                              local_scope,
-                              op_yaml_info_parser,
-                              op_func_node.runtime_ctx_.get());
-      op_func_node.fluid_op = true;
-    } else {
-      ::ir::BuildPhiContext<phi::KernelContext,
-                            const phi::TensorBase*,
-                            phi::TensorBase*,
-                            paddle::small_vector<const phi::TensorBase*>,
-                            paddle::small_vector<phi::TensorBase*>,
-                            true>(op,
-                                  value_2_name_map,
-                                  scope,
-                                  local_scope,
-                                  op_yaml_info_parser,
-                                  &(op_func_node.kernel_context_));
-    }
+    ::ir::BuildPhiContext<phi::KernelContext,
+                          const phi::TensorBase*,
+                          phi::TensorBase*,
+                          paddle::small_vector<const phi::TensorBase*>,
+                          paddle::small_vector<phi::TensorBase*>,
+                          true>(op,
+                                value_2_name_map,
+                                scope,
+                                local_scope,
+                                op_yaml_info_parser,
+                                &(op_func_node.kernel_context_));
     VLOG(6) << "finish process kernel context";
 
     op_func_node.kernel_context_.SetDeviceContext(
diff --git a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc
index ead957f7e87ec..6d42dfaec08dc 100644
--- a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc
@@ -38,6 +38,7 @@
 
 #include "paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h"
+#include "paddle/fluid/ir/dialect/utils.h"
 #include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h"
 #include "paddle/ir/core/builtin_attribute.h"
 
@@ -439,8 +440,7 @@ void NewIRInterpreter::BuildInstruction() {
     }
     VLOG(6) << "process " << op_name;
 
-    if (op_name == "pd.fused_softmax_mask_upper_triangle" ||
-        op_name == "pd.fused_softmax_mask_upper_triangle_grad") {
+    if (dialect::IsLegacyOp(op_name)) {
       vec_instruction_base_.emplace_back(
           std::make_unique<LegacyKernelInstruction>(op_idx++,
                                                     place_,
diff --git a/paddle/fluid/ir/dialect/pd_op.yaml b/paddle/fluid/ir/dialect/pd_op.yaml
index 1c20c409df1eb..7065834c25ead 100644
--- a/paddle/fluid/ir/dialect/pd_op.yaml
+++ b/paddle/fluid/ir/dialect/pd_op.yaml
@@ -103,7 +103,7 @@
   - {typename: bool, name: load_as_fp16}
   - {typename: bool, name: model_from_memory}
   outputs:
-  - {typename: 'Tensor[]', name: out, optional: true, intermediate: false}
+  - {typename: 'Tensor[]', name: Out, optional: true, intermediate: false}
   no_need_buffer: null
   data_transform: null
   kernel:
diff --git a/paddle/fluid/ir/dialect/utils.cc b/paddle/fluid/ir/dialect/utils.cc
index cd6ff35ef7f4e..fbaa6d0e6025e 100644
--- a/paddle/fluid/ir/dialect/utils.cc
+++ b/paddle/fluid/ir/dialect/utils.cc
@@ -17,6 +17,11 @@
 namespace paddle {
 namespace dialect {
 
+const std::unordered_set<std::string> LegacyOpList = {
+    "pd.fused_softmax_mask_upper_triangle",
+    "pd.fused_softmax_mask_upper_triangle_grad",
+    "pd.load_combine"};
+
 enum class AttrType {
   UNDEFINED = 0,
   BOOL,
@@ -167,5 +172,7 @@ VariantType GetAttributeData(const ir::Attribute& attr) {
   return kAttrCastMap[attr_type](attr);
 }
 
+bool IsLegacyOp(const std::string& name) { return LegacyOpList.count(name); }
+
 }  // namespace dialect
 }  // namespace paddle
diff --git a/paddle/fluid/ir/dialect/utils.h b/paddle/fluid/ir/dialect/utils.h
index 13a9f3d7ac8b8..2cc1c653fc0ef 100644
--- a/paddle/fluid/ir/dialect/utils.h
+++ b/paddle/fluid/ir/dialect/utils.h
@@ -147,5 +147,7 @@ static inline ir::Attribute TransToIrAttribute(phi::Scalar scalar,
 
 VariantType GetAttributeData(const ir::Attribute& attr);
 
+bool IsLegacyOp(const std::string& name);
+
 }  // namespace dialect
 }  // namespace paddle
diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
index 42b547b08d3b3..b5bf6e123ac5e 100644
--- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
+++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
@@ -598,17 +598,39 @@ void BuildRuntimeContext(
     PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
                             phi::errors::PreconditionNotMet(
                                 "can not find var[%s] in scope", in_var_name));
+
     auto var = inner_scope->FindVar(in_var_name);
-    std::vector<paddle::framework::Variable*> vec_tmp = {var};
-    auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
-    runtime_ctx->outputs[legacy_attr_name] = vec_tmp;
+
+    auto type = ptr.type();
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
+        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
+      std::vector<paddle::framework::Variable*> vec_tmp = {var};
+
+      runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
+    } else if (type.isa<ir::VectorType>()) {
+      auto var_ref = var->Get<paddle::framework::VariableRefArray>();
+      std::vector<paddle::framework::Variable*> vec_tmp;
+      vec_tmp.reserve(var_ref.size());
+      for (size_t k = 0; k < var_ref.size(); ++k) {
+        vec_tmp.push_back(const_cast<paddle::framework::Variable*>(var_ref[k]));
+      }
+      runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
+    } else {
+      PADDLE_THROW(phi::errors::Unimplemented(
+          "only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
+          "ir::vector type"));
+    }
   }
 }
 
 std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
     ir::Operation* op,
     const std::unordered_map<ir::Value, std::string>& name_map,
-    const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info,
+    const std::unordered_map<const paddle::framework::Variable*, std::string>&
+        variable_2_var_name,
+    const paddle::framework::Scope* scope) {
   paddle::framework::VariableNameMap in_name_map;
   paddle::framework::VariableNameMap out_name_map;
   paddle::framework::AttributeMap attr_map;
@@ -637,6 +659,30 @@ std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
   }
 
   // build attribute
+  auto& op_attr_map = op->attributes();
+  auto attr_name_list = op_yaml_info.AttrParams(true);
+  for (auto& name : attr_name_list) {
+    auto& val = op_attr_map.at(name);
+
+    if (val.isa<ir::StrAttribute>()) {
+      attr_map[name] = val.dyn_cast<ir::StrAttribute>().AsString();
+    } else if (val.isa<ir::Int32Attribute>()) {
+      attr_map[name] = val.dyn_cast<ir::Int32Attribute>().data();
+    } else if (val.isa<ir::Int64Attribute>()) {
+      attr_map[name] = val.dyn_cast<ir::Int64Attribute>().data();
+    } else if (val.isa<ir::FloatAttribute>()) {
+      attr_map[name] = val.dyn_cast<ir::FloatAttribute>().data();
+    } else if (val.isa<ir::DoubleAttribute>()) {
+      attr_map[name] = val.dyn_cast<ir::DoubleAttribute>().data();
+    } else if (val.isa<ir::BoolAttribute>()) {
+      attr_map[name] = val.dyn_cast<ir::BoolAttribute>().data();
+    } else {
+      std::stringstream ss;
+      val.Print(ss);
+      VLOG(1) << "type not support " << ss.str() << std::endl;
+      PADDLE_THROW("Type[%s] in attribute map not support yet", ss.str());
+    }
+  }
 
   auto& output_name_list = op_yaml_info.OutputNames();
   for (size_t i = 0; i < output_name_list.size(); ++i) {
@@ -644,8 +690,26 @@ std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
     ir::Value ptr = op->result(i);
 
     auto out_var_name = name_map.at(ptr);
-    auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
-    out_name_map[legacy_attr_name].push_back(out_var_name);
+
+    auto type = ptr.type();
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
+        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
+      out_name_map[legacy_arg_name].push_back(out_var_name);
+    } else if (type.isa<ir::VectorType>()) {
+      auto var = scope->FindVar(out_var_name);
+      auto var_ref = var->Get<paddle::framework::VariableRefArray>();
+      for (size_t k = 0; k < var_ref.size(); ++k) {
+        PADDLE_ENFORCE(variable_2_var_name.count(var_ref[k]),
+                       "Variable MUST in variable_2_var_name map");
+        out_name_map[legacy_arg_name].push_back(
+            variable_2_var_name.at(var_ref[k]));
+      }
+    } else {
+      PADDLE_THROW(phi::errors::Unimplemented(
+          "only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
+          "ir::vector type"));
+    }
   }
 
   auto& op_info = paddle::framework::OpInfoMap::Instance().Get(fluid_op_name);
diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
index fc2658d482c5b..f59b8d927cbdd 100644
--- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
+++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
@@ -62,7 +62,10 @@ void BuildRuntimeContext(
 std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
     ir::Operation* op,
     const std::unordered_map<ir::Value, std::string>& name_map,
-    const paddle::dialect::OpYamlInfoParser& op_yaml_info);
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info,
+    const std::unordered_map<const paddle::framework::Variable*, std::string>&
+        variable_2_var_name,
+    const paddle::framework::Scope* scope);
 
 template <typename Context,
           typename InType,
diff --git a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
--- a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
 const std::unordered_set<std::string> UnchangeOutputOps = {
     "builtin.get_parameter", "pd.shadow_output"};
 
-const std::unordered_set<std::string> LegacyOpList = {
-    "pd.fused_softmax_mask_upper_triangle",
-    "pd.fused_softmax_mask_upper_triangle_grad"};
-
 bool NeedFallBackCpu(const ir::Operation* op,
                      const std::string& kernel_fn_name,
                      const phi::KernelKey& kernel_key) {
@@ -553,6 +549,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
         GetKernelKey(op_item, place, map_value_pair, op_info_parser.get());
     VLOG(6) << "kernel type " << kernel_key;
 
+    if (op_item->name() == "pd.load_combine") {
+      kernel_key.set_dtype(phi::DataType::FLOAT32);
+    }
     if (NeedFallBackCpu((op_item), kernel_fn_str, kernel_key)) {
       kernel_key.set_backend(phi::Backend::CPU);
     }
@@ -571,7 +570,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
     auto args_def = phi_kernel.args_def();
     auto output_defs = args_def.output_defs();
     if (!UnchangeOutputOps.count(op_item->name()) &&
-        !LegacyOpList.count(op_item->name())) {
+        !IsLegacyOp(op_item->name())) {
       PADDLE_ENFORCE_EQ(
           op_item->num_results(),
           output_defs.size(),
@@ -583,7 +582,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
     for (size_t i = 0; i < op_item->num_results(); ++i) {
       phi::Place out_place;
       if ((!UnchangeOutputOps.count(op_item->name())) &&
-          (!LegacyOpList.count(op_item->name())) && phi_kernel.IsValid()) {
+          (!IsLegacyOp(op_item->name())) && phi_kernel.IsValid()) {
         out_place = phi::TransToPhiPlace(output_defs[i].backend);
       } else {
         out_place = phi::TransToPhiPlace(kernel_key.backend());
diff --git a/test/ir/new_ir/test_standalone_new_ir.py b/test/ir/new_ir/test_standalone_new_ir.py
index 949422ecc6c9a..43128c3e0b5b0 100644
--- a/test/ir/new_ir/test_standalone_new_ir.py
+++ b/test/ir/new_ir/test_standalone_new_ir.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+import os
+import tempfile
 import unittest
 
 import numpy as np
@@ -287,11 +288,18 @@ def test_with_new_ir(self):
 
 
 class TestJitSaveOp(unittest.TestCase):
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+        self.model_path = os.path.join(self.temp_dir.name, "new_ir_save_load")
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
    def test_with_new_ir(self):
         paddle.disable_static()
         linear = paddle.nn.Linear(10, 10)
-        path = "example_model/linear"
+        path = os.path.join(self.model_path, "linear")
 
         paddle.jit.save(
             linear,
@@ -299,6 +307,26 @@ def test_with_new_ir(self):
             input_spec=[paddle.static.InputSpec([10, 10], 'float32', 'x')],
         )
 
+        paddle.enable_static()
+        place = (
+            paddle.CUDAPlace(0)
+            if paddle.is_compiled_with_cuda()
+            else paddle.CPUPlace()
+        )
+
+        exe = paddle.static.Executor(place)
+
+        [
+            inference_program,
+            feed_target_names,
+            fetch_targets,
+        ] = paddle.static.io.load_inference_model(
+            self.model_path,
+            executor=exe,
+            model_filename="linear.pdmodel",
+            params_filename="linear.pdiparams",
+        )
+
 
 if __name__ == "__main__":
     paddle.enable_static()
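
A minimal standalone sketch of the dispatch this patch centralizes: names on the legacy list are routed to LegacyKernelInstruction (operator base plus RuntimeContext), everything else stays on the PhiKernelInstruction path. Paddle types are replaced with standard-library stand-ins so it compiles on its own; BuildLegacyInstruction and BuildPhiInstruction are hypothetical placeholders, not Paddle APIs.

// Illustrative sketch only -- not part of the patch above.
#include <iostream>
#include <string>
#include <unordered_set>

// Same entries as paddle::dialect::LegacyOpList in this patch.
const std::unordered_set<std::string> kLegacyOps = {
    "pd.fused_softmax_mask_upper_triangle",
    "pd.fused_softmax_mask_upper_triangle_grad",
    "pd.load_combine"};

// Mirrors paddle::dialect::IsLegacyOp.
bool IsLegacyOp(const std::string& name) { return kLegacyOps.count(name) > 0; }

// Placeholders standing in for LegacyKernelInstruction / PhiKernelInstruction
// construction in NewIRInterpreter::BuildInstruction().
void BuildLegacyInstruction(const std::string& op) {
  std::cout << op << " -> LegacyKernelInstruction (operator_base + RuntimeContext)\n";
}
void BuildPhiInstruction(const std::string& op) {
  std::cout << op << " -> PhiKernelInstruction (phi kernel context)\n";
}

int main() {
  const std::string ops[] = {"pd.load_combine", "pd.matmul"};
  for (const auto& op : ops) {
    if (IsLegacyOp(op)) {
      BuildLegacyInstruction(op);
    } else {
      BuildPhiInstruction(op);
    }
  }
  return 0;
}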