diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 4d0b04209c059..dc661fce388fe 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -201,10 +201,6 @@ if(WITH_DISTRIBUTE) add_definitions(-DPADDLE_WITH_DISTRIBUTE) endif() -if(WITH_GFLAGS) - add_definitions(-DPADDLE_WITH_GFLAGS) -endif() - if(WITH_PSCORE) add_definitions(-DPADDLE_WITH_PSCORE) endif() diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 75436783c7ede..3398f8a28307e 100755 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -110,6 +110,7 @@ endif() set(flags_dep) if(WITH_GFLAGS) list(APPEND flags_dep gflags) + add_definitions(-DPADDLE_WITH_GFLAGS) else() list(APPEND flags_dep paddle_flags) endif() diff --git a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc index 27ac1681a4008..3dc9175dbfd4b 100644 --- a/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/stream_analyzer.cc @@ -414,7 +414,6 @@ void analyse_event_info_for_two_instructions( if (has_data_dependency( instructions[cur_instr_id], instructions[next_instr_id]) || - !run_type_info[next_instr_id][DownstreamRunType::kEventRun].empty() || instructions[next_instr_id]->OpBase()->Type() == "depend") { waiter_instr_ids->insert(next_instr_id); return; @@ -474,7 +473,6 @@ void analyse_event_info_for_two_instructions< if (has_data_dependency( instructions[cur_instr_id], instructions[next_instr_id]) || - !run_type_info[next_instr_id][DownstreamRunType::kEventRun].empty() || instructions[next_instr_id]->Name() == "pd.depend") { waiter_instr_ids->insert(next_instr_id); return; diff --git a/paddle/fluid/framework/type_info.cc b/paddle/fluid/framework/type_info.cc index 442800d035f55..cb7dae540d119 100644 --- a/paddle/fluid/framework/type_info.cc +++ b/paddle/fluid/framework/type_info.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/raw_tensor.h" #include "paddle/fluid/framework/string_array.h" +#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/primitive/type/lazy_tensor.h" @@ -44,5 +45,6 @@ template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; +template class TypeInfoTraits; } // namespace phi diff --git a/paddle/fluid/ir/dialect/op_generator/op_build_gen.py b/paddle/fluid/ir/dialect/op_generator/op_build_gen.py index d9747b47e8747..d36c269648315 100644 --- a/paddle/fluid/ir/dialect/op_generator/op_build_gen.py +++ b/paddle/fluid/ir/dialect/op_generator/op_build_gen.py @@ -286,27 +286,25 @@ def GenBuildOutputs( build_output_str = ' VLOG(4) << "Builder construction outputs";\n' CREATE_INPUT_METATENSOR_TEMPLATE = """ VLOG(4) << "Builder construction dense_{name}"; - phi::DenseTensor dense_{name}(std::make_unique(paddle::platform::CPUPlace()).get(), - phi::DenseTensorMeta(paddle::dialect::TransToPhiDataType({name}.dtype()), - {name}.dims(), - {name}.data_layout(), - {name}.lod(), - {name}.offset())); + paddle::dialect::IrMetaTensor ir_meta_tensor_{name}(paddle::dialect::TransToPhiDataType({name}.dtype()), + {name}.dims(), + {name}.data_layout(), + {name}.lod(), + {name}.offset()); VLOG(4) << "Builder construction meta_{name}"; - phi::MetaTensor meta_{name}(&dense_{name}); + phi::MetaTensor meta_{name}(&ir_meta_tensor_{name}); """ - CREATE_INPUT_VEC_METATENSOR_TEMPLATE = """ std::vector vec_dense_{name}; + CREATE_INPUT_VEC_METATENSOR_TEMPLATE = """ std::vector vec_ir_meta_tensor_{name}; for (size_t i=0; i < static_cast({name}.size()); i++) {{ - vec_dense_{name}.push_back(phi::DenseTensor(std::make_unique(paddle::platform::CPUPlace()).get(), - phi::DenseTensorMeta(paddle::dialect::TransToPhiDataType({name}[i].dyn_cast().dtype()), + vec_ir_meta_tensor_{name}.push_back(paddle::dialect::IrMetaTensor(paddle::dialect::TransToPhiDataType({name}[i].dyn_cast().dtype()), {name}[i].dyn_cast().dims(), {name}[i].dyn_cast().data_layout(), {name}[i].dyn_cast().lod(), - {name}[i].dyn_cast().offset()))); + {name}[i].dyn_cast().offset())); }} std::vector vec_meta_{name}; - for (size_t i=0; i < vec_dense_{name}.size(); i++) {{ - vec_meta_{name}.push_back(phi::MetaTensor(&vec_dense_{name}[i])); + for (size_t i=0; i < vec_ir_meta_tensor_{name}.size(); i++) {{ + vec_meta_{name}.push_back(phi::MetaTensor(&vec_ir_meta_tensor_{name}[i])); }} std::vector meta_{name}; diff --git a/paddle/fluid/ir/dialect/op_generator/op_gen.py b/paddle/fluid/ir/dialect/op_generator/op_gen.py index 5d51a731c546a..7ee65d050581b 100644 --- a/paddle/fluid/ir/dialect/op_generator/op_gen.py +++ b/paddle/fluid/ir/dialect/op_generator/op_gen.py @@ -101,6 +101,7 @@ class {op_name} : public ir::Op<{op_name}{interfaces}{traits}> {{ #include "{h_file}" #include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_type.h" #include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_attribute.h" +#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h" #include "paddle/ir/core/builtin_attribute.h" #include "paddle/ir/core/builtin_type.h" #include "paddle/ir/core/builtin_op.h" @@ -172,7 +173,7 @@ class {op_name} : public ir::Op<{op_name}{interfaces}{traits}> {{ 'bool': 'ir::BoolAttribute', } -_NO_NEED_GEN_OPS = {'add_n', 'split_grad'} +_NO_NEED_GEN_OPS = {'add_n', 'add_n_', 'add_n_with_kernel', 'split_grad'} def to_phi_and_fluid_op_name(op_item): diff --git a/paddle/fluid/ir/dialect/paddle_dialect/ir/CMakeLists.txt b/paddle/fluid/ir/dialect/paddle_dialect/ir/CMakeLists.txt index cc8d1357bf070..08cc463c34c9b 100644 --- a/paddle/fluid/ir/dialect/paddle_dialect/ir/CMakeLists.txt +++ b/paddle/fluid/ir/dialect/paddle_dialect/ir/CMakeLists.txt @@ -183,7 +183,7 @@ add_custom_target(ops_api_gen ALL DEPENDS ${ops_api_source_file}) cc_library( pd_dialect_core - SRCS pd_attribute.cc pd_type.cc + SRCS pd_attribute.cc pd_type.cc pd_meta_tensor.cc DEPS phi pd_interface pd_trait type_info) cc_library( pd_dialect_op diff --git a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.cc b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.cc index dd68500a626a0..9c89059db6936 100644 --- a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.cc +++ b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.cc @@ -49,6 +49,8 @@ void PaddleDialect::initialize() { #include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_op.h" // NOLINT >(); RegisterOps(); diff --git a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.cc b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.cc index 2f2ba34c881e4..3d16c44405ab0 100644 --- a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.cc +++ b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.cc @@ -58,13 +58,18 @@ void AddNOp::Verify() { "The size %d of inputs must be equal to 1.", input_size)); if (auto vec_type = (*this)->operand(0).type().dyn_cast()) { for (size_t i = 0; i < vec_type.size(); ++i) { - PADDLE_ENFORCE(vec_type[i].isa(), + PADDLE_ENFORCE(vec_type[i].isa() || + vec_type[i].isa(), phi::errors::PreconditionNotMet( "Type validation failed for the 0th input.")); } } else { PADDLE_ENFORCE( - (*this)->operand(0).type().isa(), + (*this)->operand(0).type().isa() || + (*this) + ->operand(0) + .type() + .isa(), phi::errors::PreconditionNotMet( "Type validation failed for the 0th input.")); } @@ -82,7 +87,8 @@ void AddNOp::Verify() { phi::errors::PreconditionNotMet( "The size %d of outputs must be equal to 1.", output_size)); PADDLE_ENFORCE( - (*this)->result(0).type().isa(), + (*this)->result(0).type().isa() || + (*this)->result(0).type().isa(), phi::errors::PreconditionNotMet( "Type validation failed for the 0th output.")); } @@ -147,6 +153,262 @@ void AddNOp::InferMeta(phi::InferMetaContext *infer_meta) { fn(infer_meta); } +OpInfoTuple AddN_Op::GetOpInfo() { + std::vector inputs = { + paddle::dialect::OpInputInfo( + "inputs", + "ir::VectorType", + false, + false, + false)}; + std::vector attributes = {}; + std::vector outputs = { + paddle::dialect::OpOutputInfo( + "out", "paddle::dialect::DenseTensorType", false, false)}; + paddle::dialect::OpRunTimeInfo run_time_info = paddle::dialect::OpRunTimeInfo( + "AddNInferMeta", {"inputs"}, {"add_n"}, {"inputs"}, {}, {}, {}, {}); + return std::make_tuple(inputs, attributes, outputs, run_time_info, "add_n_"); +} + +void AddN_Op::Build(ir::Builder &builder, + ir::OperationArgument &argument, + ir::OpResult inputs_) { + VLOG(4) << "Builder construction inputs"; + std::vector argument_inputs = {inputs_}; + argument.AddOperands(argument_inputs.begin(), argument_inputs.end()); + + VLOG(4) << "Builder construction attributes"; + + VLOG(4) << "Builder construction outputs"; + ir::VectorType inputs = inputs_.type().dyn_cast(); + (void)inputs; + std::vector vec_dense_inputs; + for (size_t i = 0; i < static_cast(inputs.size()); i++) { + vec_dense_inputs.push_back(phi::DenseTensor( + std::make_unique( + paddle::platform::CPUPlace()) + .get(), + phi::DenseTensorMeta( + paddle::dialect::TransToPhiDataType( + inputs[i].dyn_cast().dtype()), + inputs[i].dyn_cast().dims(), + inputs[i] + .dyn_cast() + .data_layout(), + inputs[i].dyn_cast().lod(), + inputs[i].dyn_cast().offset()))); + } + std::vector vec_meta_inputs; + for (size_t i = 0; i < vec_dense_inputs.size(); i++) { + vec_meta_inputs.push_back(phi::MetaTensor(&vec_dense_inputs[i])); + } + + std::vector meta_inputs; + for (size_t i = 0; i < static_cast(vec_meta_inputs.size()); i++) { + meta_inputs.push_back(&vec_meta_inputs[i]); + } + phi::DenseTensor dense_out; + phi::MetaTensor meta_out(&dense_out); + + phi::AddNInferMeta(meta_inputs, &meta_out); + + std::vector argument_outputs; + ir::Type out_dense_tensor_type = paddle::dialect::DenseTensorType::get( + ir::IrContext::Instance(), + paddle::dialect::TransToIrDataType(dense_out.dtype()), + dense_out.dims(), + dense_out.layout(), + dense_out.lod(), + dense_out.offset()); + argument_outputs.push_back(out_dense_tensor_type); + argument.AddOutputs(argument_outputs.begin(), argument_outputs.end()); +} + +void AddN_Op::Verify() { + VLOG(4) << "Start Verifying inputs, outputs and attributes for: AddN_Op."; + VLOG(4) << "Verifying inputs:"; + { + auto input_size = num_operands(); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::PreconditionNotMet( + "The size %d of inputs must be equal to 1.", input_size)); + if (auto vec_type = + (*this)->operand_source(0).type().dyn_cast()) { + for (size_t i = 0; i < vec_type.size(); ++i) { + PADDLE_ENFORCE(vec_type[i].isa() || + vec_type[i].isa(), + phi::errors::PreconditionNotMet( + "Type validation failed for the 0th input.")); + } + } else { + PADDLE_ENFORCE((*this)->operand_source(0) + .type() + .isa() || + (*this) + ->operand_source(0) + .type() + .isa(), + phi::errors::PreconditionNotMet( + "Type validation failed for the 0th input.")); + } + } + VLOG(4) << "Verifying attributes:"; + { + // Attributes num is 0, not need to check attributes type. + } + VLOG(4) << "Verifying outputs:"; + { + auto output_size = num_results(); + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::PreconditionNotMet( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE( + (*this)->result(0).type().isa() || + (*this)->result(0).type().isa(), + phi::errors::PreconditionNotMet( + "Type validation failed for the 0th output.")); + } + VLOG(4) << "End Verifying for: AddN_Op."; +} + +void AddN_Op::InferMeta(phi::InferMetaContext *infer_meta) { + auto fn = PD_INFER_META(phi::AddNInferMeta); + fn(infer_meta); +} + +OpInfoTuple AddNWithKernelOp::GetOpInfo() { + std::vector inputs = { + paddle::dialect::OpInputInfo( + "inputs", + "ir::VectorType", + false, + false, + false)}; + std::vector attributes = {}; + std::vector outputs = { + paddle::dialect::OpOutputInfo( + "out", "paddle::dialect::DenseTensorType", false, false)}; + paddle::dialect::OpRunTimeInfo run_time_info = paddle::dialect::OpRunTimeInfo( + "AddNInferMeta", {"inputs"}, {"add_n"}, {"inputs"}, {}, {}, {}, {}); + return std::make_tuple( + inputs, attributes, outputs, run_time_info, "add_n_with_kernel"); +} + +void AddNWithKernelOp::Build(ir::Builder &builder, + ir::OperationArgument &argument, + ir::OpResult inputs_) { + VLOG(4) << "Builder construction inputs"; + std::vector argument_inputs = {inputs_}; + argument.AddOperands(argument_inputs.begin(), argument_inputs.end()); + + VLOG(4) << "Builder construction attributes"; + + VLOG(4) << "Builder construction outputs"; + ir::VectorType inputs = inputs_.type().dyn_cast(); + (void)inputs; + std::vector vec_dense_inputs; + for (size_t i = 0; i < static_cast(inputs.size()); i++) { + vec_dense_inputs.push_back(phi::DenseTensor( + std::make_unique( + paddle::platform::CPUPlace()) + .get(), + phi::DenseTensorMeta( + paddle::dialect::TransToPhiDataType( + inputs[i].dyn_cast().dtype()), + inputs[i].dyn_cast().dims(), + inputs[i] + .dyn_cast() + .data_layout(), + inputs[i].dyn_cast().lod(), + inputs[i].dyn_cast().offset()))); + } + std::vector vec_meta_inputs; + for (size_t i = 0; i < vec_dense_inputs.size(); i++) { + vec_meta_inputs.push_back(phi::MetaTensor(&vec_dense_inputs[i])); + } + + std::vector meta_inputs; + for (size_t i = 0; i < static_cast(vec_meta_inputs.size()); i++) { + meta_inputs.push_back(&vec_meta_inputs[i]); + } + phi::DenseTensor dense_out; + phi::MetaTensor meta_out(&dense_out); + + phi::AddNInferMeta(meta_inputs, &meta_out); + + std::vector argument_outputs; + ir::Type out_dense_tensor_type = paddle::dialect::DenseTensorType::get( + ir::IrContext::Instance(), + paddle::dialect::TransToIrDataType(dense_out.dtype()), + dense_out.dims(), + dense_out.layout(), + dense_out.lod(), + dense_out.offset()); + argument_outputs.push_back(out_dense_tensor_type); + argument.AddOutputs(argument_outputs.begin(), argument_outputs.end()); +} + +void AddNWithKernelOp::Verify() { + VLOG(4) << "Start Verifying inputs, outputs and attributes for: " + "AddNWithKernelOp."; + VLOG(4) << "Verifying inputs:"; + { + auto input_size = num_operands(); + PADDLE_ENFORCE_EQ( + input_size, + 1u, + phi::errors::PreconditionNotMet( + "The size %d of inputs must be equal to 1.", input_size)); + if (auto vec_type = + (*this)->operand_source(0).type().dyn_cast()) { + for (size_t i = 0; i < vec_type.size(); ++i) { + PADDLE_ENFORCE(vec_type[i].isa() || + vec_type[i].isa(), + phi::errors::PreconditionNotMet( + "Type validation failed for the 0th input.")); + } + } else { + PADDLE_ENFORCE((*this)->operand_source(0) + .type() + .isa() || + (*this) + ->operand_source(0) + .type() + .isa(), + phi::errors::PreconditionNotMet( + "Type validation failed for the 0th input.")); + } + } + VLOG(4) << "Verifying attributes:"; + { + // Attributes num is 0, not need to check attributes type. + } + VLOG(4) << "Verifying outputs:"; + { + auto output_size = num_results(); + PADDLE_ENFORCE_EQ( + output_size, + 1u, + phi::errors::PreconditionNotMet( + "The size %d of outputs must be equal to 1.", output_size)); + PADDLE_ENFORCE( + (*this)->result(0).type().isa() || + (*this)->result(0).type().isa(), + phi::errors::PreconditionNotMet( + "Type validation failed for the 0th output.")); + } + VLOG(4) << "End Verifying for: AddNWithKernelOp."; +} + +void AddNWithKernelOp::InferMeta(phi::InferMetaContext *infer_meta) { + auto fn = PD_INFER_META(phi::AddNInferMeta); + fn(infer_meta); +} + const char *FusedGemmEpilogueOp::attributes_name[3] = { "trans_x", "trans_y", "activation"}; @@ -794,3 +1056,5 @@ IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::AddNOp) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::FusedGemmEpilogueOp) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::FusedGemmEpilogueGradOp) IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::SplitGradOp) +IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::AddN_Op) +IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::AddNWithKernelOp) diff --git a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.h b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.h index 4db22c3908254..ca163029e7d0d 100644 --- a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.h +++ b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.h @@ -24,6 +24,7 @@ paddle::dialect::AddNOp, paddle::dialect::SplitGradOp #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/ir/dialect/paddle_dialect/interface/infermeta.h" #include "paddle/fluid/ir/dialect/paddle_dialect/interface/op_yaml_info.h" +#include "paddle/fluid/ir/dialect/paddle_dialect/trait/inplace.h" #include "paddle/fluid/ir/dialect/paddle_dialect/utils/op_yaml_info_util.h" #include "paddle/fluid/ir/dialect/paddle_dialect/utils/utils.h" #include "paddle/ir/core/builder.h" @@ -51,6 +52,47 @@ class AddNOp : public ir::Op { static void InferMeta(phi::InferMetaContext *infer_meta); }; +class AddN_Op : public ir::Op { + public: + using Op::Op; + static const char *name() { return "pd.add_n_"; } + static constexpr const char **attributes_name = nullptr; + static constexpr uint32_t attributes_num = 0; + static OpInfoTuple GetOpInfo(); + static void Build(ir::Builder &builder, // NOLINT + ir::OperationArgument &argument, // NOLINT + ir::OpResult inputs_); + + void Verify(); + ir::Value inputs() { return operand_source(0); } + ir::OpResult out() { return result(0); } + + static void InferMeta(phi::InferMetaContext *infer_meta); +}; + +class AddNWithKernelOp : public ir::Op { + public: + using Op::Op; + static const char *name() { return "pd.add_n_with_kernel"; } + static constexpr const char **attributes_name = nullptr; + static constexpr uint32_t attributes_num = 0; + static OpInfoTuple GetOpInfo(); + static void Build(ir::Builder &builder, // NOLINT + ir::OperationArgument &argument, // NOLINT + ir::OpResult inputs_); + + void Verify(); + ir::Value inputs() { return operand_source(0); } + ir::OpResult out() { return result(0); } + + static void InferMeta(phi::InferMetaContext *infer_meta); +}; + class FusedGemmEpilogueOp : public ir::Op { @@ -137,5 +179,7 @@ IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::AddNOp) IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::FusedGemmEpilogueOp) IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::FusedGemmEpilogueGradOp) IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::SplitGradOp) +IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::AddN_Op) +IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::AddNWithKernelOp) #endif diff --git a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.cc b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.cc new file mode 100644 index 0000000000000..2da7b098a6556 --- /dev/null +++ b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h" + +#include "paddle/ir/core/enforce.h" + +namespace paddle { +namespace dialect { +IrMetaTensor::IrMetaTensor(phi::DataType dtype, + const phi::DDim& dims, + phi::DataLayout layout, + const LoD& lod, + size_t offset) + : dims_(dims), dtype_(dtype), layout_(layout), lod_(lod), offset_(offset) {} + +IrMetaTensor::IrMetaTensor(const IrMetaTensor& other) { + dims_ = other.dims(); + dtype_ = other.dtype(); + layout_ = other.layout(); + lod_ = other.lod(); + offset_ = other.offset(); +} + +IrMetaTensor& IrMetaTensor::operator=(const IrMetaTensor& other) { + dims_ = other.dims(); + dtype_ = other.dtype(); + layout_ = other.layout(); + lod_ = other.lod(); + offset_ = other.offset(); + return *this; +} + +IrMetaTensor& IrMetaTensor::operator=(IrMetaTensor&& other) noexcept { + dims_ = std::move(other.dims()); + dtype_ = other.dtype(); + layout_ = other.layout(); + lod_ = std::move(other.lod()); + offset_ = other.offset(); + return *this; +} + +int64_t IrMetaTensor::numel() const { return phi::product(dims_); } + +const phi::Place& IrMetaTensor::place() const { + IR_THROW("Don't use IrMetaTensor::place method."); +} + +void* IrMetaTensor::AllocateFrom(phi::Allocator* allocator, + phi::DataType dtype, + size_t requested_size, + bool fake_alloc) { + IR_THROW("Don't use IrMetaTensor::AllocateFrom method."); +} + +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h new file mode 100644 index 0000000000000..ffcbd415c368a --- /dev/null +++ b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h @@ -0,0 +1,80 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/allocator.h" +#include "paddle/phi/core/tensor_base.h" +#include "paddle/phi/core/tensor_meta.h" + +namespace paddle { +namespace dialect { + +using LoD = std::vector>; + +class IrMetaTensor : public phi::TensorBase, + public phi::TypeInfoTraits { + public: + IrMetaTensor(phi::DataType dtype, + const phi::DDim& dims, + phi::DataLayout layout, + const LoD& lod, + size_t offset = 0); + + IrMetaTensor(IrMetaTensor&& other) = default; + + IrMetaTensor(const IrMetaTensor& other); + + IrMetaTensor& operator=(const IrMetaTensor& other); + + IrMetaTensor& operator=(IrMetaTensor&& other) noexcept; + + virtual ~IrMetaTensor() = default; + + public: + static const char* name() { return "IrMetaTensor"; } + + int64_t numel() const override; + + const phi::DDim& dims() const noexcept override { return dims_; } + + const phi::Place& place() const override; + + phi::DataType dtype() const noexcept override { return dtype_; } + + phi::DataLayout layout() const noexcept override { return layout_; } + + const LoD& lod() const noexcept { return lod_; } + + size_t offset() const noexcept { return offset_; } + + bool valid() const noexcept override { return true; } + + bool initialized() const override { return true; } + + void* AllocateFrom(phi::Allocator* allocator, + phi::DataType dtype, + size_t requested_size = 0, + bool fake_alloc = false) override; + + private: + phi::DDim dims_; + phi::DataType dtype_{phi::DataType::UNDEFINED}; + phi::DataLayout layout_{phi::DataLayout::NCHW}; + LoD lod_; + size_t offset_{0}; +}; + +} // namespace dialect +} // namespace paddle diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h index 1958a9444bcb9..b1916d5418f77 100644 --- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h @@ -118,8 +118,18 @@ void BuildPhiContext(ir::Operation* op, InListType inputs; auto& variable_array = var->Get(); for (size_t i = 0; i < variable_array.size(); ++i) { - inputs.emplace_back(InType(const_cast( - &(variable_array[i]->Get())))); + if (variable_array[i]->IsType()) { + inputs.emplace_back(InType(const_cast( + &(variable_array[i]->Get())))); + } else if (variable_array[i]->IsType()) { + inputs.emplace_back(InType(const_cast( + &(variable_array[i]->Get())))); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "Only support Vector and vector now, " + "not support vector<%d>.", + variable_array[i]->Type())); + } } ctx->EmplaceBackInputs(inputs); } else { @@ -315,8 +325,18 @@ void BuildPhiContext(ir::Operation* op, auto& variable_array = inner_scope->FindVar(name_map.at(out_ptr)) ->Get(); for (size_t i = 0; i < variable_array.size(); ++i) { - outputs.emplace_back(OutType(const_cast( - &(variable_array[i]->Get())))); + if (variable_array[i]->IsType()) { + outputs.emplace_back(OutType(const_cast( + &(variable_array[i]->Get())))); + } else if (variable_array[i]->IsType()) { + outputs.emplace_back(OutType(const_cast( + &(variable_array[i]->Get())))); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "Only support Vector and vector now, " + "not support vector<%d>.", + variable_array[i]->Type())); + } } ctx->EmplaceBackOutputs(outputs); } else { diff --git a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc index 84f18baa55aea..d75c7cc4779ff 100644 --- a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc +++ b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc @@ -149,34 +149,67 @@ bool SkipFeedOp(ir::Operation* op, const std::set& feed_names) { op->attributes().at("name").dyn_cast().AsString()); } -std::vector GetFakeTensorList(ir::Value new_input_tmp) { - std::vector vec_res; +std::vector> GetFakeTensorList( + ir::Value new_input_tmp) { + std::vector> vec_res; auto input_type = new_input_tmp.type(); - std::vector types; - if (input_type.isa()) { - types.push_back(input_type.dyn_cast()); - } else if (input_type.isa()) { - auto vec_inner_types = input_type.dyn_cast().data(); - for (size_t i = 0; i < vec_inner_types.size(); ++i) { - types.push_back( - vec_inner_types[0].dyn_cast()); - } - } - for (auto& type : types) { - auto ptr = new phi::Allocation(nullptr, 0, type.place()); + auto build_fake_dense_tensor = + [](const dialect::AllocatedDenseTensorType& type) { + auto ptr = new phi::Allocation(nullptr, 0, type.place()); + + std::shared_ptr holder(ptr); + + auto dtype = TransToPhiDataType(type.dtype()); - std::shared_ptr holder(ptr); + phi::DenseTensorMeta meta( + dtype, type.dims(), type.data_layout(), type.lod(), type.offset()); - auto dtype = TransToPhiDataType(type.dtype()); + return std::make_shared(holder, meta); + }; - phi::DenseTensorMeta meta( - dtype, type.dims(), type.data_layout(), type.lod(), type.offset()); + auto build_fake_selected_rows = + [](const dialect::AllocatedSelectedRowsType& type) { + auto ptr = new phi::Allocation(nullptr, 0, type.place()); - phi::DenseTensor fake_tensor(holder, meta); + std::shared_ptr holder(ptr); - vec_res.push_back(fake_tensor); + auto dtype = TransToPhiDataType(type.dtype()); + + phi::DenseTensorMeta meta( + dtype, type.dims(), type.data_layout(), type.lod(), type.offset()); + + std::vector rows; + int64_t height = 0; + rows.clear(); + + auto sr = std::make_shared(rows, height); + + phi::DenseTensor dense_tensor(holder, meta); + *(sr->mutable_value()) = dense_tensor; + + return sr; + }; + + if (input_type.isa()) { + vec_res.push_back(build_fake_dense_tensor( + input_type.dyn_cast())); + } else if (input_type.isa()) { + vec_res.push_back(build_fake_selected_rows( + input_type.dyn_cast())); + } else if (input_type.isa()) { + auto vec_inner_types = input_type.dyn_cast().data(); + for (size_t i = 0; i < vec_inner_types.size(); ++i) { + if (vec_inner_types[0].isa()) { + vec_res.push_back(build_fake_dense_tensor( + vec_inner_types[0].dyn_cast())); + } else if (vec_inner_types[0].isa()) { + vec_res.push_back(build_fake_selected_rows( + vec_inner_types[0].dyn_cast())); + } + } } + return vec_res; } @@ -514,7 +547,7 @@ phi::KernelKey GetKernelKey( auto fake_tensors = GetFakeTensorList(new_input_tmp); for (auto& fake_tensor : fake_tensors) { - kernel_key_parser.AssignKernelKeySet(fake_tensor); + kernel_key_parser.AssignKernelKeySet(*fake_tensor); } // Because we can't make sure the place when build data op @@ -617,6 +650,12 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog, new_in.type() .dyn_cast() .place()); + } else if (new_in.type() + .isa()) { + out_places.push_back( + new_in.type() + .dyn_cast() + .place()); } else { PADDLE_THROW(phi::errors::Unimplemented( "only support dense tensor type for now")); @@ -759,6 +798,14 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog, if (op_info_parser != nullptr) { kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0]; } + + if (op_item->name() == "pd.add_n_" || + op_item->name() == "pd.add_n_with_kernel") { + if (op_item->result(0).type().isa()) { + kernel_fn_str = "add_n_sr"; + } + } + auto kernel_key = GetKernelKey(op_item, place, map_value_pair, op_info_parser.get()); VLOG(6) << "kernel type " << kernel_key; @@ -929,9 +976,22 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog, for (size_t j = 0; j < pre_define_op->num_operands(); ++j) { auto in_i = map_value_pair.at(pre_define_op->operand_source(j)); auto in_i_type = in_i.type(); - auto place = - in_i_type.dyn_cast() - .place(); + phi::Place place; + if (in_i_type.isa()) { + place = + in_i_type.dyn_cast() + .place(); + } else if (in_i_type + .isa()) { + place = + in_i_type.dyn_cast() + .place(); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "builtin.combine Input type only support " + "VectorType and " + "VectorType")); + } // get input args def type auto args_def = kernel.args_def(); @@ -949,12 +1009,30 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog, // build memcopy op auto out_place = phi::TransToPhiPlace(kernel.InputAt(i).backend); - auto out_type = dialect::AllocatedDenseTensorType::get( - ctx, - out_place, - pre_define_op->operand_source(j) - .type() - .dyn_cast()); + + ir::Type out_type; + if (in_i_type.isa()) { + out_type = dialect::AllocatedDenseTensorType::get( + ctx, + out_place, + pre_define_op->operand_source(j) + .type() + .dyn_cast()); + } else if (in_i_type + .isa()) { + out_type = dialect::AllocatedSelectedRowsType::get( + ctx, + out_place, + pre_define_op->operand_source(j) + .type() + .dyn_cast()); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "builtin.combine Input type only support " + "VectorType and " + "VectorType")); + } + in_i = AddPlaceTransferOp(in_i, out_type, place, diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc index 38d833fc312de..e22fa5f3b3779 100644 --- a/paddle/fluid/ir_adaptor/translator/op_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc @@ -1112,8 +1112,8 @@ struct AddNOpTranscriber : public OpTranscriber { } const auto& op_info = ctx->GetRegisteredOpInfo(target_op_name); if (!op_info) { - IR_THROW( - "Op assign_value should have corresponding OpInfo pd.assign_value_"); + IR_THROW("Op assign_value should have corresponding OpInfo %s", + target_op_name); } return op_info; diff --git a/paddle/fluid/memory/allocation/custom_allocator.cc b/paddle/fluid/memory/allocation/custom_allocator.cc index 37503105c8bc0..c8225a160428f 100644 --- a/paddle/fluid/memory/allocation/custom_allocator.cc +++ b/paddle/fluid/memory/allocation/custom_allocator.cc @@ -29,7 +29,8 @@ void CustomAllocator::FreeImpl(phi::Allocation* allocation) { platform::errors::PermissionDenied("CustomDevice memory is " "freed in incorrect device. " "This may be a bug")); - + phi::DeviceManager::GetDeviceWithPlace(place_)->MemoryDeallocate( + allocation->ptr(), allocation->size()); delete allocation; } diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 18680fe678b5d..a7f6bc512ffce 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -284,7 +284,13 @@ class FCMKLDNNHandler std::shared_ptr AcquireWeightsMemoryWithReorder( const phi::DenseTensor* weights, const std::vector& scale_data) { - const std::string weights_key = this->memory_key_ + "@weights"; + const std::string weights_base_key = this->memory_key_ + "@weights"; + std::string weights_key; + weights_key.reserve(128); + weights_key = phi::funcs::ExtendKeyWithThreadInfoIfNeeded( + dev_ctx_, + phi::funcs::CreateKey( + dev_ctx_, weights_base_key, this->fwd_pd_->weights_desc())); auto memory_p = std::static_pointer_cast( this->dev_ctx_.GetBlob(weights_key)); @@ -410,7 +416,8 @@ class FCMKLDNNKernel : public framework::OpKernel { phi::funcs::CreateKey(dev_ctx, ctx.InputName("Input"), ctx.InputName("W"), - phi::vectorize(x->dims()))); + phi::vectorize(x->dims()), + phi::vectorize(weights->dims()))); auto inner_product_cache = std::static_pointer_cast(dev_ctx.GetBlob(cache_key)); diff --git a/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py b/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py index 8322b0ba2be83..783066f0fc906 100644 --- a/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py +++ b/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py @@ -211,6 +211,11 @@ class StaticTensorOperants : public TensorOperantsBase { #include "paddle/fluid/prim/api/manual_prim/prim_manual_api.h" #include "paddle/fluid/prim/utils/static/desc_tensor.h" +#include "paddle/fluid/primitive/backend/backend.h" +#include "paddle/fluid/primitive/type/lazy_tensor.h" + +PHI_DECLARE_bool(enable_new_ir_api); + """ @@ -219,47 +224,88 @@ class StaticTensorOperants : public TensorOperantsBase { namespace prim { using DescTensor = paddle::prim::DescTensor; +using LazyTensor = paddle::primitive::LazyTensor; Tensor StaticTensorOperants::add(const Tensor& x, const Scalar& y) { - return paddle::prim::add(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::add(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); + } else { + return paddle::prim::add(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + } } Tensor StaticTensorOperants::subtract(const Tensor& x, const Scalar& y) { - return paddle::prim::subtract(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::subtract(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); + } else { + return paddle::prim::subtract(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + } } Tensor StaticTensorOperants::multiply(const Tensor& x, const Scalar& y) { - return paddle::prim::scale(x, y, 0.0f, true); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::scale(x, y, 0.0f, true); + } else { + return paddle::prim::scale(x, y, 0.0f, true); + } } Tensor StaticTensorOperants::divide(const Tensor& x, const Scalar& y) { - return paddle::prim::divide(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::divide(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); + } else { + return paddle::prim::divide(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + } } Tensor StaticTensorOperants::add(const Scalar& x, const Tensor& y) { - return paddle::prim::add(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::add(paddle::primitive::backend::full(y.shape(), x, y.dtype(), y.place()), y); + } else { + return paddle::prim::add(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); + } } + Tensor StaticTensorOperants::subtract(const Scalar& x, const Tensor& y) { - return paddle::prim::subtract(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::subtract(paddle::primitive::backend::full(y.shape(), x, y.dtype(), y.place()), y); + } else { + return paddle::prim::subtract(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); + } } Tensor StaticTensorOperants::multiply(const Scalar& x, const Tensor& y) { - return paddle::prim::scale(y, x, 0.0f, true); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::scale(y, x, 0.0f, true); + } else { + return paddle::prim::scale(y, x, 0.0f, true); + } } Tensor StaticTensorOperants::divide(const Scalar& x, const Tensor& y) { - return paddle::prim::divide(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::divide(paddle::primitive::backend::full(y.shape(), x, y.dtype(), y.place()), y); + } else { + return paddle::prim::divide(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); + } } Tensor StaticTensorOperants::pow(const Tensor& x, const Tensor& y) { - return paddle::prim::elementwise_pow(x, y); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::elementwise_pow(x, y); + } else { + return paddle::prim::elementwise_pow(x, y); + } } Tensor StaticTensorOperants::pow(const Tensor& x, const Scalar& y) { - return paddle::prim::elementwise_pow(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + if (FLAGS_enable_new_ir_api) { + return paddle::primitive::backend::elementwise_pow(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); + } else { + return paddle::prim::elementwise_pow(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); + } } - """ @@ -339,13 +385,21 @@ def gene_eager_tensor_operants_implementation(self): def gene_static_tensor_func_call(self): api_func_name = self.get_api_func_name() - + backend_static_func_name = ( + 'paddle::primitive::backend::' + api_func_name + '' + ) prim_static_func_name = ( 'paddle::prim::' + api_func_name + '' ) - prim_static_func_parameters = self.get_func_args() + static_func_parameters = self.get_func_args() + + static_tensor_func_call = f"""if (FLAGS_enable_new_ir_api) {{ + return {backend_static_func_name}({static_func_parameters}); + }} else {{ + return {prim_static_func_name}({static_func_parameters}); + }}""" - return f"""return {prim_static_func_name}({prim_static_func_parameters});""" + return static_tensor_func_call def gene_static_tensor_operants_implementation(self): api_code = "" diff --git a/paddle/fluid/prim/utils/static/CMakeLists.txt b/paddle/fluid/prim/utils/static/CMakeLists.txt index aa72fadb591a6..483c3eabc05d1 100644 --- a/paddle/fluid/prim/utils/static/CMakeLists.txt +++ b/paddle/fluid/prim/utils/static/CMakeLists.txt @@ -6,4 +6,4 @@ cc_library( cc_library( static_tensor_operants SRCS static_tensor_operants.cc - DEPS static_prim_api) + DEPS static_prim_api primitive_backend_static_experimental) diff --git a/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 b/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 index ab040254355f5..6d69433737633 100644 --- a/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 +++ b/paddle/fluid/primitive/codegen/templates/rule/vjp/generated/generated_vjp.cc.j2 @@ -10,7 +10,9 @@ #include "paddle/fluid/primitive/type/lazy_tensor.h" #include "paddle/fluid/primitive/utils/utils.h" #include "paddle/ir/core/operation.h" +#include "paddle/phi/core/flags.h" +PHI_DECLARE_string(tensor_operants_mode); namespace paddle { namespace primitive { @@ -95,6 +97,7 @@ for (size_t i=0; i< stop_gradients[0].size(); i++ ) { {% endmacro %} {% macro body_prim(api) %} +FLAGS_tensor_operants_mode = "static"; {% for i in range(api.outputs|length) %} {% if api.outputs[i].typename=='Tensor' %} paddle::Tensor* {{api.outputs[i].name}} = !stop_gradients[{{i}}][0] ? &vjp_res[{{i}}][0] : nullptr; diff --git a/paddle/fluid/primitive/rule/vjp/details.h b/paddle/fluid/primitive/rule/vjp/details.h index e018cccdef7a0..12fb66127a298 100644 --- a/paddle/fluid/primitive/rule/vjp/details.h +++ b/paddle/fluid/primitive/rule/vjp/details.h @@ -39,10 +39,7 @@ void divide_grad(const Tensor& x, Tensor* dy) { if (dy) { // dy = -(x/y^2) * dout - auto denominator = - elementwise_pow(y, full(y.shape(), 2.0, y.dtype(), y.place())); - auto dy_res = scale( - multiply(divide(x, denominator), out_grad), -1.0, 0.0, true); + auto dy_res = -(x / y.pow(2.0)) * out_grad; if (x.dims() != y.dims()) { // Maybe need reduce here phi::DDim reduce_dim = get_reduce_dims(y.dims(), x.dims()); @@ -61,7 +58,7 @@ void divide_grad(const Tensor& x, if (dx) { // dx = (1/y) * dout auto one_tensor = full(phi::vectorize(y.dims()), 1.0, y.dtype()); - auto dx_res = multiply(divide(one_tensor, y), out_grad); + auto dx_res = one_tensor / y * out_grad; if (y.dims() != x.dims()) { // Maybe need reduce here auto reduce_dim = get_reduce_dims(x.dims(), y.dims()); diff --git a/paddle/ir/core/block.cc b/paddle/ir/core/block.cc index f99ec340e4c49..04d59e2582ebe 100644 --- a/paddle/ir/core/block.cc +++ b/paddle/ir/core/block.cc @@ -13,6 +13,9 @@ // limitations under the License. #include "paddle/ir/core/block.h" + +#include + #include "paddle/ir/core/enforce.h" #include "paddle/ir/core/operation.h" #include "paddle/ir/core/region.h" @@ -60,4 +63,34 @@ Block::UseIterator Block::use_end() const { return Block::UseIterator(); } bool Block::HasOneUse() const { return first_use_ && !first_use_.next_use(); } +void Block::ResetOpListOrder(const OpListType &new_op_list) { + IR_ENFORCE(new_op_list.size() == ops_.size(), + "The size of new_op_list not same with ops_."); + IR_ENFORCE(TopoOrderCheck(new_op_list), + "The new_op_list is not in topological order."); + + ops_.clear(); + for (Operation *op : new_op_list) { + push_back(op); + } +} + +bool Block::TopoOrderCheck(const OpListType &op_list) { + std::unordered_set visited_values; + for (const Operation *op : op_list) { + if (op->num_operands() > 0) { + for (size_t i = 0; i < op->num_operands(); ++i) { + auto operand = op->operand_source(i); + if (operand && visited_values.count(op->operand_source(i)) == 0) { + return false; + } + } + } + for (size_t i = 0; i < op->results().size(); ++i) { + visited_values.insert(op->result(i)); + } + } + return true; +} + } // namespace ir diff --git a/paddle/ir/core/block.h b/paddle/ir/core/block.h index ebe4b6cb8ecf4..2cf00037eb5fc 100644 --- a/paddle/ir/core/block.h +++ b/paddle/ir/core/block.h @@ -70,6 +70,8 @@ class IR_API Block { bool HasOneUse() const; BlockOperand *first_use_addr() { return &first_use_; } + void ResetOpListOrder(const OpListType &new_op_list); + private: Block(Block &) = delete; Block &operator=(const Block &) = delete; @@ -78,6 +80,8 @@ class IR_API Block { friend class Region; void SetParent(Region *parent, Region::iterator position); + static bool TopoOrderCheck(const OpListType &op_list); + private: Region *parent_; // not owned OpListType ops_; // owned diff --git a/paddle/ir/dialect/shape/ir/shape_op.cc b/paddle/ir/dialect/shape/ir/shape_op.cc index 3681aafa36520..776503ea269e3 100644 --- a/paddle/ir/dialect/shape/ir/shape_op.cc +++ b/paddle/ir/dialect/shape/ir/shape_op.cc @@ -112,6 +112,7 @@ bool SymbolicDim::merge(SymbolicDim other) { if (!isDynamic() && !other.isDynamic() && getValue() != other.getValue()) return false; if (isDynamic() && !other.isDynamic()) updateValue(other.getValue()); + if (!isDynamic() && other.isDynamic()) other.updateValue(getValue()); bool knownNonNegativeFlag = getKnownNonNegative() || other.getKnownNonNegative(); diff --git a/paddle/ir/dialect/shape/utils/shape_utils.cc b/paddle/ir/dialect/shape/utils/shape_utils.cc index 182d335f71c3d..f9d78a63184cb 100644 --- a/paddle/ir/dialect/shape/utils/shape_utils.cc +++ b/paddle/ir/dialect/shape/utils/shape_utils.cc @@ -46,6 +46,154 @@ const std::string SymbolTable::insert(ir::Operation* symbol) { return name; } +bool SymbolicDimMgr::load() { + for (auto op_it = m_.block()->begin(); op_it != m_.block()->end(); op_it++) { + symbolTable_.insert(*op_it); + SymbolicDim op = (*op_it)->dyn_cast(); + if (!op) continue; + symbolDimUnionSet_[op] = op; + symbolNameSet_.insert(op.getSymName()); + } + return loadShapeConstraintGraph(); +} + +bool SymbolicDimMgr::loadShapeConstraintGraph() { + // TODO(liujinnan): add more constraint function. currently, only support + // tie_product_equal. + auto constraint_vec = + symbolTable_.lookup("tie_product_equal"); + + if (!constraint_vec.size()) return true; + + auto build_sym_product = [&](std::vector range, + SymbolicDimProduct& product) { + for (Value v : range) { + auto definingOp = v.GetDefiningOp(); + if (auto constOp = definingOp->dyn_cast()) { + product.factor *= constOp.value().dyn_cast().data(); + continue; + } else if (auto dimOp = definingOp->dyn_cast()) { + auto sym = symbolTable_.lookup(dimOp.getName()); + if (!sym) return false; + product.symbols.push_back(sym); + continue; + } + return false; + } + return true; + }; + for (auto op : constraint_vec) { + SymbolicDimProduct lhs, rhs; + if (!build_sym_product(op.getLhs(), lhs) || + !build_sym_product(op.getRhs(), rhs) || + !mapSymbolicDimProductEqual(lhs, rhs)) + return false; + } + return true; +} + +int64_t gcd(int64_t m, int64_t n) { + if (!m) return n; + if (!n) return m; + return (m < n) ? gcd(m, n % m) : gcd(m % n, n); +} + +bool SymbolicDimMgr::mapSymbolicDimProductEqual(const SymbolicDimProduct& lhs, + const SymbolicDimProduct& rhs) { + SymbolicDimProduct newLhs, newRhs; + std::tie(newLhs, newRhs) = simplifySymbolicDimProductPair(lhs, rhs); + + // early return for identity case. + if (newLhs == newRhs) return true; + + if (newLhs.factor == newRhs.factor && newLhs.symbols.size() == 1 && + newRhs.symbols.size() == 1) { + return mapSymbolicDimEqual(newLhs.symbols[0], newRhs.symbols[0]); + } else if (newLhs.symbols.size() == 0 && newRhs.symbols.size() == 1 && + newRhs.factor == 1) { + return mapSymbolicDimEqual(newConstantSymbolicDim(newLhs.factor), + newRhs.symbols[0]); + } else if (newRhs.symbols.size() == 0 && newLhs.symbols.size() == 1 && + newLhs.factor == 1) { + return mapSymbolicDimEqual(newConstantSymbolicDim(newRhs.factor), + newLhs.symbols[0]); + } + + productEqualityMap_[newLhs][newRhs] = productEqualityMap_[newRhs][newLhs] = + true; + + productEqualityMapUpdated_ = false; + return true; +} + +std::pair +SymbolicDimMgr::simplifySymbolicDimProductPair(const SymbolicDimProduct& x, + const SymbolicDimProduct& y) { + auto lhs = simplifySymbolicDimProduct(x); + auto rhs = simplifySymbolicDimProduct(y); + + SymbolicDimProduct newLhs, newRhs; + int64_t gcdFactor = gcd(std::abs(lhs.factor), std::abs(rhs.factor)); + if (!gcdFactor) return std::make_pair(std::move(newLhs), std::move(newRhs)); + if (std::abs(lhs.factor) < std::abs(rhs.factor)) { + if (lhs.factor < 0) gcdFactor = -gcdFactor; + } else { + if (rhs.factor < 0) gcdFactor = -gcdFactor; + } + + newLhs.factor = lhs.factor / gcdFactor; + newRhs.factor = rhs.factor / gcdFactor; + + std::unordered_map lhsSymbolMap; + std::unordered_map rhsSymbolMap; + for (SymbolicDim op : lhs.symbols) ++lhsSymbolMap[op]; + for (SymbolicDim op : rhs.symbols) ++rhsSymbolMap[op]; + + for (SymbolicDim op : lhs.symbols) { + auto it = rhsSymbolMap.find(op); + if (it != rhsSymbolMap.end() && op.getKnownNonSizeZero()) { + if (--it->second == 0) rhsSymbolMap.erase(it); + continue; + } + newLhs.symbols.push_back(op); + } + + for (SymbolicDim op : rhs.symbols) { + auto it = lhsSymbolMap.find(op); + if (it != lhsSymbolMap.end() && op.getKnownNonSizeZero()) { + if (--it->second == 0) lhsSymbolMap.erase(it); + continue; + } + newRhs.symbols.push_back(op); + } + + if (!newLhs.factor) newLhs.symbols.clear(); + if (!newRhs.factor) newRhs.symbols.clear(); + + return std::make_pair(std::move(newLhs), std::move(newRhs)); +} + +SymbolicDimProduct SymbolicDimMgr::simplifySymbolicDimProduct( + const SymbolicDimProduct& x) { + std::vector copied; + copied.reserve(x.symbols.size()); + for (SymbolicDim op : x.symbols) copied.push_back(getRootSymbolicDim(op)); + + sort(copied.begin(), copied.end(), [&](SymbolicDim lhs, SymbolicDim rhs) { + return compareSymbolicDimNames(lhs.getSymName(), rhs.getSymName()); + }); + SymbolicDimProduct newX; + newX.factor = x.factor; + for (SymbolicDim op : copied) { + if (!op.isDynamic()) { + newX.factor *= op.getValue(); + } else { + newX.symbols.push_back(op); + } + } + return newX; +} + const std::string SymbolicDimMgr::getNextName() { std::string name; do { @@ -123,4 +271,154 @@ bool SymbolicDimMgr::mapSymbolicDimEqual(SymbolicDim lhs, SymbolicDim rhs) { return true; } +SymbolicDimProduct* SymbolicDimMgr::symbolicDimProductDivide( + const SymbolicDimProduct& lhs, const SymbolicDimProduct& rhs) { + SymbolicDimProduct newLhs, newRhs; + std::tie(newLhs, newRhs) = simplifySymbolicDimProductPair(lhs, rhs); + + if (newLhs.factor == 0 || newRhs.factor == 0) return nullptr; + if (newLhs.factor % newRhs.factor != 0) return nullptr; + if (newLhs.symbols.size() < newRhs.symbols.size()) return nullptr; + + SymbolicDimProduct* result = new SymbolicDimProduct(); + result->factor = newLhs.factor / newRhs.factor; + + std::unordered_map symProcMap; + for (SymbolicDim sym : newRhs.symbols) ++symProcMap[sym]; + + for (SymbolicDim sym : newLhs.symbols) { + auto it = symProcMap.find(sym); + if (it == symProcMap.end()) { + result->symbols.push_back(sym); + continue; + } + if (--it->second == 0) { + symProcMap.erase(it); + continue; + } + } + + if (!symProcMap.empty()) return nullptr; + return result; +} + +bool SymbolicDimMgr::isMultipleOfKnownSymbolicDimProductEqualPair( + const SymbolicDimProduct& lhs, const SymbolicDimProduct& rhs) { + for (auto& pairOutter : productEqualityMap_) { + const SymbolicDimProduct& x = pairOutter.first; + auto factorX = symbolicDimProductDivide(lhs, x); + if (!factorX) continue; + for (auto& pairInner : pairOutter.second) { + if (!pairInner.second) continue; + const SymbolicDimProduct& y = pairInner.first; + auto factorY = symbolicDimProductDivide(rhs, y); + if (!factorY || (*factorX) != (*factorY)) continue; + return true; + } + } + + return false; +} + +bool SymbolicDimMgr::updateProductEqualityMap() { + // early return if nothing is updated. + if (productEqualityMapUpdated_) return true; + + SymbolicDimProductMap newMap; + std::unordered_set productSet; + for (auto& pairOutter : productEqualityMap_) { + const SymbolicDimProduct& x = pairOutter.first; + for (auto& pairInner : pairOutter.second) { + if (!pairInner.second) continue; + const SymbolicDimProduct& y = pairInner.first; + SymbolicDimProduct newX, newY; + std::tie(newX, newY) = simplifySymbolicDimProductPair(x, y); + if (newX == newY) continue; + newMap[newX][newY] = newMap[newY][newX] = true; + productSet.insert(newX); + productSet.insert(newY); + } + } + // hash function of SymbolicDimProduct is expensive, thus we map it to integer + // domain first. + std::unordered_map symProd2Idx; + std::vector idx2SymProd(productSet.size()); + std::vector idx2root(productSet.size()); + for (auto& x : productSet) { + size_t idx = symProd2Idx.size(); + symProd2Idx[&x] = idx; + idx2SymProd[idx] = &x; + idx2root[idx] = idx; + } + + auto getRootIdx = [&](size_t root) { + std::vector path; + while (idx2root[root] != root) { + path.push_back(root); + root = idx2root[root]; + } + for (size_t idx : path) idx2root[idx] = root; + return root; + }; + + for (size_t x = 0; x < symProd2Idx.size(); ++x) { + auto& xProd = *idx2SymProd[x]; + auto& rowMap = newMap[xProd]; + size_t xRoot = getRootIdx(x); + for (size_t y = x; y < symProd2Idx.size(); ++y) { + auto& yProd = *idx2SymProd[y]; + if (!rowMap[yProd]) continue; + idx2root[getRootIdx(y)] = xRoot; + } + } + + for (size_t x = 0; x < symProd2Idx.size(); ++x) + for (size_t y = x; y < symProd2Idx.size(); ++y) { + if (getRootIdx(x) != getRootIdx(y)) continue; + auto& xSymProd = *idx2SymProd[x]; + auto& ySymProd = *idx2SymProd[y]; + + newMap[xSymProd][ySymProd] = newMap[ySymProd][xSymProd] = true; + } + + productEqualityMap_ = std::move(newMap); + + for (auto& x : productSet) + for (auto& y : productSet) { + if (!productEqualityMap_[x][y]) continue; + productEqualityMap_[x][y] = productEqualityMap_[y][x] = false; + if (!isMultipleOfKnownSymbolicDimProductEqualPair(x, y)) { + productEqualityMap_[x][y] = productEqualityMap_[y][x] = true; + } + } + + std::unordered_set toRemove; + for (auto& x : productSet) { + if (std::all_of(productSet.begin(), + productSet.end(), + [&](const SymbolicDimProduct& y) { + return !productEqualityMap_[x][y]; + })) { + toRemove.insert(x); + } + } + + for (auto& x : toRemove) { + productEqualityMap_.erase(x); + } + + productEqualityMapUpdated_ = true; + return true; +} + +bool SymbolicDimMgr::isSymbolicDimProductEqual(const SymbolicDimProduct& lhs, + const SymbolicDimProduct& rhs) { + SymbolicDimProduct newLhs, newRhs; + std::tie(newLhs, newRhs) = simplifySymbolicDimProductPair(lhs, rhs); + + // early return for identity case. + if (newLhs == newRhs) return true; + IR_ENFORCE(updateProductEqualityMap(), "Update product equality map failed."); + return isMultipleOfKnownSymbolicDimProductEqualPair(newLhs, newRhs); +} } // namespace ir diff --git a/paddle/ir/dialect/shape/utils/shape_utils.h b/paddle/ir/dialect/shape/utils/shape_utils.h index 70f2a16c4481e..8d5fab1a1c811 100644 --- a/paddle/ir/dialect/shape/utils/shape_utils.h +++ b/paddle/ir/dialect/shape/utils/shape_utils.h @@ -18,6 +18,7 @@ #include #include #include +#include "paddle/ir/core/builtin_attribute.h" #include "paddle/ir/core/builtin_op.h" #include "paddle/ir/core/utils.h" #include "paddle/ir/dialect/shape/ir/shape_op.h" @@ -45,7 +46,6 @@ class SymbolTable { public: explicit SymbolTable(ir::Operation* symbolTableOp) : symbolTableOp_(symbolTableOp) {} - template typename std::enable_if::value, SymbolicDim>::type @@ -97,6 +97,7 @@ struct SymProductHasher { class SymbolicDimMgr { public: explicit SymbolicDimMgr(ir::ModuleOp m); + bool load(); SymbolicDim newSymbolicDim(const std::string& name = {}); SymbolicDim newConstantSymbolicDim(int64_t val); std::vector createSymbolicDimsForRankedValue(Value value); @@ -104,9 +105,28 @@ class SymbolicDimMgr { bool isSymbolicDimEqual(SymbolicDim lhs, SymbolicDim rhs); SymbolTable& symbolTable() { return symbolTable_; } bool mapSymbolicDimEqual(SymbolicDim lhs, SymbolicDim rhs); + SymbolicDimProduct simplifySymbolicDimProduct(const SymbolicDimProduct& x); + std::pair + simplifySymbolicDimProductPair(const SymbolicDimProduct& x, + const SymbolicDimProduct& y); + SymbolicDimProduct* symbolicDimProductDivide(const SymbolicDimProduct& x, + const SymbolicDimProduct& y); + + bool save(); // TODO(liujinnan): load constraint func + + bool isSymbolicDimProductEqual(const SymbolicDimProduct& lhs, + const SymbolicDimProduct& rhs); + bool mapSymbolicDimProductEqual(const SymbolicDimProduct& lhs, + const SymbolicDimProduct& rhs); private: const std::string getNextName(); + bool updateProductEqualityMap(); + bool isMultipleOfKnownSymbolicDimProductEqualPair( + const SymbolicDimProduct& lhs, const SymbolicDimProduct& rhs); + bool saveShapeConstraintGraph(); // TODO(liujinnan): load & save + // shape_constraint_func + bool loadShapeConstraintGraph(); private: ir::ModuleOp m_; @@ -127,6 +147,6 @@ class SymbolicDimMgr { std::unordered_map, SymProductHasher>; SymbolicDimProductMap productEqualityMap_; + bool productEqualityMapUpdated_ = true; }; - } // namespace ir diff --git a/paddle/ir/transforms/reorder_block_ops_pass.cc b/paddle/ir/transforms/reorder_block_ops_pass.cc new file mode 100644 index 0000000000000..d922326677985 --- /dev/null +++ b/paddle/ir/transforms/reorder_block_ops_pass.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/ir/transforms/reorder_block_ops_pass.h" + +#include + +#include "paddle/ir/core/builtin_op.h" +#include "paddle/ir/core/program.h" +#include "paddle/ir/pass/pass.h" + +namespace { + +// TODO(wilber): After support SideEffectTrait, Only NoSideEffectTrait op can be +// removed by dce pass. +// Now just a naive implementation. +class ReorderBlockOpsPass : public ir::Pass { + public: + ReorderBlockOpsPass() : ir::Pass("ReorderBlockOpsPass", 0) {} + + void Run(ir::Operation *op) override { + IR_ENFORCE(op->num_regions() > 0, + "ReorderBlockOpsPass should run on Operation which regions " + "number greater than 0."); + for (size_t i = 0; i < op->num_regions(); ++i) { + for (auto *block : op->region(i)) { + std::list res_op_list; + std::unordered_map + reorder_op_dep_cnt; // op -> dependent input count + std::unordered_set visited_values; + std::queue op_que; + + auto update_op_que = [&](ir::Operation *op) { + for (size_t i = 0; i < op->results().size(); ++i) { + auto result = op->result(i); + visited_values.insert(result); + for (auto it = result.use_begin(); it != result.use_end(); ++it) { + if (reorder_op_dep_cnt.count(it->owner())) { + reorder_op_dep_cnt[it->owner()]--; + if (reorder_op_dep_cnt[it->owner()] == 0) { + op_que.push(it->owner()); + } + } + } + } + }; + + for (auto &op : *block) { + bool has_dependency = false; + if (op->num_operands() > 0) { + for (size_t i = 0; i < op->num_operands(); ++i) { + auto operand = op->operand_source(i); + if (operand && visited_values.count(op->operand_source(i)) == 0) { + reorder_op_dep_cnt[op]++; + has_dependency = true; + } + } + } + if (!has_dependency) { + res_op_list.push_back(op); + update_op_que(op); + } + } + + if (reorder_op_dep_cnt.empty()) { + return; + } + + while (!op_que.empty()) { + auto *op = op_que.front(); + op_que.pop(); + res_op_list.push_back(op); + update_op_que(op); + } + VLOG(4) << "ReorderBlockOpsPass is applied."; + block->ResetOpListOrder(res_op_list); + } + } + } + + bool CanApplyOn(ir::Operation *op) const override { + return op->num_regions() > 0; + } +}; + +} // namespace + +namespace ir { + +std::unique_ptr CreateReorderBlockOpsPass() { + return std::make_unique(); +} + +} // namespace ir diff --git a/paddle/ir/transforms/reorder_block_ops_pass.h b/paddle/ir/transforms/reorder_block_ops_pass.h new file mode 100644 index 0000000000000..f668471fc9e04 --- /dev/null +++ b/paddle/ir/transforms/reorder_block_ops_pass.h @@ -0,0 +1,26 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/ir/core/dll_decl.h" + +namespace ir { + +class Pass; + +IR_API std::unique_ptr CreateReorderBlockOpsPass(); + +} // namespace ir diff --git a/paddle/phi/backends/onednn/onednn_helper.h b/paddle/phi/backends/onednn/onednn_helper.h index 84e36a26ca487..1d61004b36161 100644 --- a/paddle/phi/backends/onednn/onednn_helper.h +++ b/paddle/phi/backends/onednn/onednn_helper.h @@ -154,6 +154,12 @@ inline void AppendKey(std::string* key, const T& num) { key->append(std::to_string(num)); } +template <> +inline void AppendKey(std::string* key, + const dnnl::memory::format_kind& format) { + key->append(std::to_string(static_cast(format))); +} + template <> inline void AppendKey(std::string* key, const dnnl::memory::format_tag& format) { @@ -171,6 +177,25 @@ inline void AppendKey(std::string* key, const dnnl::algorithm& algorithm) { key->append(std::to_string(static_cast(algorithm))); } +template <> +inline void AppendKey(std::string* key, const dnnl::memory::dims& dims) { + for (size_t i = 0; i < dims.size(); i++) { + AppendKey(key, static_cast(dims[i])); + } +} + +template <> +inline void AppendKey(std::string* key, const dnnl::memory::desc& md) { + AppendKey(key, md.get_dims()); + AppendKey(key, md.get_data_type()); + AppendKey(key, md.get_format_kind()); + AppendKey(key, md.get_inner_blks()); + AppendKey(key, md.get_inner_idxs()); + AppendKey(key, md.get_inner_nblks()); + AppendKey(key, md.get_padded_dims()); + AppendKey(key, md.get_strides()); +} + template <> inline void AppendKey(std::string* key, const dnnl::normalization_flags& flags) { diff --git a/paddle/phi/core/extended_tensor.cc b/paddle/phi/core/extended_tensor.cc index e5b5c3773f867..31d0fb25c88c1 100644 --- a/paddle/phi/core/extended_tensor.cc +++ b/paddle/phi/core/extended_tensor.cc @@ -38,7 +38,7 @@ DataType ExtendedTensor::dtype() const { DataLayout ExtendedTensor::layout() const { PADDLE_THROW(phi::errors::Unavailable( - "ExtendedTensor does not support `dtype` method.")); + "ExtendedTensor does not support `layout` method.")); } bool ExtendedTensor::valid() const { diff --git a/paddle/phi/core/meta_tensor.cc b/paddle/phi/core/meta_tensor.cc index 5ea5a07960923..9b9df5c1ff4aa 100644 --- a/paddle/phi/core/meta_tensor.cc +++ b/paddle/phi/core/meta_tensor.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/enforce.h" @@ -271,6 +272,8 @@ const LoD& MetaTensor::lod() const { return static_cast(tensor_)->non_zero_elements().lod(); } else if (phi::SparseCsrTensor::classof(tensor_)) { return static_cast(tensor_)->non_zero_elements().lod(); + } else if (paddle::dialect::IrMetaTensor::classof(tensor_)) { + return static_cast(tensor_)->lod(); } else { PADDLE_THROW(phi::errors::Unimplemented("Unsupported getting lod of `%s`.", tensor_->type_info().name())); diff --git a/paddle/phi/core/utils/type_info.cc b/paddle/phi/core/utils/type_info.cc index 38e17b57f633d..99b134b6e7960 100644 --- a/paddle/phi/core/utils/type_info.cc +++ b/paddle/phi/core/utils/type_info.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include +#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_meta_tensor.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/custom/custom_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" @@ -50,6 +51,7 @@ template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; +template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; diff --git a/paddle/utils/flags.h b/paddle/utils/flags.h index 3f68ba2f5dc1f..06c84ae15ab7f 100644 --- a/paddle/utils/flags.h +++ b/paddle/utils/flags.h @@ -79,8 +79,8 @@ using gflags::DoubleFromEnv; using gflags::Int32FromEnv; using gflags::Int64FromEnv; using gflags::StringFromEnv; -using gflags::UInt32FromEnv; -using gflags::UInt64FromEnv; +using gflags::Uint32FromEnv; +using gflags::Uint64FromEnv; #else #define DEFINE_FROM_ENV_FUNC(type, name) \ inline type name##FromEnv(const std::string& env_var_name, \ @@ -90,9 +90,9 @@ using gflags::UInt64FromEnv; DEFINE_FROM_ENV_FUNC(bool, Bool); DEFINE_FROM_ENV_FUNC(int32_t, Int32); -DEFINE_FROM_ENV_FUNC(uint32_t, UInt32); +DEFINE_FROM_ENV_FUNC(uint32_t, Uint32); DEFINE_FROM_ENV_FUNC(int64_t, Int64); -DEFINE_FROM_ENV_FUNC(uint64_t, UInt64); +DEFINE_FROM_ENV_FUNC(uint64_t, Uint64); DEFINE_FROM_ENV_FUNC(double, Double); DEFINE_FROM_ENV_FUNC(std::string, String); diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index dbcc4f0c05fda..f0411d096dee4 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -1594,9 +1594,9 @@ def poisson_nll_loss( + 0.5 * paddle.log(2 * math.pi * label) ) loss_out += paddle.where( - stirling_approx <= 1, - paddle.zeros_like(stirling_approx), + label > 1, stirling_approx, + paddle.zeros_like(stirling_approx), ) if reduction == 'mean': loss_out = paddle.mean(loss_out) diff --git a/test/cpp/ir/pattern_rewrite/pattern_rewrite_test.cc b/test/cpp/ir/pattern_rewrite/pattern_rewrite_test.cc index e007b73c9f0ed..fcca8cde7d5aa 100644 --- a/test/cpp/ir/pattern_rewrite/pattern_rewrite_test.cc +++ b/test/cpp/ir/pattern_rewrite/pattern_rewrite_test.cc @@ -42,6 +42,7 @@ #include "paddle/ir/pattern_rewrite/pattern_match.h" #include "paddle/ir/pattern_rewrite/pattern_rewrite_driver.h" #include "paddle/ir/transforms/dead_code_elimination_pass.h" +#include "paddle/ir/transforms/reorder_block_ops_pass.h" #include "paddle/phi/core/kernel_registry.h" // NOTE(zhangbo9674): File pd_op.h is generated by op_gen.py, see details in @@ -1099,6 +1100,7 @@ TEST(pattern_rewrite, Patterns) { pm.AddPass(std::make_unique()); pm.AddPass(ir::CreateConstantFoldingPass()); pm.AddPass(ir::CreateDeadCodeEliminationPass()); + pm.AddPass(ir::CreateReorderBlockOpsPass()); pm.EnablePassTiming(); pm.EnableIRPrinting(); // pm.EnableIRPrinting(std::make_unique( diff --git a/test/cpp/ir/shape_dialect/symbolic_op_test.cc b/test/cpp/ir/shape_dialect/symbolic_op_test.cc index 7b0751d17ac13..138e5e5b0d8c9 100644 --- a/test/cpp/ir/shape_dialect/symbolic_op_test.cc +++ b/test/cpp/ir/shape_dialect/symbolic_op_test.cc @@ -93,7 +93,10 @@ TEST(assist_struct_test, symbolic_dim_table) { EXPECT_FALSE(symbolTable.lookup("S1")); } -TEST(assist_struct_test, symbolic_dim_mgr) { +TEST(assist_struct_test, symbolic_dim_mgr_simple) { + /******************************************************/ + /* Mgr simple version, only SymbolicDim related func. */ + /******************************************************/ ir::IrContext *ctx = ir::IrContext::Instance(); ir::Program program(ctx); ctx->GetOrRegisterDialect(); @@ -141,6 +144,175 @@ TEST(assist_struct_test, symbolic_dim_mgr) { EXPECT_FALSE(symDimMgr.isSymbolicDimEqual(symDimS0, symDimC10)); } +TEST(assist_struct_test, symbolic_dim_mgr_complex) { + /***************************************************************/ + /* Mgr with constraintOp, and SymbolicDimProduct related func. */ + /***************************************************************/ + ir::IrContext *ctx = ir::IrContext::Instance(); + ir::Program program(ctx); + ctx->GetOrRegisterDialect(); + ctx->GetOrRegisterDialect(); + ir::Builder builder = ir::Builder(ctx, program.block()); + + ir::dialect::SymbolicDim symDimS0 = builder.Build( + "S0", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS1 = builder.Build( + "S1", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS2 = builder.Build( + "S2", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS3 = builder.Build( + "S3", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS4 = builder.Build( + "S4", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS5 = builder.Build( + "S5", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS6 = builder.Build( + "S6", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS7 = builder.Build( + "S7", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS8 = builder.Build( + "S8", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS9 = builder.Build( + "S9", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS10 = builder.Build( + "S10", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS11 = builder.Build( + "S11", -100000, false, false, true, true); + ir::dialect::SymbolicDim symDimS12 = builder.Build( + "S12", -100000, false, false, true, false); + ir::dialect::SymbolicDim symDimC10 = builder.Build( + "C10", 10, true, false, true, true); + ir::dialect::SymbolicDim symDimC20 = builder.Build( + "C20", 20, true, false, true, true); + + ir::OpResult dimOpS0 = builder.Build("S0").out(); + ir::OpResult dimOpS1 = builder.Build("S1").out(); + ir::OpResult dimOpS2 = builder.Build("S2").out(); + ir::OpResult dimOpS3 = builder.Build("S3").out(); + ir::OpResult dimOpS4 = builder.Build("S4").out(); + ir::OpResult dimOpS5 = builder.Build("S5").out(); + ir::OpResult dimOpS6 = builder.Build("S6").out(); + ir::OpResult dimOpS7 = builder.Build("S7").out(); + ir::OpResult dimOpS8 = builder.Build("S8").out(); + ir::OpResult dimOpS9 = builder.Build("S9").out(); + ir::OpResult dimOpS10 = builder.Build("S10").out(); + ir::OpResult dimOpS11 = builder.Build("S11").out(); + ir::OpResult dimOpC10 = builder.Build("C10").out(); + ir::OpResult dimOpC20 = builder.Build("C20").out(); + ir::OpResult constant = + builder + .Build(ir::Int32Attribute::get(ctx, 2), + ir::Int32Type::get(ctx)) + ->result(0); + + // Mark S1 == S2. + builder.Build( + 2, 2, std::vector{constant, dimOpS1, dimOpS2, constant}); + // Mark S0 * S1 == S2 * S3, For check S0 == S3. + builder.Build( + 2, 2, std::vector{dimOpS0, dimOpS1, dimOpS2, dimOpS3}); + // Mark S4 * S0 * S1 == S2 * S3 * S5, For check S4 == S5. + builder.Build( + 3, + 3, + std::vector{ + dimOpS4, dimOpS0, dimOpS1, dimOpS2, dimOpS3, dimOpS5}); + // For check S6 == C10 * C20. + builder.Build( + 1, 2, std::vector{dimOpS6, dimOpC10, dimOpC20}); + // Mark C10 * S0 * S1 == S2 * S3 * S7, for check C10 == S7. + builder.Build( + 3, + 3, + std::vector{ + dimOpC10, dimOpS0, dimOpS1, dimOpS2, dimOpS3, dimOpS7}); + + // Mark S8 * S9 == S10 * S11, for unsimplify product case + builder.Build( + 2, 2, std::vector{dimOpS8, dimOpS9, dimOpS10, dimOpS11}); + + ir::SymbolicDimMgr symDimMgr(program.module_op()); + + symDimMgr.load(); + + // For check indirect equality: S1 * S4 == S2 * S5 + ir::SymbolicDimProduct symDimProductLhs; + ir::SymbolicDimProduct symDimProductRhs; + + symDimProductLhs.symbols.push_back(symDimS1); + symDimProductLhs.symbols.push_back(symDimS4); + + symDimProductRhs.symbols.push_back(symDimS2); + symDimProductRhs.symbols.push_back(symDimS5); + + // For uncompletely simplied product check: S8 * S9 * S12 == S10 * S11 * S12 + ir::SymbolicDimProduct symDimProductLhs_; + ir::SymbolicDimProduct symDimProductRhs_; + + symDimProductLhs_.symbols.push_back(symDimS8); + symDimProductLhs_.symbols.push_back(symDimS9); + symDimProductLhs_.symbols.push_back(symDimS12); + + symDimProductRhs_.symbols.push_back(symDimS10); + symDimProductRhs_.symbols.push_back(symDimS11); + symDimProductRhs_.symbols.push_back(symDimS12); + + // For check simplifySymbolicDimProduct, {factor = 1, Sym = {S7}} => {factor = + // 10} + ir::SymbolicDimProduct symDimProductS7; + symDimProductS7.symbols.push_back(symDimS7); + ir::SymbolicDimProduct simplifiedProductS7 = + symDimMgr.simplifySymbolicDimProduct(symDimProductS7); + + // For check simplifySymbolicDimProductPair, X * Y * Y, Y * Y * Z => X, Z + ir::SymbolicDimProduct symDimProductPairLhs; + ir::SymbolicDimProduct symDimProductPairRhs; + ir::SymbolicDimProduct newLhs, newRhs; + symDimProductPairLhs.symbols.push_back(symDimS4); + symDimProductPairLhs.symbols.push_back(symDimS1); + symDimProductPairLhs.symbols.push_back(symDimS2); + symDimProductPairRhs.symbols.push_back(symDimS1); + symDimProductPairRhs.symbols.push_back(symDimS2); + symDimProductPairRhs.symbols.push_back(symDimS3); + + std::tie(newLhs, newRhs) = symDimMgr.simplifySymbolicDimProductPair( + symDimProductPairLhs, symDimProductPairRhs); + + // For check symbolicDimProductDivide, {S4 * S1 * C20} / {S1 * C10} => {factor + // = 2 Sym = {S4}} + ir::SymbolicDimProduct symDimProductDivLhs; + ir::SymbolicDimProduct symDimProductDivRhs; + symDimProductDivLhs.symbols.push_back(symDimS4); + symDimProductDivLhs.symbols.push_back(symDimS1); + symDimProductDivLhs.symbols.push_back(symDimC20); + symDimProductDivRhs.symbols.push_back(symDimS1); + symDimProductDivRhs.symbols.push_back(symDimC10); + + ir::SymbolicDimProduct *divRes = symDimMgr.symbolicDimProductDivide( + symDimProductDivLhs, symDimProductDivRhs); + + EXPECT_TRUE(symDimMgr.isSymbolicDimEqual(symDimS1, symDimS2)); + EXPECT_TRUE(symDimMgr.isSymbolicDimEqual(symDimS0, symDimS3)); + EXPECT_TRUE(symDimMgr.isSymbolicDimEqual(symDimS4, symDimS5)); + EXPECT_EQ(symDimS6.getValue(), 200); + EXPECT_EQ(symDimMgr.symbolTable().lookup("C20"), + symDimC20); + EXPECT_EQ(symDimS7.getValue(), symDimC10.getValue()); + EXPECT_EQ(simplifiedProductS7.factor, 10); + EXPECT_EQ(simplifiedProductS7.symbols.size(), static_cast(0)); + EXPECT_EQ(newLhs.symbols.size(), static_cast(1)); + EXPECT_EQ(newRhs.symbols.size(), static_cast(1)); + EXPECT_EQ(newLhs.symbols[0], symDimMgr.getRootSymbolicDim(symDimS4)); + EXPECT_EQ(newRhs.symbols[0], symDimMgr.getRootSymbolicDim(symDimS3)); + EXPECT_EQ(divRes->factor, 2); + EXPECT_EQ(divRes->symbols.size(), static_cast(1)); + EXPECT_EQ(divRes->symbols[0], symDimMgr.getRootSymbolicDim(symDimS4)); + EXPECT_TRUE( + symDimMgr.isSymbolicDimProductEqual(symDimProductLhs, symDimProductRhs)); + EXPECT_TRUE(symDimMgr.isSymbolicDimProductEqual(symDimProductLhs_, + symDimProductRhs_)); +} + TEST(assist_struct_test, dim) { ir::IrContext *ctx = ir::IrContext::Instance(); ir::Program program(ctx); diff --git a/test/dygraph_to_static/test_simnet.py b/test/dygraph_to_static/test_simnet.py index 2c69cf2072cf9..09ea063f9ad8e 100644 --- a/test/dygraph_to_static/test_simnet.py +++ b/test/dygraph_to_static/test_simnet.py @@ -17,6 +17,7 @@ import unittest import numpy as np +from dygraph_to_static_util import test_and_compare_with_new_ir from simnet_dygraph_model import BOW, HingeLoss import paddle @@ -176,6 +177,7 @@ def train(conf_dict, to_static): class TestSimnet(unittest.TestCase): + @test_and_compare_with_new_ir(True) def test_dygraph_static_same_loss(self): if fluid.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) diff --git a/test/dygraph_to_static/test_simnet_v2.py b/test/dygraph_to_static/test_simnet_v2.py index a49cc23af11f8..316464ab79132 100644 --- a/test/dygraph_to_static/test_simnet_v2.py +++ b/test/dygraph_to_static/test_simnet_v2.py @@ -17,6 +17,7 @@ import unittest import numpy as np +from dygraph_to_static_util import test_and_compare_with_new_ir from simnet_dygraph_model_v2 import BOW, HingeLoss import paddle @@ -176,6 +177,7 @@ def train(conf_dict, to_static): class TestSimnet(unittest.TestCase): + @test_and_compare_with_new_ir(True) def test_dygraph_static_same_loss(self): if paddle.is_compiled_with_cuda(): paddle.fluid.set_flags({"FLAGS_cudnn_deterministic": True}) diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt index 05f30dca257f1..46a0136167e9e 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -592,6 +592,10 @@ py_test_modules( py_test_modules(test_install_check MODULES test_install_check ENVS FLAGS_cudnn_deterministic=1) set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST") +py_test_modules(test_install_check_new_ir MODULES test_install_check ENVS + FLAGS_cudnn_deterministic=1 FLAGS_enable_new_ir_in_executor=1) +set_tests_properties(test_install_check_new_ir PROPERTIES LABELS + "RUN_TYPE=DIST") if((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)) py_test_modules(test_fused_gemm_epilogue_op MODULES diff --git a/test/legacy_test/test_poisson_nll_loss.py b/test/legacy_test/test_poisson_nll_loss.py index 096018a6e2bf0..14ad375519914 100644 --- a/test/legacy_test/test_poisson_nll_loss.py +++ b/test/legacy_test/test_poisson_nll_loss.py @@ -51,7 +51,9 @@ def ref_poisson_nll_loss( stirling_approx = ( label * np.log(label) - label + 0.5 * np.log(2 * np.pi * label) ) - loss_out += np.where(stirling_approx <= 1, 0, stirling_approx) + loss_out += np.where( + label > 1, stirling_approx, np.zeros_like(stirling_approx) + ) if reduction == 'none': return loss_out diff --git a/test/prim/new_ir_prim/test_vjp_prim.py b/test/prim/new_ir_prim/test_vjp_prim.py index 2a29ae9f69fc2..22309a08823ec 100644 --- a/test/prim/new_ir_prim/test_vjp_prim.py +++ b/test/prim/new_ir_prim/test_vjp_prim.py @@ -63,6 +63,7 @@ class TestVjpPrim(unittest.TestCase): def test_divide_grad_prim_case1(self): newir_program = get_ir_divide_program() paddle.framework.core._set_prim_backward_enabled(True) + paddle.framework.set_flags({"FLAGS_enable_new_ir_api": True}) dout = newir_program.block().ops[-2].result(0) out_grads = [[dout]] stop_gradients = [[False], [False]] @@ -83,9 +84,9 @@ def test_divide_grad_prim_case1(self): "pd.full", "pd.elementwise_pow", "pd.divide", - "pd.multiply", "pd.full", "pd.scale", + "pd.multiply", "pd.full_int_array", "pd.sum", "pd.full_int_array", @@ -101,6 +102,7 @@ def test_divide_grad_prim_case1(self): for idx, op in enumerate(newir_program.block().ops): self.assertEqual(op.name(), all_op_names[idx]) paddle.framework.core._set_prim_backward_enabled(False) + paddle.framework.set_flags({"FLAGS_enable_new_ir_api": False}) def test_divide_grad_no_prim(self): newir_program = get_ir_divide_program() @@ -123,6 +125,7 @@ def test_divide_grad_no_prim(self): def test_sum_grad_prim(self): newir_program = get_ir_sum_program() paddle.framework.core._set_prim_backward_enabled(True) + paddle.framework.set_flags({"FLAGS_enable_new_ir_api": True}) dout = newir_program.block().ops[-3].result(0) out_grads = [[dout]] stop_gradients = [[False], [True]] @@ -147,6 +150,7 @@ def test_sum_grad_prim(self): for idx, op in enumerate(newir_program.block().ops): self.assertEqual(op.name(), all_op_names[idx]) paddle.framework.core._set_prim_backward_enabled(False) + paddle.framework.set_flags({"FLAGS_enable_new_ir_api": False}) def test_sum_grad_no_prim(self): newir_program = get_ir_sum_program()