Skip to content

Commit

Permalink
[NewIR]support new ir load combine (#56101)
Browse files Browse the repository at this point in the history
* support new ir load combine

* update

* polish code

* remove print

* polish code

* fix bug

* polish code

* fix compile bug
  • Loading branch information
phlrain authored Aug 15, 2023
1 parent a2fe1e2 commit b850acb
Show file tree
Hide file tree
Showing 12 changed files with 163 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/ir/core/value.h"

namespace ir {
class Value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/event.h"
Expand All @@ -42,7 +42,7 @@ std::vector<int> GetValueIds(
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name) {
std::vector<int> ids;
std::string var_name = value_2_var_name.at(value);
auto& var_name = value_2_var_name.at(value);
ids.push_back(var_name_2_id.at(var_name));
// NOTE(zhangbo): Value maybe a VariableRefArray
auto var = inner_scope->FindVar(var_name);
Expand All @@ -61,7 +61,7 @@ platform::DeviceContext* ParseDeviceContext(
const platform::Place& place,
const std::string& execution_stream,
const int stream_priority) {
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
interpreter::ContextManager& ctx_manager =
Expand Down Expand Up @@ -149,7 +149,7 @@ OpFuncType AnalyseOpFuncType(::ir::Operation* op,
// computing. They execute serially in device thread and block CUDA kernel
// launching in other GPU OPs. To improve performance, set them as kGpuSync
// and so that they would be dispatched to host thread.
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
if (op_name == kCoalesceTensor &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ LegacyKernelInstruction::LegacyKernelInstruction(
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name)
: InstructionBase(id, place) {
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
ir::OpInfo op_info = ir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
Expand Down Expand Up @@ -97,18 +97,20 @@ LegacyKernelInstruction::LegacyKernelInstruction(
yaml_interface->get_op_info_());
VLOG(6) << "finish process yaml_info_parser";

::ir::BuildPhiContext<
phi::InferMetaContext,
phi::MetaTensor,
phi::MetaTensor,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
false>(op,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
&infer_meta_context_);
if (infer_meta_interface_) {
::ir::BuildPhiContext<
phi::InferMetaContext,
phi::MetaTensor,
phi::MetaTensor,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
false>(op,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
&infer_meta_context_);
}
VLOG(6) << "finish process infer meta context";

auto kernel_name =
Expand All @@ -123,8 +125,10 @@ LegacyKernelInstruction::LegacyKernelInstruction(
phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
VLOG(6) << "finish process select kernel";

operator_base_ =
ir::BuildOperatorBase(op, value_2_var_name, yaml_info_parser);
Scope* inner_scope = local_scope == nullptr ? scope : local_scope;

operator_base_ = ir::BuildOperatorBase(
op, value_2_var_name, yaml_info_parser, variable_2_var_name, inner_scope);
paddle::framework::VariableValueMap in_map;
paddle::framework::VariableValueMap out_map;
auto dev_ctx = phi::DeviceContextPool::Instance().Get(
Expand All @@ -151,7 +155,6 @@ LegacyKernelInstruction::LegacyKernelInstruction(
GetStreamPriority()));
VLOG(6) << "finish process device context";

Scope* inner_scope = local_scope == nullptr ? scope : local_scope;
InitInputsOutputsIds(
op, inner_scope, value_2_var_name, var_name_2_id, variable_2_var_name);
VLOG(6) << "finish process inputs outputs index";
Expand All @@ -169,10 +172,16 @@ LegacyKernelInstruction::~LegacyKernelInstruction() {
if (kernel_context_ != nullptr) {
delete kernel_context_;
}

if (phi_kernel_ != nullptr) {
delete phi_kernel_;
}
}

void LegacyKernelInstruction::Run() {
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
if (infer_meta_interface_) {
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
}
VLOG(6) << "Run op " << legacy_op_name_ << " infer meta.";
(*(phi_kernel_))((kernel_context_));
VLOG(6) << "Run op " << legacy_op_name_ << " kernel.";
Expand Down
41 changes: 11 additions & 30 deletions paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1076,36 +1076,17 @@ void BuildOpFuncList(
"not found kernel for [%s]",
kernel_name);

if (kernel_name == "fused_softmax_mask_upper_triangle" ||
kernel_name == "fused_softmax_mask_upper_triangle_grad") {
// builder operator
op_func_node.operator_base_ =
ir::BuildOperatorBase(op, value_2_name_map, op_yaml_info_parser);
paddle::framework::VariableValueMap in_map;
paddle::framework::VariableValueMap out_map;
op_func_node.runtime_ctx_ =
std::make_shared<paddle::framework::RuntimeContext>(
paddle::framework::RuntimeContext(in_map, out_map));
ir::BuildRuntimeContext(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
op_func_node.runtime_ctx_.get());
op_func_node.fluid_op = true;
} else {
::ir::BuildPhiContext<phi::KernelContext,
const phi::TensorBase*,
phi::TensorBase*,
paddle::small_vector<const phi::TensorBase*>,
paddle::small_vector<phi::TensorBase*>,
true>(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_));
}
::ir::BuildPhiContext<phi::KernelContext,
const phi::TensorBase*,
phi::TensorBase*,
paddle::small_vector<const phi::TensorBase*>,
paddle::small_vector<phi::TensorBase*>,
true>(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_));

VLOG(6) << "finish process kernel context";
op_func_node.kernel_context_.SetDeviceContext(
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/new_executor/new_ir_interpreter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

#include "paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.h"
#include "paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h"
#include "paddle/fluid/ir/dialect/utils.h"
#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h"
#include "paddle/ir/core/builtin_attribute.h"

Expand Down Expand Up @@ -428,8 +429,7 @@ void NewIRInterpreter::BuildInstruction() {
}
VLOG(6) << "process " << op_name;

if (op_name == "pd.fused_softmax_mask_upper_triangle" ||
op_name == "pd.fused_softmax_mask_upper_triangle_grad") {
if (dialect::IsLegacyOp(op_name)) {
vec_instruction_base_.emplace_back(
std::make_unique<LegacyKernelInstruction>(op_idx++,
place_,
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/ir/dialect/pd_op.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
- {typename: bool, name: load_as_fp16}
- {typename: bool, name: model_from_memory}
outputs:
- {typename: 'Tensor[]', name: out, optional: true, intermediate: false}
- {typename: 'Tensor[]', name: Out, optional: true, intermediate: false}
no_need_buffer: null
data_transform: null
kernel:
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/ir/dialect/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
namespace paddle {
namespace dialect {

// Ops that are routed through the legacy (fluid OperatorBase) execution
// path instead of the new-IR phi kernel instruction path; membership is
// queried via IsLegacyOp(). NOTE(review): presumably these ops lack a
// functional phi kernel signature — confirm before extending the list.
const std::unordered_set<std::string> LegacyOpList = {
    "pd.fused_softmax_mask_upper_triangle",
    "pd.fused_softmax_mask_upper_triangle_grad",
    "pd.load_combine"};

enum class AttrType {
UNDEFINED = 0,
BOOL,
Expand Down Expand Up @@ -167,5 +172,7 @@ VariantType GetAttributeData(const ir::Attribute& attr) {
return kAttrCastMap[attr_type](attr);
}

// Returns true when `name` is registered in LegacyOpList, i.e. the op must
// run through the legacy OperatorBase path rather than the phi kernel path.
bool IsLegacyOp(const std::string& name) {
  return LegacyOpList.find(name) != LegacyOpList.end();
}

} // namespace dialect
} // namespace paddle
2 changes: 2 additions & 0 deletions paddle/fluid/ir/dialect/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,5 +147,7 @@ static inline ir::Attribute TransToIrAttribute(phi::Scalar scalar,

VariantType GetAttributeData(const ir::Attribute& attr);

bool IsLegacyOp(const std::string& name);

} // namespace dialect
} // namespace paddle
76 changes: 70 additions & 6 deletions paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -598,17 +598,39 @@ void BuildRuntimeContext(
PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
phi::errors::PreconditionNotMet(
"can not find var[%s] in scope", in_var_name));

auto var = inner_scope->FindVar(in_var_name);
std::vector<paddle::framework::Variable*> vec_tmp = {var};
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
runtime_ctx->outputs[legacy_attr_name] = vec_tmp;

auto type = ptr.type();
auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
std::vector<paddle::framework::Variable*> vec_tmp = {var};

runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
} else if (type.isa<ir::VectorType>()) {
auto var_ref = var->Get<paddle::framework::VariableRefArray>();
std::vector<paddle::framework::Variable*> vec_tmp;
vec_tmp.reserve(var_ref.size());
for (size_t k = 0; k < var_ref.size(); ++k) {
vec_tmp.push_back(const_cast<paddle::framework::Variable*>(var_ref[k]));
}
runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
"ir::vector type"));
}
}
}

std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name,
const paddle::framework::Scope* scope) {
paddle::framework::VariableNameMap in_name_map;
paddle::framework::VariableNameMap out_name_map;
paddle::framework::AttributeMap attr_map;
Expand Down Expand Up @@ -637,15 +659,57 @@ std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
}

// build attribute
auto& op_attr_map = op->attributes();
auto attr_name_list = op_yaml_info.AttrParams(true);
for (auto& name : attr_name_list) {
auto& val = op_attr_map.at(name);

if (val.isa<ir::StrAttribute>()) {
attr_map[name] = val.dyn_cast<ir::StrAttribute>().AsString();
} else if (val.isa<ir::Int32Attribute>()) {
attr_map[name] = val.dyn_cast<ir::Int32Attribute>().data();
} else if (val.isa<ir::BoolAttribute>()) {
attr_map[name] = val.dyn_cast<ir::BoolAttribute>().data();
} else if (val.isa<ir::FloatAttribute>()) {
attr_map[name] = val.dyn_cast<ir::FloatAttribute>().data();
} else if (val.isa<ir::DoubleAttribute>()) {
attr_map[name] = val.dyn_cast<ir::DoubleAttribute>().data();
} else if (val.isa<ir::Int64Attribute>()) {
attr_map[name] = val.dyn_cast<ir::Int64Attribute>().data();
} else {
std::stringstream ss;
val.Print(ss);
VLOG(1) << "type not support " << ss.str() << std::endl;
PADDLE_THROW("Type[%s] in attribute map not support yet", ss.str());
}
}

auto& output_name_list = op_yaml_info.OutputNames();
for (size_t i = 0; i < output_name_list.size(); ++i) {
auto name = output_name_list[i];
ir::Value ptr = op->result(i);

auto out_var_name = name_map.at(ptr);
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
out_name_map[legacy_attr_name].push_back(out_var_name);

auto type = ptr.type();
auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
out_name_map[legacy_arg_name].push_back(out_var_name);
} else if (type.isa<ir::VectorType>()) {
auto var = scope->FindVar(out_var_name);
auto var_ref = var->Get<paddle::framework::VariableRefArray>();
for (size_t k = 0; k < var_ref.size(); ++k) {
PADDLE_ENFORCE(variable_2_var_name.count(var_ref[k]),
"Variable MUST in variable_2_var_name map");
out_name_map[legacy_arg_name].push_back(
variable_2_var_name.at(var_ref[k]));
}
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
"ir::vector type"));
}
}

auto& op_info = paddle::framework::OpInfoMap::Instance().Get(fluid_op_name);
Expand Down
5 changes: 4 additions & 1 deletion paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ void BuildRuntimeContext(
std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info);
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name,
const paddle::framework::Scope* scope);

template <typename Context,
typename InType,
Expand Down
11 changes: 5 additions & 6 deletions paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@ const std::unordered_set<std::string> UnchangeOutputOps = {
"builtin.get_parameter",
"pd.shadow_output"};

const std::unordered_set<std::string> LegacyOpList = {
"pd.fused_softmax_mask_upper_triangle",
"pd.fused_softmax_mask_upper_triangle_grad"};

bool NeedFallBackCpu(const ir::Operation* op,
const std::string& kernel_fn_name,
const phi::KernelKey& kernel_key) {
Expand Down Expand Up @@ -553,6 +549,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
GetKernelKey(op_item, place, map_value_pair, op_info_parser.get());
VLOG(6) << "kernel type " << kernel_key;

if (op_item->name() == "pd.load_combine") {
kernel_key.set_dtype(phi::DataType::FLOAT32);
}
if (NeedFallBackCpu((op_item), kernel_fn_str, kernel_key)) {
kernel_key.set_backend(phi::Backend::CPU);
}
Expand All @@ -571,7 +570,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
auto args_def = phi_kernel.args_def();
auto output_defs = args_def.output_defs();
if (!UnchangeOutputOps.count(op_item->name()) &&
!LegacyOpList.count(op_item->name())) {
!IsLegacyOp(op_item->name())) {
PADDLE_ENFORCE_EQ(
op_item->num_results(),
output_defs.size(),
Expand All @@ -583,7 +582,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
for (size_t i = 0; i < op_item->num_results(); ++i) {
phi::Place out_place;
if ((!UnchangeOutputOps.count(op_item->name())) &&
(!LegacyOpList.count(op_item->name())) && phi_kernel.IsValid()) {
(!IsLegacyOp(op_item->name())) && phi_kernel.IsValid()) {
out_place = phi::TransToPhiPlace(output_defs[i].backend);
} else {
out_place = phi::TransToPhiPlace(kernel_key.backend());
Expand Down
Loading

0 comments on commit b850acb

Please sign in to comment.