Skip to content

Commit

Permalink
change CinnLaunchOp to inherit from OperatorWithKernel
Browse files Browse the repository at this point in the history
  • Loading branch information
CtfGo committed Oct 29, 2021
1 parent 9810e10 commit 2daaa75
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 94 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ endif()
if (WITH_CINN)
cc_library(cinn_launch_op_helper SRCS cinn_launch_op_helper.cc DEPS operator cinn)
cc_test(cinn_launch_op_helper_test SRCS cinn_launch_op_helper_test.cc DEPS cinn_launch_op_helper)
op_library(cinn_launch_op SRCS cinn_launch_op.cc DEPS cinn_compiler cinn_launch_op_helper cinn ${OP_HEADER_DEPS})
op_library(cinn_launch_op SRCS cinn_launch_op.cc cinn_launch_op.cu.cc DEPS cinn_compiler cinn_launch_op_helper cinn ${OP_HEADER_DEPS})
if (WITH_GPU)
nv_test(cinn_launch_op_test SRCS cinn_launch_op_test.cc DEPS cinn_compiler cinn_launch_op elementwise_add_op)
endif()
Expand Down
128 changes: 36 additions & 92 deletions paddle/fluid/operators/cinn_launch_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,104 +12,39 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <unordered_map>
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/scope.h"
#include "cinn/runtime/cinn_runtime.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
#include "paddle/fluid/operators/cinn_launch_op_helper.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/fluid/operators/cinn_launch_op.h"

namespace paddle {
namespace operators {

static constexpr char kX[] = "X";
static constexpr char kOutputs[] = "Out";
static constexpr char kCompilationKey[] = "compilation_key";

using LoDTensor = framework::LoDTensor;
using Name2ConstTensor = std::map<std::string, const LoDTensor*>;
using CinnTensor = cinn::hlir::framework::Tensor;
using Name2CinnTensor = std::unordered_map<std::string, CinnTensor>;
using framework::paddle2cinn::CinnCompiler;

class CinnLaunchOp : public framework::OperatorBase {
class CinnLaunchOp : public framework::OperatorWithKernel {
public:
CinnLaunchOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}

private:
void RunImpl(const framework::Scope& scope,
const platform::Place& place) const override {
// Step 1. Find graph object and prepare input
PADDLE_ENFORCE_EQ(HasAttr(kCompilationKey), true,
platform::errors::NotFound(
"No Attribute(%s) found for CinnLaunchOp operator.",
kCompilationKey));
const auto& compilation_key = Attr<std::string>(kCompilationKey);
VLOG(2) << "CinnLaunchOp compilation_key:" << compilation_key;

const auto& graph = CinnCompiler::GetInstance()->FindGraph(compilation_key);
OP_INOUT_CHECK(HasInputs(kX), "Input", kX, "CinnLaunchOp");
Name2ConstTensor input_tensors =
details::GetConstTensors(scope, Inputs(kX));

// Step 2. Get compilation result of the graph
auto target = details::PlaceToCinnTarget(place);
const auto& cinn_compiled_object =
CinnCompiler::GetInstance()->Compile(graph, input_tensors, target);
VLOG(2) << "CinnLaunchOp compile graph done on " << place;

const auto& cinn_runtime_program = cinn_compiled_object.runtime_program;
const auto& compiled_scope = *(cinn_compiled_object.scope.get());
const auto& paddle2cinn_varmap = cinn_compiled_object.paddle2cinn_varmap;
using framework::OperatorWithKernel::OperatorWithKernel;

// Step 3. Initialize all variables of the compilation runtime program
// in paddle, and pack them into execution arguments
VLOG(2) << "CinnLaunchOp prepare execution arguments";
std::map<std::string, cinn_pod_value_t> name2argument;
std::vector<std::unique_ptr<cinn_buffer_t>> hold_buffers;
// prepare input variables
Name2CinnTensor input_compiled_tensors = details::GetCompiledTensors(
Inputs(kX), compiled_scope, paddle2cinn_varmap);
details::CheckTensorEquivalent(input_tensors, input_compiled_tensors);
details::AppendExecutionArguments(scope, Inputs(kX), paddle2cinn_varmap,
&name2argument, &hold_buffers);

// prepare output variables
Name2CinnTensor output_compiled_tensors = details::GetCompiledTensors(
Outputs(kOutputs), compiled_scope, paddle2cinn_varmap);
details::InitializeOutputVar(scope, place, output_compiled_tensors);
Name2ConstTensor output_tensors =
details::GetConstTensors(scope, Outputs(kOutputs));
details::CheckTensorEquivalent(output_tensors, output_compiled_tensors);
details::AppendExecutionArguments(scope, Outputs(kOutputs),
paddle2cinn_varmap, &name2argument,
&hold_buffers);

// prepare temporary variables
auto temp_variable_names = details::SeperateTempVar(
compiled_scope, paddle2cinn_varmap, Inputs(kX), Outputs(kOutputs));
auto temp_scope = scope.NewTmpScope();
if (!temp_variable_names.empty()) {
details::InitializeTempVar(temp_variable_names, compiled_scope, place,
temp_scope.get());
details::AppendExecutionArguments(*temp_scope, temp_variable_names,
paddle2cinn_varmap, &name2argument,
&hold_buffers);
}
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInputs(kX), "Input", kX, "CinnLaunchOp");
OP_INOUT_CHECK(ctx->HasOutput(kOutputs), "Output", kOutputs,
"CinnLaunchOp");
}

// Step 4. Launch CINN to execute the compilation runtime program
cinn_runtime_program->Execute(&name2argument);
VLOG(2) << "CinnLaunchOp launch runtime_program execution done.";
protected:
/* [Why use single type kernel]:
*
* This op is similar to a control flow op: it does not need
* an op kernel of its own, but in order to make it executable
* under dynamic graph mode, it is implemented with an op kernel.
*
* So whether the kernel data type is int, float or any other
* type has no effect on its execution logic, so a single data
* type is directly specified here.
*
* Of course, the concrete data type chosen is also not important.
*/

framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.GetPlace());
}
};

Expand Down Expand Up @@ -160,4 +95,13 @@ It accomplishes the computation of the graph following several steps:
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(cinn_launch, ops::CinnLaunchOp, ops::CinnLaunchOpMaker);

REGISTER_OPERATOR(
cinn_launch, ops::CinnLaunchOp, ops::CinnLaunchOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);

/* see [Why use single type kernel] */
REGISTER_OP_CPU_KERNEL(
cinn_launch,
ops::CinnLaunchOpKernel<paddle::platform::CPUDeviceContext, float>);
20 changes: 20 additions & 0 deletions paddle/fluid/operators/cinn_launch_op.cu.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/cinn_launch_op.h"

/* see [Why use single type kernel] */
// Only an FP32 CUDA kernel is registered: the kernel data type has no
// effect on this op's execution logic, so one type suffices.
REGISTER_OP_CUDA_KERNEL(cinn_launch,
                        paddle::operators::CinnLaunchOpKernel<
                            paddle::platform::CUDADeviceContext, float>);
117 changes: 117 additions & 0 deletions paddle/fluid/operators/cinn_launch_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/scope.h"
#include "cinn/runtime/cinn_runtime.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
#include "paddle/fluid/operators/cinn_launch_op_helper.h"
#include "paddle/fluid/string/string_helper.h"

namespace paddle {
namespace operators {

// Input/output slot names of the cinn_launch operator.
static constexpr char kX[] = "X";
static constexpr char kOutputs[] = "Out";
// Attribute holding the key passed to CinnCompiler::FindGraph to locate
// the sub-graph this op should compile and run.
static constexpr char kCompilationKey[] = "compilation_key";

using LoDTensor = framework::LoDTensor;
// Maps a Paddle variable name to its (non-owning) tensor pointer.
using Name2ConstTensor = std::map<std::string, const LoDTensor*>;
using CinnTensor = cinn::hlir::framework::Tensor;
// Maps a variable name to the corresponding CINN-side tensor.
using Name2CinnTensor = std::unordered_map<std::string, CinnTensor>;
using framework::paddle2cinn::CinnCompiler;

template <typename DeviceContext, typename T>
class CinnLaunchOpKernel : public framework::OpKernel<T> {
 public:
  // Bridges a Paddle sub-graph to CINN: locates the graph by its
  // compilation key, compiles it via CinnCompiler for the current place,
  // binds input/output/temporary variables into CINN execution arguments,
  // and finally runs the compiled runtime program.
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Step 1. Find graph object and prepare input
    PADDLE_ENFORCE_EQ(ctx.HasAttr(kCompilationKey), true,
                      platform::errors::NotFound(
                          "No Attribute(%s) found for CinnLaunchOp operator.",
                          kCompilationKey));
    const auto& compilation_key =
        ctx.template Attr<std::string>(kCompilationKey);
    VLOG(2) << "CinnLaunchOp compilation_key:" << compilation_key;

    const auto& graph = CinnCompiler::GetInstance()->FindGraph(compilation_key);
    const auto& place = ctx.GetPlace();
    auto input_var_names = ctx.InputNames(kX);
    Name2ConstTensor paddle_inputs =
        details::GetConstTensors(ctx.scope(), input_var_names);

    // Step 2. Get compilation result of the graph for the target device.
    auto target = details::PlaceToCinnTarget(place);
    const auto& compiled_obj =
        CinnCompiler::GetInstance()->Compile(graph, paddle_inputs, target);
    VLOG(2) << "CinnLaunchOp compile graph done on " << place;

    const auto& runtime_program = compiled_obj.runtime_program;
    const auto& cinn_scope = *(compiled_obj.scope.get());
    const auto& var_map = compiled_obj.paddle2cinn_varmap;

    // Step 3. Initialize all variables of the compilation runtime program
    // in paddle, and pack them into execution arguments.
    VLOG(2) << "CinnLaunchOp prepare execution arguments";
    std::map<std::string, cinn_pod_value_t> exec_args;
    std::vector<std::unique_ptr<cinn_buffer_t>> owned_buffers;

    // Bind input variables, checking Paddle/CINN tensor equivalence first.
    Name2CinnTensor compiled_inputs =
        details::GetCompiledTensors(input_var_names, cinn_scope, var_map);
    details::CheckTensorEquivalent(paddle_inputs, compiled_inputs);
    details::AppendExecutionArguments(ctx.scope(), input_var_names, var_map,
                                      &exec_args, &owned_buffers);

    // Bind output variables; they are allocated before being checked.
    auto output_var_names = ctx.OutputNames(kOutputs);
    Name2CinnTensor compiled_outputs =
        details::GetCompiledTensors(output_var_names, cinn_scope, var_map);
    details::InitializeOutputVar(ctx.scope(), place, compiled_outputs);
    Name2ConstTensor paddle_outputs =
        details::GetConstTensors(ctx.scope(), output_var_names);
    details::CheckTensorEquivalent(paddle_outputs, compiled_outputs);
    details::AppendExecutionArguments(ctx.scope(), output_var_names, var_map,
                                      &exec_args, &owned_buffers);

    // Bind temporaries that exist only inside the compiled program; they
    // live in a fresh temporary scope owned by this invocation.
    auto temp_var_names = details::SeperateTempVar(
        cinn_scope, var_map, input_var_names, output_var_names);
    auto temp_scope = ctx.scope().NewTmpScope();
    if (!temp_var_names.empty()) {
      details::InitializeTempVar(temp_var_names, cinn_scope, place,
                                 temp_scope.get());
      details::AppendExecutionArguments(*temp_scope, temp_var_names, var_map,
                                        &exec_args, &owned_buffers);
    }

    // Step 4. Launch CINN to execute the compilation runtime program
    runtime_program->Execute(&exec_args);
    VLOG(2) << "CinnLaunchOp launch runtime_program execution done.";
  }
};

} // namespace operators
} // namespace paddle
5 changes: 4 additions & 1 deletion paddle/fluid/operators/cinn_launch_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ limitations under the License. */
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/init.h"

USE_NO_KERNEL_OP(cinn_launch);
USE_OP(cinn_launch);
USE_OP(elementwise_add);

namespace paddle {
Expand Down Expand Up @@ -166,6 +166,9 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
// create an new elementwise_add op
// because the above one cached the cpu kernel
LOG(INFO) << "Check compute result on gpu";
cinn_launch_op = paddle::framework::OpRegistry::CreateOp(
"cinn_launch", {{"X", {"test_x", "test_y"}}}, {{"Out", {test_out_name}}},
{{"compilation_key", compilation_key}});
elementwise_add_op = paddle::framework::OpRegistry::CreateOp(
"elementwise_add", {{"X", {"test_x"}}, {"Y", {"test_y"}}},
{{"Out", {expected_out_name}}}, {{}});
Expand Down

0 comments on commit 2daaa75

Please sign in to comment.