【Hackathon 5th No.112】move fused_gemm_epilogue to phi and add the yaml of identity_loss #59363

Merged: 29 commits, Dec 21, 2023 (the diff below shows changes from 7 commits)

Commits (29)
341afcc
try to move fused_gemm_epilogue to phi
zeroRains Nov 25, 2023
31eb4eb
try to fix bug
zeroRains Nov 25, 2023
fafe072
update
zeroRains Nov 25, 2023
6f7a3d7
fix the bug in config
zeroRains Dec 1, 2023
d5d895e
fix the bug
zeroRains Dec 2, 2023
628cc43
config the identity_loss of yaml
zeroRains Dec 2, 2023
456022c
fix bug
zeroRains Dec 2, 2023
249dc76
temp
zeroRains Dec 4, 2023
3a74701
fix bug
zeroRains Dec 4, 2023
3f1de5b
remove the #include
zeroRains Dec 5, 2023
1b0c20a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
zeroRains Dec 6, 2023
cb4da1b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
zeroRains Dec 9, 2023
96a92f6
roback
zeroRains Dec 9, 2023
b05872d
remove file in fluid
zeroRains Dec 9, 2023
6cbe87d
add the output in op_comat
zeroRains Dec 10, 2023
bc1ec4a
try to move xpu
zeroRains Dec 12, 2023
6531fb7
fix config
zeroRains Dec 13, 2023
c98a435
roback the flashatten
zeroRains Dec 13, 2023
12a8411
change the ops/compate
zeroRains Dec 13, 2023
b616534
move the typename to kernel
zeroRains Dec 13, 2023
29f0b5c
remove VLOG
zeroRains Dec 13, 2023
7878144
fix bug
zeroRains Dec 13, 2023
8841394
remove the same define
zeroRains Dec 13, 2023
1e64184
fix bug
zeroRains Dec 13, 2023
5aa79a0
roback the sig file
zeroRains Dec 13, 2023
c41b7eb
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
zeroRains Dec 15, 2023
cc19bea
Update fused_gemm_epilogue_sig.cc
zeroRains Dec 20, 2023
fca12a8
Update fused_gemm_epilogue_sig.cc
zeroRains Dec 20, 2023
ac92a3d
fix a small bug
zeroRains Dec 21, 2023
278 changes: 22 additions & 256 deletions paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc
@@ -13,8 +13,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/fusion.h"
#include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"

namespace paddle {
@@ -25,107 +28,6 @@ class FusedGemmEpilogueOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;

protected:
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedGemmEpilogueOp");
OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "FusedGemmEpilogueOp");
OP_INOUT_CHECK(
ctx->HasInput("Bias"), "Output", "Bias", "FusedGemmEpilogueOp");
OP_INOUT_CHECK(
ctx->HasOutput("Out"), "Output", "Out", "FusedGemmEpilogueOp");

auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
auto bias_dims = ctx->GetInputDim("Bias");
auto trans_x = ctx->Attrs().Get<bool>("trans_x");
auto trans_y = ctx->Attrs().Get<bool>("trans_y");

PADDLE_ENFORCE_EQ(
y_dims.size(),
2,
platform::errors::InvalidArgument(
"The Input tensor Y's dimension of FusedGemmEpilogueOp "
" should be 2, but got %d.",
y_dims.size()));

PADDLE_ENFORCE_GE(
x_dims.size(),
2,
platform::errors::InvalidArgument(
"The Input tensor X's dimension of FusedGemmEpilogueOp "
" should be >= 2, but got %d.",
x_dims.size()));

PADDLE_ENFORCE_EQ(
bias_dims.size(),
1,
platform::errors::InvalidArgument(
"The Input tensor bias's dimension of FusedGemmEpilogueOp "
" should be == 1, but got %d.",
bias_dims.size()));

PADDLE_ENFORCE_EQ(bias_dims[0],
trans_y ? y_dims[0] : y_dims[1],
platform::errors::InvalidArgument(
"The Input tensor bias's dimension 0"
" should be == Y[-1], but got bias's shape = [%s] "
"and Y's shape = [%s]",
bias_dims,
y_dims));

auto x_mat_dims =
phi::flatten_to_2d(x_dims, trans_x ? 1 : x_dims.size() - 1);

int K_from_x = trans_x ? x_mat_dims[0] : x_mat_dims[1];
int K_from_y = trans_y ? y_dims[1] : y_dims[0];

PADDLE_ENFORCE_EQ(
K_from_x,
K_from_y,
platform::errors::InvalidArgument(
"The last dimension of X should be equal with Y's first dimension."
"But received X[-1] = [%d], Y[0] = [%d].",
K_from_x,
K_from_y));

std::vector<int64_t> out_dims;
out_dims.reserve(static_cast<size_t>(x_dims.size()));
if (trans_x) {
for (int i = 1; i < x_dims.size(); ++i) out_dims.push_back(x_dims[i]);
} else {
for (int i = 0; i < x_dims.size() - 1; ++i) out_dims.push_back(x_dims[i]);
}

if (trans_y) {
out_dims.push_back(y_dims[0]);
} else {
out_dims.push_back(y_dims[1]);
}
ctx->SetOutputDim("Out", phi::make_ddim(out_dims));

auto activation = ctx->Attrs().Get<std::string>("activation");
if (ctx->HasOutput("ReserveSpace")) {
ctx->SetOutputDim("ReserveSpace", phi::make_ddim(out_dims));

if (activation == "none") {
PADDLE_THROW(platform::errors::InvalidArgument(
"The ReserveSpace would not be used when activation = \"none\""));
} else {
int min_size_of_n = activation == "relu" ? 128 : 8;
int N_size = trans_y ? y_dims[0] : y_dims[1];
PADDLE_ENFORCE_EQ(
N_size % min_size_of_n,
0,
platform::errors::InvalidArgument(
"The output dimension N (X(MxK) * Y(KxN) = C(MxN)) "
"should be multiple of %d when auxiliary_key given "
"and activation=%s, but got N = %d.",
min_size_of_n,
activation,
N_size));
}
}
}

phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
@@ -188,156 +90,14 @@ class FusedGemmEpilogueGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;

protected:
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(
ctx->HasInput("DOut"), "Input", "DOut", "FusedGemmEpilogueGradOp");
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedGemmEpilogueGradOp");
OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "FusedGemmEpilogueGradOp");
OP_INOUT_CHECK(ctx->HasOutput("DY"), "Output", "DY", "FusedGemmEpilogueOp");

auto dout_dims = ctx->GetInputDim("DOut");
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
auto trans_x = ctx->Attrs().Get<bool>("trans_x");
auto trans_y = ctx->Attrs().Get<bool>("trans_y");

PADDLE_ENFORCE_GE(
dout_dims.size(),
2,
platform::errors::InvalidArgument(
"The Input tensor DOut's dimension of FusedGemmEpilogueGradOp "
" should be >= 2, but got %d.",
dout_dims.size()));

PADDLE_ENFORCE_EQ(
y_dims.size(),
2,
platform::errors::InvalidArgument(
"The Input tensor Y's dimension of FusedGemmEpilogueGradOp "
" should be 2, but got %d.",
y_dims.size()));

PADDLE_ENFORCE_GE(
x_dims.size(),
2,
platform::errors::InvalidArgument(
"The Input tensor X's dimension of FusedGemmEpilogueGradOp "
" should be >= 2, but got %d.",
x_dims.size()));

PADDLE_ENFORCE_EQ(
dout_dims.size(),
x_dims.size(),
platform::errors::InvalidArgument(
"The Input tensor DOut's and X's dimension of "
"FusedGemmEpilogueGradOp "
" should be the same, but got DOut's dim = %d and X's = %d.",
dout_dims.size(),
x_dims.size()));

auto dout_mat_dims = phi::flatten_to_2d(dout_dims, dout_dims.size() - 1);
auto x_mat_dims = phi::flatten_to_2d(x_dims, x_dims.size() - 1);

PADDLE_ENFORCE_EQ(
dout_mat_dims[1],
trans_y ? y_dims[0] : y_dims[1],
platform::errors::InvalidArgument(
"The last dimension of DOut should be equal with Y's last"
"dimension. But received DOut[-1] = [%d], Y[1] = [%d].",
dout_mat_dims[1],
y_dims[1]));

PADDLE_ENFORCE_EQ(
dout_mat_dims[0],
trans_x ? x_mat_dims[1] : x_mat_dims[0],
platform::errors::InvalidArgument(
"The first dimension of DOut should be equal with X's first"
"dimension. But received DOut[0] = [%d], Y[0] = [%d].",
dout_mat_dims[0],
x_mat_dims[0]));

auto activation_grad = ctx->Attrs().Get<std::string>("activation_grad");
if (activation_grad != "none" && !ctx->HasInput("ReserveSpace")) {
PADDLE_ENFORCE_EQ(true,
false,
platform::errors::InvalidArgument(
"The ReserveSpace should not be empty. "
"when activation == {relu_grad, gelu_grad}."));
}

if (ctx->HasOutput("DX")) {
ctx->SetOutputDim("DX", x_dims);
}
ctx->SetOutputDim("DY", y_dims);

if (ctx->HasOutput("DBias")) {
int64_t dbias_dim = trans_y ? y_dims[0] : y_dims[1];
ctx->SetOutputDim("DBias", phi::make_ddim({dbias_dim}));
}
}

phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "DOut");
auto data_type = OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Out"));
return phi::KernelKey(data_type, ctx.GetPlace());
}
};

class FusedGemmEpilogueGradOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("DOut",
"The input grad tensor to Out of Out = (Act(X) * Y) + bias");
AddInput("X", "The input tensor X of Out = (Act(X) * Y) + bias");
AddInput("Y", "The input tensor Y of Out = (Act(X) * Y) + bias");
AddInput("ReserveSpace",
R"DOC(A GPU space to fetch
auxiliary data pointer. It is used to pass auxiliary data pointer
for fused_gemm_epilogue_grad op. If not given (empty string), the
auxiliary mode would not be enable.)DOC")
.AsDispensable();

AddOutput("DX", "The output grad tensor to X of Out = (Act(X) * Y) + bias.")
.AsDispensable();
AddOutput("DY",
"The output grad tensor to Y of Out = (Act(X) * Y) + bias.");
AddOutput("DBias",
"The output grad tensor to bias of Out = (Act(X) * Y) + bias.")
.AsDispensable();
AddAttr<bool>(
"trans_x",
R"DOC((bool, default false), Whether to transpose input tensor X
or not. The input tensor X coulbe be more than two dimension. When
set trans_x=true, it would fully reverse X. For instant: X with shpae
[d0, d1, d2, d3] -> [d3, d2, d1, d0].)DOC")
.SetDefault(false);
AddAttr<bool>(
"trans_y",
R"DOC((bool, default false), Whether to transpose input tensor Y
or not. The input tensor Y should be two dimension. When
set trans_y=true, it would transpose Y. For instant: Y with shpae
[d0, d1] -> [d1, d0].)DOC")
.SetDefault(false);

AddAttr<std::string>(
"activation_grad",
R"DOC((string, default none), The backward activation function. It could be
one of {none, relu_grad, gelu_grad}. When none is given, The backward Act would
be null operations)DOC")
.SetDefault("none");

AddComment(R"DOC(
FusedGemmEpilogueGrad Operator
This operator is used to perform backward of Elementwise_add(Matmul(Activeation(X), Y), bias).
It is equal to Activation (None, ReLU or GeLU) + paddle.nn.Linear.

Note:
X could be more than two dimension and would be flatten to 2D for computing.
X with shape [d0, d1, d2, d3] -> X_2D with shape [d0*d1*d2, d3]
)DOC");
}
};

template <typename T>
class FusedGemmEpilogueOpGradMaker : public framework::SingleGradOpMaker<T> {
public:
@@ -353,11 +113,11 @@ class FusedGemmEpilogueOpGradMaker : public framework::SingleGradOpMaker<T> {
if (act_type != "none") {
op->SetInput("ReserveSpace", this->Output("ReserveSpace"));
}
op->SetInput("DOut", this->OutputGrad("Out"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));

op->SetOutput("DX", this->InputGrad("X"));
op->SetOutput("DY", this->InputGrad("Y"));
op->SetOutput("DBias", this->InputGrad("Bias"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetOutput(framework::GradVarName("Y"), this->InputGrad("Y"));
op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
Review comment (Contributor):
Arbitrarily changing the op definition here will cause problems: once the op has been defined, many other components may already rely on the defined names by default, so this change will lead to name mismatches. It's recommended to restore the original definition.
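For context, a minimal standalone sketch of the naming concern (editorial illustration, not code from this PR or from the file above; it assumes framework::GradVarName appends an "@GRAD" suffix to the variable name):

// Editorial sketch, not part of this diff. GradVarName below is a stand-in
// for paddle::framework::GradVarName, assuming the "@GRAD" suffix convention.
#include <iostream>
#include <string>

std::string GradVarName(const std::string& var_name) {
  return var_name + "@GRAD";  // assumed grad-variable suffix
}

int main() {
  // The old definition wires the upstream gradient under a hand-picked name
  // ("DOut"); the PR switches the definition to the framework convention.
  std::cout << "old: DOut, new: " << GradVarName("Out") << std::endl;  // Out@GRAD
  // Components that still look the input up as "DOut" (passes, op_compat
  // mappings, serialized programs, ...) will no longer match, which is the
  // mismatch the reviewer is pointing out.
  return 0;
}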


op->SetAttrMap(this->Attrs());
}
@@ -367,12 +127,18 @@ class FusedGemmEpilogueOpGradMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(
fused_gemm_epilogue,
ops::FusedGemmEpilogueOp,
ops::FusedGemmEpilogueOpMaker,
ops::FusedGemmEpilogueOpGradMaker<paddle::framework::OpDesc>,
ops::FusedGemmEpilogueOpGradMaker<paddle::imperative::OpBase>);
DECLARE_INFER_SHAPE_FUNCTOR(fused_gemm_epilogue,
FusedGemmEpilogueInferShapeFunctor,
PD_INFER_META(phi::FusedGemmEpilogueInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(fused_gemm_epilogue_grad,
FusedGemmEpilogueGradInferShapeFunctor,
PD_INFER_META(phi::FusedGemmEpilogueGradInferMeta));
REGISTER_OPERATOR(fused_gemm_epilogue,
ops::FusedGemmEpilogueOp,
ops::FusedGemmEpilogueOpMaker,
ops::FusedGemmEpilogueOpGradMaker<paddle::framework::OpDesc>,
ops::FusedGemmEpilogueOpGradMaker<paddle::imperative::OpBase>,
FusedGemmEpilogueInferShapeFunctor);
REGISTER_OPERATOR(fused_gemm_epilogue_grad,
ops::FusedGemmEpilogueGradOp,
ops::FusedGemmEpilogueGradOpMaker);
FusedGemmEpilogueGradInferShapeFunctor);
Review comment (Contributor):
Could ops::FusedGemmEpilogueGradOpMaker be kept here instead of being deleted?
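A minimal sketch of what keeping the maker might look like, assuming ops::FusedGemmEpilogueGradOpMaker and FusedGemmEpilogueGradInferShapeFunctor both remain declared in this file (this is a sketch of the reviewer's suggestion, not the code that was merged):

// Sketch only: keep the grad op's proto/checker maker and also attach the
// phi-based infer-shape functor, rather than replacing the maker with it.
REGISTER_OPERATOR(fused_gemm_epilogue_grad,
                  ops::FusedGemmEpilogueGradOp,
                  ops::FusedGemmEpilogueGradOpMaker,
                  FusedGemmEpilogueGradInferShapeFunctor);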
