【Hackathon 5th No.112】move fused_gemm_epilogue to phi and add the yaml of identity_loss #59363
Merged. +555 −776. 29 commits (the diff below reflects changes from the first 7 commits).

Commits (all by zeroRains):
341afcc  try to move fused_gemm_epilogue to phi
31eb4eb  try to fix bug
fafe072  update
6f7a3d7  fix the bug in config
d5d895e  fix the bug
628cc43  config the identity_loss of yaml
456022c  fix bug
249dc76  temp
3a74701  fix bug
3f1de5b  remove the #include
1b0c20a  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
cb4da1b  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
96a92f6  roback
b05872d  remove file in fluid
6cbe87d  add the output in op_comat
bc1ec4a  try to move xpu
6531fb7  fix config
c98a435  roback the flashatten
12a8411  change the ops/compate
b616534  move the typename to kernel
29f0b5c  remove VLOG
7878144  fix bug
8841394  remove the same define
1e64184  fix bug
5aa79a0  roback the sig file
c41b7eb  Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
cc19bea  Update fused_gemm_epilogue_sig.cc
fca12a8  Update fused_gemm_epilogue_sig.cc
ac92a3d  fix a small bug
Diff: the fluid fused_gemm_epilogue operator definition (one of the files changed in this PR).
@@ -13,8 +13,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/fusion.h"
 #include "paddle/phi/kernels/funcs/fused_gemm_epilogue.h"

 namespace paddle {
@@ -25,107 +28,6 @@ class FusedGemmEpilogueOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;

- protected:
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedGemmEpilogueOp");
-    OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "FusedGemmEpilogueOp");
-    OP_INOUT_CHECK(
-        ctx->HasInput("Bias"), "Output", "Bias", "FusedGemmEpilogueOp");
-    OP_INOUT_CHECK(
-        ctx->HasOutput("Out"), "Output", "Out", "FusedGemmEpilogueOp");
-
-    auto x_dims = ctx->GetInputDim("X");
-    auto y_dims = ctx->GetInputDim("Y");
-    auto bias_dims = ctx->GetInputDim("Bias");
-    auto trans_x = ctx->Attrs().Get<bool>("trans_x");
-    auto trans_y = ctx->Attrs().Get<bool>("trans_y");
-
-    PADDLE_ENFORCE_EQ(
-        y_dims.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "The Input tensor Y's dimension of FusedGemmEpilogueOp "
-            " should be 2, but got %d.",
-            y_dims.size()));
-
-    PADDLE_ENFORCE_GE(
-        x_dims.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "The Input tensor X's dimension of FusedGemmEpilogueOp "
-            " should be >= 2, but got %d.",
-            x_dims.size()));
-
-    PADDLE_ENFORCE_EQ(
-        bias_dims.size(),
-        1,
-        platform::errors::InvalidArgument(
-            "The Input tensor bias's dimension of FusedGemmEpilogueOp "
-            " should be == 1, but got %d.",
-            bias_dims.size()));
-
-    PADDLE_ENFORCE_EQ(bias_dims[0],
-                      trans_y ? y_dims[0] : y_dims[1],
-                      platform::errors::InvalidArgument(
-                          "The Input tensor bias's dimension 0"
-                          " should be == Y[-1], but got bias's shape = [%s] "
-                          "and Y's shape = [%s]",
-                          bias_dims,
-                          y_dims));
-
-    auto x_mat_dims =
-        phi::flatten_to_2d(x_dims, trans_x ? 1 : x_dims.size() - 1);
-
-    int K_from_x = trans_x ? x_mat_dims[0] : x_mat_dims[1];
-    int K_from_y = trans_y ? y_dims[1] : y_dims[0];
-
-    PADDLE_ENFORCE_EQ(
-        K_from_x,
-        K_from_y,
-        platform::errors::InvalidArgument(
-            "The last dimension of X should be equal with Y's first dimension."
-            "But received X[-1] = [%d], Y[0] = [%d].",
-            K_from_x,
-            K_from_y));
-
-    std::vector<int64_t> out_dims;
-    out_dims.reserve(static_cast<size_t>(x_dims.size()));
-    if (trans_x) {
-      for (int i = 1; i < x_dims.size(); ++i) out_dims.push_back(x_dims[i]);
-    } else {
-      for (int i = 0; i < x_dims.size() - 1; ++i) out_dims.push_back(x_dims[i]);
-    }
-
-    if (trans_y) {
-      out_dims.push_back(y_dims[0]);
-    } else {
-      out_dims.push_back(y_dims[1]);
-    }
-    ctx->SetOutputDim("Out", phi::make_ddim(out_dims));
-
-    auto activation = ctx->Attrs().Get<std::string>("activation");
-    if (ctx->HasOutput("ReserveSpace")) {
-      ctx->SetOutputDim("ReserveSpace", phi::make_ddim(out_dims));
-
-      if (activation == "none") {
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The ReserveSpace would not be used when activation = \"none\""));
-      } else {
-        int min_size_of_n = activation == "relu" ? 128 : 8;
-        int N_size = trans_y ? y_dims[0] : y_dims[1];
-        PADDLE_ENFORCE_EQ(
-            N_size % min_size_of_n,
-            0,
-            platform::errors::InvalidArgument(
-                "The output dimension N (X(MxK) * Y(KxN) = C(MxN)) "
-                "should be multiple of %d when auxiliary_key given "
-                "and activation=%s, but got N = %d.",
-                min_size_of_n,
-                activation,
-                N_size));
-      }
-    }
-  }
-
   phi::KernelKey GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
@@ -188,156 +90,14 @@ class FusedGemmEpilogueGradOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;

- protected:
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(
-        ctx->HasInput("DOut"), "Input", "DOut", "FusedGemmEpilogueGradOp");
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedGemmEpilogueGradOp");
-    OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "FusedGemmEpilogueGradOp");
-    OP_INOUT_CHECK(ctx->HasOutput("DY"), "Output", "DY", "FusedGemmEpilogueOp");
-
-    auto dout_dims = ctx->GetInputDim("DOut");
-    auto x_dims = ctx->GetInputDim("X");
-    auto y_dims = ctx->GetInputDim("Y");
-    auto trans_x = ctx->Attrs().Get<bool>("trans_x");
-    auto trans_y = ctx->Attrs().Get<bool>("trans_y");
-
-    PADDLE_ENFORCE_GE(
-        dout_dims.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "The Input tensor DOut's dimension of FusedGemmEpilogueGradOp "
-            " should be >= 2, but got %d.",
-            dout_dims.size()));
-
-    PADDLE_ENFORCE_EQ(
-        y_dims.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "The Input tensor Y's dimension of FusedGemmEpilogueGradOp "
-            " should be 2, but got %d.",
-            y_dims.size()));
-
-    PADDLE_ENFORCE_GE(
-        x_dims.size(),
-        2,
-        platform::errors::InvalidArgument(
-            "The Input tensor X's dimension of FusedGemmEpilogueGradOp "
-            " should be >= 2, but got %d.",
-            x_dims.size()));
-
-    PADDLE_ENFORCE_EQ(
-        dout_dims.size(),
-        x_dims.size(),
-        platform::errors::InvalidArgument(
-            "The Input tensor DOut's and X's dimension of "
-            "FusedGemmEpilogueGradOp "
-            " should be the same, but got DOut's dim = %d and X's = %d.",
-            dout_dims.size(),
-            x_dims.size()));
-
-    auto dout_mat_dims = phi::flatten_to_2d(dout_dims, dout_dims.size() - 1);
-    auto x_mat_dims = phi::flatten_to_2d(x_dims, x_dims.size() - 1);
-
-    PADDLE_ENFORCE_EQ(
-        dout_mat_dims[1],
-        trans_y ? y_dims[0] : y_dims[1],
-        platform::errors::InvalidArgument(
-            "The last dimension of DOut should be equal with Y's last"
-            "dimension. But received DOut[-1] = [%d], Y[1] = [%d].",
-            dout_mat_dims[1],
-            y_dims[1]));
-
-    PADDLE_ENFORCE_EQ(
-        dout_mat_dims[0],
-        trans_x ? x_mat_dims[1] : x_mat_dims[0],
-        platform::errors::InvalidArgument(
-            "The first dimension of DOut should be equal with X's first"
-            "dimension. But received DOut[0] = [%d], Y[0] = [%d].",
-            dout_mat_dims[0],
-            x_mat_dims[0]));
-
-    auto activation_grad = ctx->Attrs().Get<std::string>("activation_grad");
-    if (activation_grad != "none" && !ctx->HasInput("ReserveSpace")) {
-      PADDLE_ENFORCE_EQ(true,
-                        false,
-                        platform::errors::InvalidArgument(
-                            "The ReserveSpace should not be empty. "
-                            "when activation == {relu_grad, gelu_grad}."));
-    }
-
-    if (ctx->HasOutput("DX")) {
-      ctx->SetOutputDim("DX", x_dims);
-    }
-    ctx->SetOutputDim("DY", y_dims);
-
-    if (ctx->HasOutput("DBias")) {
-      int64_t dbias_dim = trans_y ? y_dims[0] : y_dims[1];
-      ctx->SetOutputDim("DBias", phi::make_ddim({dbias_dim}));
-    }
-  }
-
   phi::KernelKey GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "DOut");
+    auto data_type = OperatorWithKernel::IndicateVarDataType(
+        ctx, framework::GradVarName("Out"));
     return phi::KernelKey(data_type, ctx.GetPlace());
   }
 };
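The grad op's checks move to phi in the same way, through phi::FusedGemmEpilogueGradInferMeta (see the grad functor declared in the registration below). A sketch of the assumed counterpart declaration, again illustrative rather than authoritative:

void FusedGemmEpilogueGradInferMeta(const MetaTensor& x,
                                    const MetaTensor& y,
                                    const MetaTensor& reserve_space,
                                    const MetaTensor& out_grad,
                                    bool trans_x,
                                    bool trans_y,
                                    const std::string& activation_grad,
                                    MetaTensor* x_grad,
                                    MetaTensor* y_grad,
                                    MetaTensor* bias_grad);

// Mirroring the deleted InferShape: x_grad keeps X's shape, y_grad keeps
// Y's shape, and bias_grad is [N] with N = trans_y ? Y[0] : Y[1].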
-class FusedGemmEpilogueGradOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("DOut",
-             "The input grad tensor to Out of Out = (Act(X) * Y) + bias");
-    AddInput("X", "The input tensor X of Out = (Act(X) * Y) + bias");
-    AddInput("Y", "The input tensor Y of Out = (Act(X) * Y) + bias");
-    AddInput("ReserveSpace",
-             R"DOC(A GPU space to fetch
-auxiliary data pointer. It is used to pass auxiliary data pointer
-for fused_gemm_epilogue_grad op. If not given (empty string), the
-auxiliary mode would not be enable.)DOC")
-        .AsDispensable();
-
-    AddOutput("DX", "The output grad tensor to X of Out = (Act(X) * Y) + bias.")
-        .AsDispensable();
-    AddOutput("DY",
-              "The output grad tensor to Y of Out = (Act(X) * Y) + bias.");
-    AddOutput("DBias",
-              "The output grad tensor to bias of Out = (Act(X) * Y) + bias.")
-        .AsDispensable();
-    AddAttr<bool>(
-        "trans_x",
-        R"DOC((bool, default false), Whether to transpose input tensor X
-or not. The input tensor X coulbe be more than two dimension. When
-set trans_x=true, it would fully reverse X. For instant: X with shpae
-[d0, d1, d2, d3] -> [d3, d2, d1, d0].)DOC")
-        .SetDefault(false);
-    AddAttr<bool>(
-        "trans_y",
-        R"DOC((bool, default false), Whether to transpose input tensor Y
-or not. The input tensor Y should be two dimension. When
-set trans_y=true, it would transpose Y. For instant: Y with shpae
-[d0, d1] -> [d1, d0].)DOC")
-        .SetDefault(false);
-
-    AddAttr<std::string>(
-        "activation_grad",
-        R"DOC((string, default none), The backward activation function. It could be
-one of {none, relu_grad, gelu_grad}. When none is given, The backward Act would
-be null operations)DOC")
-        .SetDefault("none");
-
-    AddComment(R"DOC(
-FusedGemmEpilogueGrad Operator
-This operator is used to perform backward of Elementwise_add(Matmul(Activeation(X), Y), bias).
-It is equal to Activation (None, ReLU or GeLU) + paddle.nn.Linear.
-
-Note:
-X could be more than two dimension and would be flatten to 2D for computing.
-X with shape [d0, d1, d2, d3] -> X_2D with shape [d0*d1*d2, d3]
-)DOC");
-  }
-};
-
 template <typename T>
 class FusedGemmEpilogueOpGradMaker : public framework::SingleGradOpMaker<T> {
  public:
@@ -353,11 +113,11 @@ class FusedGemmEpilogueOpGradMaker : public framework::SingleGradOpMaker<T> {
     if (act_type != "none") {
       op->SetInput("ReserveSpace", this->Output("ReserveSpace"));
     }
-    op->SetInput("DOut", this->OutputGrad("Out"));
+    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));

-    op->SetOutput("DX", this->InputGrad("X"));
-    op->SetOutput("DY", this->InputGrad("Y"));
-    op->SetOutput("DBias", this->InputGrad("Bias"));
+    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), this->InputGrad("Y"));
+    op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));

     op->SetAttrMap(this->Attrs());
   }
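The grad maker now names the grad op's slots through framework::GradVarName instead of the hand-rolled DOut/DX/DY/DBias, so the generated names line up with what the framework and the phi argument mapping expect. GradVarName is simply the forward variable name plus the gradient suffix; a minimal sketch, assuming Paddle's conventional "@GRAD" suffix:

#include <string>

// Illustrative stand-in for the framework helper of the same name.
inline std::string GradVarName(const std::string& var_name) {
  return var_name + "@GRAD";  // "Out" -> "Out@GRAD", "Bias" -> "Bias@GRAD"
}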
@@ -367,12 +127,18 @@ class FusedGemmEpilogueOpGradMaker : public framework::SingleGradOpMaker<T> {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OPERATOR(
-    fused_gemm_epilogue,
-    ops::FusedGemmEpilogueOp,
-    ops::FusedGemmEpilogueOpMaker,
-    ops::FusedGemmEpilogueOpGradMaker<paddle::framework::OpDesc>,
-    ops::FusedGemmEpilogueOpGradMaker<paddle::imperative::OpBase>);
+DECLARE_INFER_SHAPE_FUNCTOR(fused_gemm_epilogue,
+                            FusedGemmEpilogueInferShapeFunctor,
+                            PD_INFER_META(phi::FusedGemmEpilogueInferMeta));
+DECLARE_INFER_SHAPE_FUNCTOR(fused_gemm_epilogue_grad,
+                            FusedGemmEpilogueGradInferShapeFunctor,
+                            PD_INFER_META(phi::FusedGemmEpilogueGradInferMeta));
+REGISTER_OPERATOR(fused_gemm_epilogue,
+                  ops::FusedGemmEpilogueOp,
+                  ops::FusedGemmEpilogueOpMaker,
+                  ops::FusedGemmEpilogueOpGradMaker<paddle::framework::OpDesc>,
+                  ops::FusedGemmEpilogueOpGradMaker<paddle::imperative::OpBase>,
+                  FusedGemmEpilogueInferShapeFunctor);
 REGISTER_OPERATOR(fused_gemm_epilogue_grad,
                   ops::FusedGemmEpilogueGradOp,
-                  ops::FusedGemmEpilogueGradOpMaker);
+                  FusedGemmEpilogueGradInferShapeFunctor);
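With the grad op's proto maker gone, the bridge between the legacy fluid slot names (X/Y/Bias, Out@GRAD, ...) and the phi kernel arguments is the argument-mapping file the commits keep touching, fused_gemm_epilogue_sig.cc. A sketch of what such a mapping typically looks like (the slot order here is an assumption, not the file's actual content):

#include "paddle/phi/core/compat/op_utils.h"

namespace phi {

// Maps the legacy operator's named inputs/attrs/outputs onto the phi
// kernel signature of the same name.
KernelSignature FusedGemmEpilogueOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  return KernelSignature("fused_gemm_epilogue",
                         {"X", "Y", "Bias"},                    // inputs
                         {"trans_x", "trans_y", "activation"},  // attributes
                         {"Out", "ReserveSpace"});              // outputs
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(fused_gemm_epilogue,
                           phi::FusedGemmEpilogueOpArgumentMapping);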
Review comment: Could ops::FusedGemmEpilogueGradOpMaker be kept here rather than deleted?
Review comment: Changing the Op definition at will here is problematic. Once an op is defined, many other components rely on the established names by default, so renaming things here breaks that name matching. It is recommended to restore the original definition.
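The other half of the PR title, the identity_loss yaml, does not appear in this file's diff; it would live in the phi operator yaml definitions (see commit 628cc43, "config the identity_loss of yaml"). A sketch of what such an entry might look like under the ops.yaml schema (field values here are illustrative, not the PR's actual entry):

- op : identity_loss
  args : (Tensor x, int reduction = 1)
  output : Tensor(out)
  infer_meta :
    func : IdentityLossInferMeta
  kernel :
    func : identity_loss
  backward : identity_loss_grad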