Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added elementwise_sub_mkldnn operator #35662

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b5374a1
Add elementwise_sub_mkldnn_op without grad
piotrekobi Sep 10, 2021
97b2293
Add test to static_mode_white_list
piotrekobi Sep 10, 2021
8f56b90
Refactor code, change license years
piotrekobi Sep 10, 2021
2b1be1c
Remove invalid grad implementation
piotrekobi Sep 13, 2021
4698b5b
Fix element_wise_sub_op test
piotrekobi Sep 13, 2021
834911e
Fix CI Approval error
piotrekobi Sep 13, 2021
90e3d16
Remove unnecessary EltwiseSubMKLDNNGradKernel class
piotrekobi Sep 13, 2021
1c71002
Fix CI Approval 2
piotrekobi Sep 13, 2021
63c9c9a
Fix CI Approval 3
piotrekobi Sep 13, 2021
9980ccc
Fix CI Approval Attempt #4
piotrekobi Sep 13, 2021
aaea659
Fix CI Approve Attempt #5
piotrekobi Sep 13, 2021
5b0e50c
Fix CI Approval Attempt #6
piotrekobi Sep 14, 2021
084c56f
Fix CI Approval Attempt #7
piotrekobi Sep 14, 2021
24782f3
Change test names containing add to sub
piotrekobi Sep 14, 2021
22d2225
Fix old tests testing add instead of sub
piotrekobi Sep 14, 2021
e588c92
Copy grad implementation from elementwise_add_mkldnn
piotrekobi Sep 14, 2021
1135aa3
CI test fix attempt
piotrekobi Sep 16, 2021
ad491f3
Revert "CI test fix attempt"
piotrekobi Sep 16, 2021
49d9142
Fix CI attempt 2
piotrekobi Sep 16, 2021
fc02000
Fix elementwise_sub tests, temporary mkldnn broadcast test disable
piotrekobi Sep 20, 2021
b4d7c9e
Add working implementation of elementwise_sub grad
piotrekobi Sep 20, 2021
a6822c6
Fix build errors caused by pull
piotrekobi Sep 21, 2021
57fe561
Fix format error
piotrekobi Sep 21, 2021
557ff38
Fix format error 2
piotrekobi Sep 21, 2021
314f214
Disable elementwise_sub_mkldnn test on GPU
piotrekobi Sep 21, 2021
fc3b122
Apply fix for paddle.fluid import
piotrekobi Sep 22, 2021
56852cd
Revert changes of test_elementwise_sub and Fix mkldnn test
piotrekobi Sep 22, 2021
0dcc8e2
Revert "Apply fix for paddle.fluid import"
piotrekobi Sep 22, 2021
9c98cc8
fix bug of module 'paddle' has no attribute 'fluid' for python3.6 (#3…
zhangbo9674 Sep 22, 2021
ea395f5
Add changes suggested by reviewers
piotrekobi Sep 23, 2021
f3010a0
Change @unittest.skipIf... to @OpTestTool.skip_if_not_cpu_bf16() to s…
piotrekobi Sep 23, 2021
08a5c69
Remove check_dygraph=False to satisfy CI Approval
piotrekobi Sep 23, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 132 additions & 0 deletions paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h"
// Forward declarations only — avoids pulling the full framework/platform
// headers into this translation unit; the definitions come in via
// elementwise_mkldnn_op.h at link time.
namespace paddle {
namespace framework {
class ExecutionContext;
}  // namespace framework
namespace platform {
class CPUDeviceContext;
struct CPUPlace;
}  // namespace platform
}  // namespace paddle

namespace paddle {
namespace operators {
template <typename T>
class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
 public:
  // Backward kernel for elementwise_sub (out = x - y) on CPU via oneDNN:
  //   dX = dOut        (identity reorder of dOut)
  //   dY = -dOut       (reorder with output scale -1, or — when Y was
  //                     broadcast in the forward pass — a reduction over the
  //                     broadcast dims with an eltwise_linear(-1) post-op)
  void Compute(const framework::ExecutionContext& ctx) const override {
    ElemwiseGradKernel<T>::Compute(ctx);
    using Tensor = framework::Tensor;

    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& onednn_engine = dev_ctx.GetEngine();

    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));

    auto tz = framework::vectorize<int64_t>(dout->dims());
    memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type());
    platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type,
                                           onednn_engine);

    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
    auto reorder_src_memory_p = handler.AcquireSrcMemory(
        dout->format(), platform::to_void_cast(dout->data<T>()));

    if (dx) {
      // dX is a plain copy of dOut.
      auto reorder_dst_memory_p =
          handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
      auto reorder_p =
          handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
      platform::RecordEvent record_reorder("int_reorder",
                                           platform::EventRole::kUniqueOp);

      reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
      astream.wait();

      dx->set_layout(DataLayout::kMKLDNN);
      dx->set_format(platform::GetMKLDNNFormat(*reorder_dst_memory_p));
    }

    if (dy) {
      // Direct copy: dY has the same shape as dOut, so negate via a
      // reorder whose output scale is -1.
      if (dout->dims() == dy->dims()) {
        auto reorder_dst_memory_p =
            handler.AcquireDstMemory(dy, dout->format(), ctx.GetPlace());

        dnnl::primitive_attr reorder_attr;
        std::vector<float> scales = {-1};
        reorder_attr.set_output_scales(0, scales);
        auto reorder_p = std::make_shared<dnnl::reorder>(
            *(reorder_src_memory_p), *(reorder_dst_memory_p), reorder_attr);
        platform::RecordEvent record_reorder("int_reorder",
                                             platform::EventRole::kUniqueOp);
        reorder_p->execute(astream, *reorder_src_memory_p,
                           *reorder_dst_memory_p);
        astream.wait();

        dy->set_layout(DataLayout::kMKLDNN);
        dy->set_format(platform::GetMKLDNNFormat(*reorder_dst_memory_p));
      } else {
        // Broadcasting: Y was broadcast in forward, so dY is dOut reduced
        // (summed) over the broadcast dims; the eltwise_linear post-op with
        // alpha = -1 negates the result in the same primitive.
        dnnl::post_ops po;
        po.append_eltwise(1.0f, dnnl::algorithm::eltwise_linear, -1.0f, 0);
        dnnl::primitive_attr attr;
        attr.set_post_ops(po);

        platform::ReductionMKLDNNHandler<T> handler_sum(
            dnnl::algorithm::reduction_sum, 0.0f, 0.0f, onednn_engine,
            ctx.GetPlace(), dout, dy, CalculateBroadcastedDims(dout, dy), attr);

        auto dy_memory_p = handler_sum.AcquireDstMemory(dy);
        auto reduction_p = handler_sum.AcquireForwardPrimitive();

        reduction_p->execute(astream, {
                                          {DNNL_ARG_SRC, *reorder_src_memory_p},
                                          {DNNL_ARG_DST, *dy_memory_p},
                                      });
        astream.wait();

        dy->set_layout(DataLayout::kMKLDNN);
        // The reduction dst keeps the (padded) source rank; reshape the
        // descriptor back to dY's real dims before deriving the format tag.
        dy->set_format(
            platform::GetMKLDNNFormat(dy_memory_p->get_desc().reshape(
                framework::vectorize<int64_t>(dy->dims()))));
      }
    }
  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

// Forward kernel: generic binary oneDNN kernel specialized for binary_sub,
// registered for fp32, bf16 and (u)int8.
REGISTER_OP_KERNEL(
    elementwise_sub, MKLDNN, paddle::platform::CPUPlace,
    ops::EltwiseMKLDNNKernel<float, dnnl::algorithm::binary_sub>,
    ops::EltwiseMKLDNNKernel<paddle::platform::bfloat16,
                             dnnl::algorithm::binary_sub>,
    ops::EltwiseMKLDNNKernel<int8_t, dnnl::algorithm::binary_sub>,
    ops::EltwiseMKLDNNKernel<uint8_t, dnnl::algorithm::binary_sub>)

// Backward kernel: fp32 and bf16 only.
REGISTER_OP_KERNEL(elementwise_sub_grad, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::EltwiseSubMKLDNNGradKernel<paddle::platform::bfloat16>,
                   ops::EltwiseSubMKLDNNGradKernel<float>)
16 changes: 11 additions & 5 deletions paddle/fluid/platform/mkldnn_reuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ limitations under the License. */
#include <string>
#include <utility>
#include <vector>

#include "boost/optional.hpp"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/operator.h"
Expand Down Expand Up @@ -929,7 +930,6 @@ class BroadcastDataMKLDNNHandler
std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
T_out* ptr = output->mutable_data<T_out>(
this->place_, this->fwd_pd_->dst_desc().get_size());
;
memset(ptr, 0, this->fwd_pd_->dst_desc().get_size());
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
}
Expand All @@ -942,7 +942,8 @@ class ReductionMKLDNNHandler
ReductionMKLDNNHandler(const dnnl::algorithm algo, const float p,
const float eps, const mkldnn::engine engine,
platform::Place cpu_place, const Tensor* x,
const Tensor* y, std::vector<int64_t> y_tz)
const Tensor* y, std::vector<int64_t> y_tz,
const dnnl::primitive_attr& attr = NULL)
: platform::MKLDNNHandlerNoCachingT<T, dnnl::reduction>(engine,
cpu_place) {
PADDLE_ENFORCE_EQ(
Expand All @@ -959,7 +960,10 @@ class ReductionMKLDNNHandler
const auto y_md =
memory::desc(y_tz, platform::MKLDNNGetDataType<T>(), x->format());

this->AcquireForwardPrimitiveDescriptor(algo, x_md, y_md, p, eps);
if (attr)
this->AcquireForwardPrimitiveDescriptor(attr, algo, x_md, y_md, p, eps);
else
this->AcquireForwardPrimitiveDescriptor(algo, x_md, y_md, p, eps);
}
};

Expand All @@ -981,8 +985,9 @@ class ActivationMKLDNNHandler
if (ctx.Type() == "scale") {
bool bias_after_scale = ctx.Attr<bool>("bias_after_scale");
auto* scale_tensor = ctx.Input<Tensor>("ScaleTensor");
alpha = (scale_tensor == nullptr) ? ctx.Attr<float>("scale")
: (float)*(scale_tensor->data<T>());
alpha = (scale_tensor == nullptr)
? ctx.Attr<float>("scale")
: static_cast<float>(*(scale_tensor->data<T>()));
beta = ctx.Attr<float>("bias");
// if bias_after_scale == true
// out = scale*X + bias
Expand Down Expand Up @@ -1514,6 +1519,7 @@ static void SetDstMemoryQuantized(
T* output_data = output->mutable_data<T>(ctx.GetPlace());
const size_t dst_dims = dst_tz.size();
MKLDNNMemoryFormat dst_fmt;

PADDLE_ENFORCE_LE(dst_dims, 5, platform::errors::InvalidArgument(
"Dst memory for quantization can not have "
"dims > 5. But received dst_dims is %d.",
Expand Down
24 changes: 11 additions & 13 deletions python/paddle/fluid/dygraph/amp/auto_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import paddle
import operator
import types
import paddle.fluid as fluid

__all__ = ['amp_guard', 'amp_decorate']

Expand Down Expand Up @@ -220,16 +219,16 @@ def amp_guard(enable=True,
.. code-block:: python

import numpy as np
import paddle.fluid as fluid
import paddle

data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
with fluid.dygraph.guard():
conv2d = fluid.dygraph.Conv2D(3, 2, 3)
data = fluid.dygraph.to_variable(data)
with fluid.dygraph.amp_guard():
with paddle.fluid.dygraph.guard():
conv2d = paddle.fluid.dygraph.Conv2D(3, 2, 3)
data = paddle.fluid.dygraph.to_variable(data)
with paddle.fluid.dygraph.amp_guard():
conv = conv2d(data)
print(conv.dtype) # FP16
with fluid.dygraph.amp_guard(enable=False):
with paddle.fluid.dygraph.amp_guard(enable=False):
conv = conv2d(data)
print(conv.dtype) # FP32

Expand Down Expand Up @@ -301,7 +300,7 @@ def __init__(self, save_dtype):
def __call__(self, state_dict):
for key in state_dict:
param = state_dict[key]
with fluid.dygraph.guard():
with paddle.fluid.dygraph.guard():
param_applied = paddle.cast(param, self._save_dtype)
param_applied.name = param.name
state_dict[key] = param_applied
Expand Down Expand Up @@ -335,16 +334,15 @@ def amp_decorate(models,
# required: gpu
# Demo1: single model and optimizer:
import paddle
import paddle.fluid as fluid

model = paddle.nn.Conv2D(3, 2, 3, bias_attr=False)
optimzier = paddle.optimizer.SGD(parameters=model.parameters())

model, optimizer = fluid.dygraph.amp_decorate(models=model, optimizers=optimzier, level='O2')
model, optimizer = paddle.fluid.dygraph.amp_decorate(models=model, optimizers=optimzier, level='O2')

data = paddle.rand([10, 3, 32, 32])

with fluid.dygraph.amp_guard(enable=True, custom_white_list=None, custom_black_list=None, level='O2'):
with paddle.fluid.dygraph.amp_guard(enable=True, custom_white_list=None, custom_black_list=None, level='O2'):
output = model(data)
print(output.dtype) # FP16

Expand All @@ -353,11 +351,11 @@ def amp_decorate(models,
model2 = paddle.nn.Conv2D(3, 2, 3, bias_attr=False)
optimizer2 = paddle.optimizer.Adam(parameters=model2.parameters())

models, optimizers = fluid.dygraph.amp_decorate(models=[model, model2], optimizers=[optimzier, optimizer2], level='O2')
models, optimizers = paddle.fluid.dygraph.amp_decorate(models=[model, model2], optimizers=[optimzier, optimizer2], level='O2')

data = paddle.rand([10, 3, 32, 32])

with fluid.dygraph.amp_guard(enable=True, custom_white_list=None, custom_black_list=None, level='O2'):
with paddle.fluid.dygraph.amp_guard(enable=True, custom_white_list=None, custom_black_list=None, level='O2'):
output = models[0](data)
output2 = models[1](data)
print(output.dtype) # FP16
Expand Down
Loading