Commit
[FEATURE] add oneDNN support for numpy transpose (#20419)
* numpy transpose onednn usage

* remove unnecessary whitespace

* remove unnecessary whitespace

* remove unnecessary param

* formatting changes, cleanup

* remove unnecessary lines

* template convert param

* newline at end

* remove unused declarations

* whitespace, guard comments

* sanity fix

* formatting

* separate error tests transpose

* formatting

* separate transpose error tests

* transpose header dnnl

* format files sanity

* move include transpose

* unify param templates

* format, rename funcs

* switch include order

* don't sort includes for transpose

* remove clang off section

* delete unnecessary newline

* add newlines

* remove whitespace

* remove whitespace
RafLit authored Nov 10, 2021
1 parent af1622e commit 16fed6e
Showing 7 changed files with 211 additions and 84 deletions.
3 changes: 1 addition & 2 deletions src/operator/nn/dnnl/dnnl_base-inl.h
@@ -181,7 +181,6 @@ struct ConvolutionParam;
struct DeconvolutionParam;
struct SoftmaxParam;
struct SoftmaxOutputParam;
struct TransposeParam;
struct ReshapeParam;
struct LayerNormParam;
bool SupportDNNLAct(const ActivationParam& param);
@@ -194,7 +193,7 @@ bool SupportDNNLDeconv(const DeconvolutionParam& params, const NDArray& input);
bool SupportDNNLSoftmax(const SoftmaxParam& param, const NDArray& input, const NDArray& output);
bool SupportDNNLLogSoftmax(const SoftmaxParam& param, const NDArray& input, const NDArray& output);
bool SupportDNNLSoftmaxOutput(const SoftmaxOutputParam& param);
bool SupportDNNLTranspose(const TransposeParam& param, const NDArray& data);
bool SupportDNNLTranspose(const NDArray& data);
bool SupportDNNLBatchDot(const std::vector<NDArray>& inputs, const NDArray& output);
bool SupportDNNLLayerNorm(const LayerNormParam& param, const std::vector<NDArray>& inputs);
bool SupportDNNLReshape(const NDArray& input, const NDArray& output);
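Note on the signature change above: whether oneDNN can take a transpose now depends only on the input array, not on the axes parameter, so one predicate serves both the legacy and the numpy operator. A minimal sketch of the new check — the final clause is an assumption, since the diff view elides the rest of the condition (the destination descriptor in dnnl_transpose.cc is built with dnnl_f32, so a float32 guard is plausible):

    // Sketch, not the verbatim commit; the dtype clause is assumed.
    bool SupportDNNLTranspose(const NDArray& data) {
      const int ndim = data.shape().ndim();
      if (ndim > 4 || ndim == 0 || data.shape().Size() == 0 ||
          data.dtype() != mshadow::kFloat32)  // assumed guard, elided in this view
        return false;
      return true;
    }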
1 change: 1 addition & 0 deletions src/operator/nn/dnnl/dnnl_ops-inl.h
@@ -180,6 +180,7 @@ void DNNLLayerNormBackward(const nnvm::NodeAttrs& attrs,

void DNNLSum(const dnnl::memory& arr1, const dnnl::memory& arr2, const dnnl::memory& out);

template <class ParamType>
void DNNLTransposeForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const NDArray& data,
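The template parameter added here lets one DNNL kernel serve both transpose front ends: each instantiation converts its own parameter type to NumpyTransposeParam (via ConvertParamsToNumpy) before reaching the shared implementation. Illustrative call sites — the first appears verbatim in np_matrix_op.cc below; the second is the assumed analogue for the legacy operator, which is not part of this diff:

    // numpy operator (from this commit):
    DNNLRun(DNNLTransposeForward<NumpyTransposeParam>, attrs, ctx, inputs[0], req[0], outputs[0]);
    // legacy ndarray operator (assumed analogue, not shown in this diff):
    DNNLRun(DNNLTransposeForward<TransposeParam>, attrs, ctx, inputs[0], req[0], outputs[0]);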
73 changes: 73 additions & 0 deletions src/operator/nn/dnnl/dnnl_transpose-inl.h
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file dnnl_transpose-inl.h
* \author Rafal Litka
*/

#ifndef MXNET_OPERATOR_NN_DNNL_DNNL_TRANSPOSE_INL_H_
#define MXNET_OPERATOR_NN_DNNL_DNNL_TRANSPOSE_INL_H_
#if MXNET_USE_ONEDNN == 1

#include "./dnnl_base-inl.h"
#include "./dnnl_ops-inl.h"

#include "../../numpy/np_matrix_op-inl.h"

namespace mxnet {
namespace op {

bool SupportDNNLTranspose(const NDArray& data);

class DNNLTransposeFwd {
public:
std::shared_ptr<dnnl::memory> data_;
std::shared_ptr<dnnl::memory> out_;
std::shared_ptr<dnnl::memory::desc> dst_md_;
std::shared_ptr<dnnl::reorder> transpose_;
DNNLTransposeFwd(const NumpyTransposeParam& param, const NDArray& data);
void SetNewMem(const NDArray& data, const NDArray& output);
const dnnl::reorder& GetFwd() const;
void Execute() const;
};

DNNLTransposeFwd& GetTransposeForward(const NumpyTransposeParam& param, const NDArray& data);

template <class ParamType>
NumpyTransposeParam ConvertParamsToNumpy(const ParamType& param);

template <class ParamType>
void DNNLTransposeForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const NDArray& data,
const OpReqType& req,
const NDArray& output) {
const ParamType& org_param = nnvm::get<ParamType>(attrs.parsed);
auto param = ConvertParamsToNumpy<ParamType>(org_param);
auto fwd = GetTransposeForward(param, data);
fwd.SetNewMem(data, output);
fwd.Execute();
}

} // namespace op
} // namespace mxnet

#endif // MXNET_USE_ONEDNN == 1
#endif // MXNET_OPERATOR_NN_DNNL_DNNL_TRANSPOSE_INL_H_
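The header separates primitive construction (the constructor) from buffer binding (SetNewMem) and submission (Execute), so a cached DNNLTransposeFwd can be reused across calls that share axes and shape but carry different data pointers. A usage sketch mirroring what DNNLTransposeForward does above (legacy_param, input and output are illustrative names, not from the commit):

    NumpyTransposeParam param = ConvertParamsToNumpy<TransposeParam>(legacy_param);
    DNNLTransposeFwd& fwd = GetTransposeForward(param, input);  // cached per param + input signature
    fwd.SetNewMem(input, output);  // rebinds data handles; the reorder primitive is reused
    fwd.Execute();                 // registers the primitive on the DNNL stream and submits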
153 changes: 75 additions & 78 deletions src/operator/nn/dnnl/dnnl_transpose.cc
@@ -25,14 +25,14 @@

#if MXNET_USE_ONEDNN == 1

#include <dnnl.hpp>

#include "../../tensor/matrix_op-inl.h"

#include "./dnnl_transpose-inl.h"

namespace mxnet {
namespace op {

bool SupportDNNLTranspose(const TransposeParam& param, const NDArray& data) {
bool SupportDNNLTranspose(const NDArray& data) {
auto data_ndim = data.shape().ndim();

if (data_ndim > 4 || data_ndim == 0 || data.shape().Size() == 0 ||
@@ -42,107 +42,104 @@ bool SupportDNNLTranspose(const TransposeParam& param, const NDArray& data) {
return true;
}

typedef ParamOpSign<TransposeParam> DNNLTransposeSignature;

class DNNLTransposeForward {
public:
std::shared_ptr<dnnl::memory> data_;
std::shared_ptr<dnnl::memory> out_;
std::shared_ptr<dnnl::memory::desc> dst_md_;
std::shared_ptr<dnnl::reorder> transpose_;

public:
DNNLTransposeForward(const TransposeParam& param, const NDArray& data) {
auto shape = data.shape();
auto data_ndim = shape.ndim();
auto axes_ndim = param.axes.ndim();
auto axes = mxnet::TShape(data_ndim, -1);
if (axes_ndim == 0) {
for (int i = 0; i < data_ndim; i++) {
axes[i] = data_ndim - i - 1;
}
} else {
axes = param.axes;
}
typedef ParamOpSign<NumpyTransposeParam> DNNLTransposeSignature;

auto engine = CpuEngine::Get()->get_engine();
auto in_mem = data.GetDNNLData();
auto src_md = in_mem->get_desc();
data_ = std::make_shared<dnnl::memory>(src_md, engine, nullptr);

dnnl_dims_t strides;
dnnl_dims_t sh;
dim_t total_stride = 1;
for (int i = data_ndim - 1; i >= 0; i--) {
sh[i] = shape[i];
strides[axes[i]] = total_stride;
total_stride *= shape[axes[i]];
DNNLTransposeFwd::DNNLTransposeFwd(const NumpyTransposeParam& param, const NDArray& data) {
auto shape = data.shape();
auto data_ndim = shape.ndim();
auto axes_ndim = param.axes.ndim();
auto axes = mxnet::TShape(data_ndim, -1);
if (!ndim_is_known(axes_ndim)) {
for (int i = 0; i < data_ndim; i++) {
axes[i] = data_ndim - i - 1;
}
} else {
axes = param.axes;
}

dnnl_memory_desc_t dst_fmt;
dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, dnnl_f32, strides);
auto engine = CpuEngine::Get()->get_engine();
auto in_mem = data.GetDNNLData();
auto src_md = in_mem->get_desc();
data_ = std::make_shared<dnnl::memory>(src_md, engine, nullptr);

dnnl_dims_t strides;
dnnl_dims_t sh;
dim_t total_stride = 1;
for (int i = data_ndim - 1; i >= 0; i--) {
sh[i] = shape[i];
strides[axes[i]] = total_stride;
total_stride *= shape[axes[i]];
}

dst_md_ = std::make_shared<dnnl::memory::desc>(dst_fmt);
out_ = std::make_shared<dnnl::memory>(*dst_md_, engine, nullptr);
dnnl_memory_desc_t dst_fmt;
dnnl_memory_desc_init_by_strides(&dst_fmt, data_ndim, sh, dnnl_f32, strides);

transpose_ = std::make_shared<dnnl::reorder>(*data_, *out_);
}
dst_md_ = std::make_shared<dnnl::memory::desc>(dst_fmt);
out_ = std::make_shared<dnnl::memory>(*dst_md_, engine, nullptr);

void SetNewMem(const NDArray& data, const NDArray& output) {
if (data.IsDNNLData()) {
this->data_->set_data_handle(data.GetDNNLData()->get_data_handle());
} else {
MSHADOW_TYPE_SWITCH(
data.dtype(), DTYPE, { this->data_->set_data_handle(data.data().dptr<DTYPE>()); });
}
transpose_ = std::make_shared<dnnl::reorder>(*data_, *out_);
}

CHECK(!output.IsDNNLData());
void DNNLTransposeFwd::SetNewMem(const NDArray& data, const NDArray& output) {
if (data.IsDNNLData()) {
this->data_->set_data_handle(data.GetDNNLData()->get_data_handle());
} else {
MSHADOW_TYPE_SWITCH(
output.dtype(), DTYPE, { this->out_->set_data_handle(output.data().dptr<DTYPE>()); });
data.dtype(), DTYPE, { this->data_->set_data_handle(data.data().dptr<DTYPE>()); });
}

const dnnl::reorder& GetFwd() const {
return *transpose_;
}
CHECK(!output.IsDNNLData());
MSHADOW_TYPE_SWITCH(
output.dtype(), DTYPE, { this->out_->set_data_handle(output.data().dptr<DTYPE>()); });
}

void Execute() const {
auto stream = DNNLStream::Get();
dnnl_args_map_t net_args;
net_args.insert({{DNNL_ARG_FROM, *(data_)}, {DNNL_ARG_TO, *(out_)}});
stream->RegisterPrimArgs(*transpose_, net_args);
stream->Submit();
}
};
const dnnl::reorder& DNNLTransposeFwd::GetFwd() const {
return *transpose_;
}

void DNNLTransposeFwd::Execute() const {
auto stream = DNNLStream::Get();
dnnl_args_map_t net_args;
net_args.insert({{DNNL_ARG_FROM, *(data_)}, {DNNL_ARG_TO, *(out_)}});
stream->RegisterPrimArgs(*transpose_, net_args);
stream->Submit();
}

static DNNLTransposeForward& GetTransposeForward(const TransposeParam& param, const NDArray& data) {
DNNLTransposeFwd& GetTransposeForward(const NumpyTransposeParam& param, const NDArray& data) {
#if DMLC_CXX11_THREAD_LOCAL
static thread_local std::unordered_map<DNNLTransposeSignature, DNNLTransposeForward, OpHash> fwds;
static thread_local std::unordered_map<DNNLTransposeSignature, DNNLTransposeFwd, OpHash> fwds;
#else
static MX_THREAD_LOCAL std::unordered_map<DNNLTransposeSignature, DNNLTransposeForward, OpHash>
fwds;
static MX_THREAD_LOCAL std::unordered_map<DNNLTransposeSignature, DNNLTransposeFwd, OpHash> fwds;
#endif
DNNLTransposeSignature key(param);
key.AddSign(data);

auto it = fwds.find(key);
if (it == fwds.end()) {
DNNLTransposeForward fwd(param, data);
DNNLTransposeFwd fwd(param, data);
it = AddToCache(&fwds, key, fwd);
}
return it->second;
}

void DNNLTransposeForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const NDArray& data,
const OpReqType& req,
const NDArray& output) {
const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
template <>
NumpyTransposeParam ConvertParamsToNumpy<NumpyTransposeParam>(const NumpyTransposeParam& param) {
NumpyTransposeParam numpy_param;
numpy_param.axes = common::CanonicalizeAxes(param.axes);
return numpy_param;
}

auto fwd = GetTransposeForward(param, data);
fwd.SetNewMem(data, output);
fwd.Execute();
template <>
NumpyTransposeParam ConvertParamsToNumpy<TransposeParam>(const TransposeParam& param) {
NumpyTransposeParam numpy_param;
if (param.axes.ndim() == 0) {
numpy_param.axes = mxnet::TShape(-1, 0);
} else {
numpy_param.axes = param.axes;
}
return numpy_param;
}

} // namespace op
} // namespace mxnet
#endif
#endif // MXNET_USE_ONEDNN == 1
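The key idea in this file is that oneDNN has no dedicated transpose primitive: the constructor builds a dnnl::reorder whose destination descriptor keeps the input's logical dims but carries permuted physical strides, so the copy itself lays the data out transposed. A worked trace of the stride loop (example values mine, not from the commit) for shape = (2, 3, 4) and axes = (1, 2, 0), i.e. output shape (3, 4, 2):

    // i = 2: strides[axes[2] = 0] = 1;  total_stride *= shape[0]  -> 2
    // i = 1: strides[axes[1] = 2] = 2;  total_stride *= shape[2]  -> 8
    // i = 0: strides[axes[0] = 1] = 8;  total_stride *= shape[1]  -> 24
    // dst desc: dims (2, 3, 4), strides (1, 8, 2). Reordering the contiguous
    // (12, 4, 1)-strided source into it writes the (1, 2, 0)-transpose: these
    // are the output's row-major strides (8, 2, 1) expressed on the input dims.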
16 changes: 16 additions & 0 deletions src/operator/numpy/np_matrix_op-inl.h
@@ -49,6 +49,11 @@ struct NumpyTransposeParam : public dmlc::Parameter<NumpyTransposeParam> {
"By default, reverse the dimensions, otherwise permute "
"the axes according to the values given.");
}

bool operator==(const NumpyTransposeParam& other) const {
return this->axes == other.axes;
}

void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
std::ostringstream axes_s;
axes_s << axes;
@@ -1868,4 +1873,15 @@ void NumpyDiagIndicesFromForward(const nnvm::NodeAttrs& attrs,
} // namespace op
} // namespace mxnet

namespace std {
template <>
struct hash<mxnet::op::NumpyTransposeParam> {
size_t operator()(const mxnet::op::NumpyTransposeParam& val) {
size_t ret = 0;
ret = dmlc::HashCombine(ret, val.axes);
return ret;
}
};
} // namespace std

#endif // MXNET_OPERATOR_NUMPY_NP_MATRIX_OP_INL_H_
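The operator== above and the std::hash specialization exist so NumpyTransposeParam can act as the key type of the forward cache in dnnl_transpose.cc: ParamOpSign needs equality and OpHash needs the hash. A minimal sketch of what they enable (cache code as in this commit; param and data are illustrative names):

    typedef ParamOpSign<NumpyTransposeParam> DNNLTransposeSignature;
    std::unordered_map<DNNLTransposeSignature, DNNLTransposeFwd, OpHash> fwds;
    DNNLTransposeSignature key(param);  // hashes/compares the param via the new members
    key.AddSign(data);                  // presumably also mixes in the input's shape/dtype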
43 changes: 42 additions & 1 deletion src/operator/numpy/np_matrix_op.cc
@@ -26,7 +26,11 @@
#include <set>
#include "./np_matrix_op-inl.h"
#include "../nn/concat-inl.h"

#if MXNET_USE_ONEDNN == 1
#include "../nn/dnnl/dnnl_ops-inl.h"
#include "../nn/dnnl/dnnl_base-inl.h"
#include "../nn/dnnl/dnnl_transpose-inl.h"
#endif
namespace mxnet {
namespace op {

@@ -100,6 +104,38 @@ bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs,
SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
return shape_is_known(*in_attrs) && shape_is_known(*out_attrs);
}
#if MXNET_USE_ONEDNN == 1

static void NumpyTransposeComputeExCPU(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
if (req[0] == kNullOp) {
return;
}
CHECK(req[0] == kWriteTo || req[0] == kAddTo)
<< "Transpose only supports kNullOp, kWriteTo and kAddTo";
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);

if (SupportDNNLTranspose(inputs[0]) && req[0] == kWriteTo) {
DNNLRun(DNNLTransposeForward<NumpyTransposeParam>, attrs, ctx, inputs[0], req[0], outputs[0]);
return;
}
FallBackCompute(NumpyTranspose<cpu>, attrs, ctx, inputs, req, outputs);
}

inline static bool NumpyTransposeStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
CHECK_EQ(in_attrs->size(), 1U);
CHECK_EQ(out_attrs->size(), 1U);
return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}
#endif

NNVM_REGISTER_OP(_npi_transpose)
.set_num_inputs(1)
@@ -134,6 +170,11 @@ NNVM_REGISTER_OP(_npi_transpose)
[](const NodeAttrs& attrs) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
#if MXNET_USE_ONEDNN == 1
.set_attr<bool>("TIsDNNL", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", NumpyTransposeComputeExCPU)
.set_attr<FInferStorageType>("FInferStorageType", NumpyTransposeStorageType)
#endif
.set_attr<nnvm::FListInputNames>("FListInputNames",
[](const NodeAttrs& attrs) {
return std::vector<std::string>{"a"};
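Taken together, the three registrations above give _npi_transpose the standard oneDNN dispatch shape. Condensed flow, with names from this commit (the argument annotation is mine):

    // FInferStorageType -> NumpyTransposeStorageType
    //                        -> DNNLStorageType(attrs, dev_mask, /*support=*/true, ...)
    // FComputeEx<cpu>   -> NumpyTransposeComputeExCPU:
    //   SupportDNNLTranspose(inputs[0]) && req[0] == kWriteTo
    //     ? DNNLRun(DNNLTransposeForward<NumpyTransposeParam>, ...)  // oneDNN reorder
    //     : FallBackCompute(NumpyTranspose<cpu>, ...)                // native CPU fallback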