Commit 4329528

fix

ZzSean committed Mar 10, 2022
1 parent a1c358e commit 4329528
Showing 3 changed files with 38 additions and 74 deletions.
45 changes: 11 additions & 34 deletions paddle/fluid/operators/log_softmax_op.cc
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <string>
 #include <unordered_map>
 #include "paddle/fluid/operators/common_infer_shape_functions.h"
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 
 namespace paddle {
 namespace operators {
@@ -32,25 +31,17 @@ class LogSoftmaxOp : public framework::OperatorWithKernel {
  protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    // choose cudnn kernel if the runtime supported.
-    framework::LibraryType library{framework::LibraryType::kPlain};
-    framework::DataLayout layout = framework::DataLayout::kAnyLayout;
-    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      library = framework::LibraryType::kCUDNN;
-    }
-#endif
+    auto input_data_type =
+        framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
 
 #ifdef PADDLE_WITH_MKLDNN
-    if (library == framework::LibraryType::kPlain &&
-        this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-      library = framework::LibraryType::kMKLDNN;
-      layout = framework::DataLayout::kMKLDNN;
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
     }
 #endif
-    return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
-                                   library);
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };

@@ -65,11 +56,6 @@ class LogSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
              "The dimension index of Input(x) to perform log_softmax,"
              "default -1 for last dimension")
         .SetDefault(-1);
-    AddAttr<bool>(
-        "use_cudnn",
-        "(bool, default false) Only used in cudnn kernel, need install cudnn")
-        .SetDefault(false)
-        .AsExtra();
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
         .SetDefault(false)
@@ -112,18 +98,9 @@ class LogSoftmaxGradOp : public framework::OperatorWithKernel {
  protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    // choose cudnn kernel if the runtime supported.
-    framework::LibraryType library{framework::LibraryType::kPlain};
-    framework::DataLayout layout = framework::DataLayout::kAnyLayout;
-    auto input_data_type = OperatorWithKernel::IndicateVarDataType(
-        ctx, framework::GradVarName("Out"));
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    if (platform::CanCUDNNBeUsed(ctx)) {
-      library = framework::LibraryType::kCUDNN;
-    }
-#endif
-    return framework::OpKernelType(input_data_type, ctx.device_context(),
-                                   layout, library);
+    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
+                                       ctx, framework::GradVarName("Out")),
+                                   ctx.device_context());
   }
 };

61 changes: 25 additions & 36 deletions paddle/fluid/operators/log_softmax_op.cu
@@ -506,45 +506,34 @@ class LogSoftmaxGradCUDNNKernel : public framework::OpKernel<T> {

 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
-REGISTER_OP_CUDA_KERNEL(
-    log_softmax, ops::LogSoftmaxKernel<plat::CUDADeviceContext, float>,
-    ops::LogSoftmaxKernel<plat::CUDADeviceContext, double>,
-    ops::LogSoftmaxKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::LogSoftmaxKernel<plat::CUDADeviceContext, plat::bfloat16>);
-REGISTER_OP_CUDA_KERNEL(
-    log_softmax_grad, ops::LogSoftmaxGradKernel<plat::CUDADeviceContext, float>,
-    ops::LogSoftmaxGradKernel<plat::CUDADeviceContext, double>,
-    ops::LogSoftmaxGradKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::LogSoftmaxGradKernel<plat::CUDADeviceContext, plat::bfloat16>);
 #ifdef PADDLE_WITH_HIP
-REGISTER_OP_KERNEL(log_softmax, CUDNN, plat::CUDAPlace,
-                   ops::LogSoftmaxCUDNNKernel<float>,
-                   ops::LogSoftmaxCUDNNKernel<plat::float16>,
-                   ops::LogSoftmaxCUDNNKernel<plat::bfloat16>);
-REGISTER_OP_KERNEL(log_softmax_grad, CUDNN, plat::CUDAPlace,
-                   ops::LogSoftmaxGradCUDNNKernel<float>,
-                   ops::LogSoftmaxGradCUDNNKernel<plat::float16>,
-                   ops::LogSoftmaxGradCUDNNKernel<plat::bfloat16>);
+REGISTER_OP_CUDA_KERNEL(log_softmax, ops::LogSoftmaxCUDNNKernel<float>,
+                        ops::LogSoftmaxCUDNNKernel<plat::float16>,
+                        ops::LogSoftmaxCUDNNKernel<plat::bfloat16>);
+REGISTER_OP_CUDA_KERNEL(log_softmax_grad, CUDNN, plat::CUDAPlace,
+                        ops::LogSoftmaxGradCUDNNKernel<float>,
+                        ops::LogSoftmaxGradCUDNNKernel<plat::float16>,
+                        ops::LogSoftmaxGradCUDNNKernel<plat::bfloat16>);
 #else
 #if CUDNN_VERSION_MIN(8, 1, 0)
-REGISTER_OP_KERNEL(log_softmax, CUDNN, plat::CUDAPlace,
-                   ops::LogSoftmaxCUDNNKernel<float>,
-                   ops::LogSoftmaxCUDNNKernel<double>,
-                   ops::LogSoftmaxCUDNNKernel<plat::float16>,
-                   ops::LogSoftmaxCUDNNKernel<plat::bfloat16>);
-REGISTER_OP_KERNEL(log_softmax_grad, CUDNN, plat::CUDAPlace,
-                   ops::LogSoftmaxGradCUDNNKernel<float>,
-                   ops::LogSoftmaxGradCUDNNKernel<double>,
-                   ops::LogSoftmaxGradCUDNNKernel<plat::float16>,
-                   ops::LogSoftmaxGradCUDNNKernel<plat::bfloat16>);
+REGISTER_OP_CUDA_KERNEL(log_softmax, ops::LogSoftmaxCUDNNKernel<float>,
+                        ops::LogSoftmaxCUDNNKernel<double>,
+                        ops::LogSoftmaxCUDNNKernel<plat::float16>,
+                        ops::LogSoftmaxCUDNNKernel<plat::bfloat16>);
+REGISTER_OP_CUDA_KERNEL(log_softmax_grad, ops::LogSoftmaxGradCUDNNKernel<float>,
+                        ops::LogSoftmaxGradCUDNNKernel<double>,
+                        ops::LogSoftmaxGradCUDNNKernel<plat::float16>,
+                        ops::LogSoftmaxGradCUDNNKernel<plat::bfloat16>);
 #else
-REGISTER_OP_KERNEL(log_softmax, CUDNN, plat::CUDAPlace,
-                   ops::LogSoftmaxCUDNNKernel<float>,
-                   ops::LogSoftmaxCUDNNKernel<double>,
-                   ops::LogSoftmaxCUDNNKernel<plat::float16>);
-REGISTER_OP_KERNEL(log_softmax_grad, CUDNN, plat::CUDAPlace,
-                   ops::LogSoftmaxGradCUDNNKernel<float>,
-                   ops::LogSoftmaxGradCUDNNKernel<double>,
-                   ops::LogSoftmaxGradCUDNNKernel<plat::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    log_softmax, ops::LogSoftmaxCUDNNKernel<float>,
+    ops::LogSoftmaxCUDNNKernel<double>,
+    ops::LogSoftmaxCUDNNKernel<plat::float16>,
+    ops::LogSoftmaxKernel<plat::CUDADeviceContext, plat::bfloat16>);
+REGISTER_OP_CUDA_KERNEL(
+    log_softmax_grad, ops::LogSoftmaxGradCUDNNKernel<float>,
+    ops::LogSoftmaxGradCUDNNKernel<double>,
+    ops::LogSoftmaxGradCUDNNKernel<plat::float16>,
+    ops::LogSoftmaxGradKernel<plat::CUDADeviceContext, plat::bfloat16>);
 #endif
 #endif
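
For reviewers who want to sanity-check the kernels registered above, here is a minimal Python sketch (not part of this commit) that compares the op's output against a NumPy log-softmax reference. It assumes a Paddle build where paddle.nn.functional.log_softmax is available and runs on GPU when one is present:

import numpy as np
import paddle
import paddle.nn.functional as F

# Reference: log_softmax(x) = x - max(x) - log(sum(exp(x - max(x)))) along the axis.
x_np = np.random.rand(2, 8).astype('float32')
m = x_np.max(axis=-1, keepdims=True)
ref = x_np - m - np.log(np.exp(x_np - m).sum(axis=-1, keepdims=True))

# Exercises whichever kernel is registered for the current place (GPU if available).
out = F.log_softmax(paddle.to_tensor(x_np), axis=-1).numpy()
print(np.allclose(out, ref, atol=1e-6))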
6 changes: 2 additions & 4 deletions python/paddle/nn/functional/activation.py
@@ -1359,12 +1359,11 @@ def log_softmax(x, axis=-1, dtype=None, name=None):

     if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
         dtype = convert_np_dtype_to_dtype_(dtype)
-    use_cudnn = True
 
     if in_dynamic_mode():
         if dtype is not None:
             x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
-        return _C_ops.log_softmax(x, 'axis', axis, 'use_cudnn', use_cudnn)
+        return _C_ops.log_softmax(x, 'axis', axis)
 
     if dtype is None:
         check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1389,8 +1388,7 @@
         type='log_softmax',
         inputs={'X': out_cast},
         outputs={'Out': out},
-        attrs={'axis': axis,
-               'use_cudnn': use_cudnn})
+        attrs={'axis': axis})
 
     return out
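
As a usage note (not part of this commit), the public API is unchanged by dropping the internal use_cudnn attribute; a minimal sketch of calling log_softmax from user code:

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([[1.0, 2.0, 3.0],
                      [4.0, 5.0, 6.0]])
# Callers never passed use_cudnn; whether a cuDNN kernel is used is decided
# inside the operator, so this call is unaffected by the change.
y = F.log_softmax(x, axis=-1)
print(y)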

