Fix some problems of kernel fallback in C++ API #44681

Merged · 3 commits · Jul 28, 2022
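Summary of the change: `phi::KernelFactory::Instance().SelectKernelOrThrowError()` no longer returns the selected kernel directly. It now returns a result object that carries both the kernel and a `has_fallback_cpu` flag recording whether the requested backend had no registered kernel and a CPU kernel was chosen instead. Call sites bind the kernel via `kernel_result.kernel`, pick a device context matching the kernel that will actually run, and, when a fallback occurred, copy outputs back to the requested backend with the new `TransDataBackend` helpers. A minimal sketch of the pattern follows; the result type's real definition is not part of this diff, so the struct below only assumes the two members the diff actually uses.

```cpp
// Sketch only: the result type's actual definition lives in
// paddle/phi/core/kernel_factory.h and is not shown in this PR's diff.
// The two members below are inferred from the call sites in the diff.
struct KernelResultSketch {
  const phi::Kernel& kernel;  // the kernel that was actually selected
  bool has_fallback_cpu;      // true if the requested backend had no kernel
};

// The call-site pattern this PR installs everywhere (hypothetical op "conv2d"):
void RunWithFallbackSketch(Backend kernel_backend,
                           DataLayout kernel_layout,
                           DataType kernel_data_type,
                           phi::DenseTensor* kernel_out) {
  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "conv2d", {kernel_backend, kernel_layout, kernel_data_type});
  const auto& kernel = kernel_result.kernel;

  // The device context must match the kernel that will actually run,
  // not the backend the caller asked for.
  auto* dev_ctx = GetDeviceContextByBackend(
      kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);
  (void)dev_ctx;  // ... prepare inputs, create outputs, invoke the kernel ...

  // After a CPU fallback the outputs live on CPU; copy them back.
  if (kernel_result.has_fallback_cpu) {
    TransDataBackend(kernel_out, kernel_backend, kernel_out);
  }
}
```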
42 changes: 28 additions & 14 deletions paddle/phi/api/lib/api_custom_impl.cc
@@ -73,8 +73,9 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adamw_impl(
     }
   }
   std::string kernel_name = "adamw";
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       kernel_name, {kernel_backend, kernel_layout, kernel_data_type});
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << kernel_name << " API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
   VLOG(6) << kernel_name << " API kernel: " << kernel;
@@ -232,8 +233,9 @@ Tensor conv2d_impl(const Tensor& input,
 
   VLOG(6) << "conv2d API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       "conv2d", {kernel_backend, kernel_layout, kernel_data_type}, true);
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << "conv2d API kernel: " << kernel;
 
   auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
@@ -334,8 +336,9 @@ Tensor conv3d_impl(const Tensor& input,
 
   VLOG(6) << "conv3d API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       "conv3d", {kernel_backend, kernel_layout, kernel_data_type}, true);
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << "conv3d API kernel: " << kernel;
 
   auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
@@ -437,8 +440,9 @@ void conv2d_grad_impl(const Tensor& input,
 
   VLOG(6) << "conv2d_grad API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       "conv2d_grad", {kernel_backend, kernel_layout, kernel_data_type}, true);
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << "conv2d_grad API kernel: " << kernel;
 
   auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
@@ -538,8 +542,9 @@ void conv3d_grad_impl(const Tensor& input,
 
   VLOG(6) << "conv3d_grad API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       "conv3d_grad", {kernel_backend, kernel_layout, kernel_data_type}, true);
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << "conv3d_grad API kernel: " << kernel;
 
   auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
@@ -624,10 +629,11 @@ Tensor embedding_impl(const Tensor& x,
   Tensor api_output;
 
   if (phi::DenseTensor::classof(weight.impl().get())) {
-    const auto& kernel =
+    auto kernel_result =
         phi::KernelFactory::Instance().SelectKernelOrThrowError(
             "embedding",
             {kernel_key.backend(), kernel_key.layout(), kernel_data_type});
+    const auto& kernel = kernel_result.kernel;
     VLOG(6) << "embedding API kernel: " << kernel;
 
     auto input_x = PrepareData(x, kernel.InputAt(0), {});
@@ -652,10 +658,11 @@
       (*kernel_fn)(*dev_ctx, *input_x, *input_weight, padding_idx, kernel_out);
     }
   } else {
-    const auto& kernel =
+    auto kernel_result =
         phi::KernelFactory::Instance().SelectKernelOrThrowError(
             "sparse_weight_embedding",
             {kernel_key.backend(), kernel_key.layout(), kernel_data_type});
+    const auto& kernel = kernel_result.kernel;
     VLOG(6) << "sparse_weight_embedding API kernel: " << kernel;
 
     auto input_x = PrepareData(x, kernel.InputAt(0), {});
@@ -693,8 +700,9 @@ std::vector<Tensor> split_impl(const Tensor& x,
   DataLayout kernel_layout = kernel_key.layout();
   DataType kernel_data_type = kernel_key.dtype();
 
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       "split", {kernel_backend, kernel_layout, kernel_data_type});
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << "split API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
   VLOG(6) << "split API kernel: " << kernel;
@@ -774,8 +782,9 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
   if (grad.is_selected_rows()) {
     kernel_name = "momentum_dense_param_sparse_grad";
   }
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       kernel_name, {kernel_backend, kernel_layout, kernel_data_type});
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << kernel_name << " API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
   VLOG(6) << kernel_name << " API kernel: " << kernel;
@@ -906,8 +915,9 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
     }
   }
 
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "batch_norm", {kernel_backend, kernel_layout, kernel_data_type});
+  const auto& kernel = kernel_result.kernel;
   VLOG(6) << "batch_norm API kernel key: [" << kernel_backend << ", "
           << kernel_layout << ", " << kernel_data_type << "]";
   VLOG(6) << "batch_norm API kernel: " << kernel;
@@ -1004,8 +1014,9 @@ void imag_grad_impl(const Tensor& out_grad, Tensor* x_grad) {
   phi::KernelKey kernel_key{ParseBackend(out_grad),
                             out_grad.layout(),
                             phi::dtype::ToComplex(out_grad.dtype())};
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "imag_grad", kernel_key);
+  const auto& kernel = kernel_result.kernel;
 
   VLOG(6) << "imag_grad API kernel key: " << kernel_key;
   VLOG(6) << "imag_grad API kernel: " << kernel;
@@ -1042,10 +1053,11 @@ void embedding_grad_impl(const Tensor& x,
   if (phi::DenseTensor::classof(weight.impl().get())) {
     std::string kernel_name =
         sparse ? "embedding_sparse_grad" : "embedding_grad";
-    const auto& kernel =
+    auto kernel_result =
         phi::KernelFactory::Instance().SelectKernelOrThrowError(
             kernel_name,
             {kernel_key.backend(), kernel_key.layout(), kernel_data_type});
+    const auto& kernel = kernel_result.kernel;
     VLOG(6) << kernel_name << " API kernel: " << kernel;
 
     auto input_x = PrepareData(x, kernel.InputAt(0), {});
@@ -1094,10 +1106,11 @@
   } else {
     std::string kernel_name = sparse ? "sparse_weight_embedding_sparse_grad"
                                      : "sparse_weight_embedding_grad";
-    const auto& kernel =
+    auto kernel_result =
        phi::KernelFactory::Instance().SelectKernelOrThrowError(
            kernel_name,
            {kernel_key.backend(), kernel_key.layout(), kernel_data_type});
+    const auto& kernel = kernel_result.kernel;
     VLOG(6) << kernel_name << " API kernel: " << kernel;
 
     auto input_x = PrepareData(x, kernel.InputAt(0), {});
@@ -1148,8 +1161,9 @@ void real_grad_impl(const Tensor& out_grad, Tensor* x_grad) {
   phi::KernelKey kernel_key{ParseBackend(out_grad),
                             out_grad.layout(),
                             phi::dtype::ToComplex(out_grad.dtype())};
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "real_grad", kernel_key);
+  const auto& kernel = kernel_result.kernel;
 
   VLOG(6) << "real_grad API kernel key: " << kernel_key;
   VLOG(6) << "real_grad API kernel: " << kernel;
25 changes: 25 additions & 0 deletions paddle/phi/api/lib/data_transform.cc
@@ -294,5 +294,30 @@ paddle::optional<std::vector<phi::DenseTensor>> PrepareData(
   return paddle::none;
 }
 
+void TransDataBackend(const phi::DenseTensor* tensor,
+                      Backend target_backend,
+                      phi::DenseTensor* out) {
+  if (tensor) {
+    *out = TransDataPlace(*tensor, phi::TransToPhiPlace(target_backend));
+  }
+}
+
+void TransDataBackend(const std::vector<phi::DenseTensor*>& tensors,
+                      Backend target_backend,
+                      std::vector<phi::DenseTensor*> outs) {
+  size_t n = tensors.size();
+  for (size_t i = 0; i < n; ++i) {
+    TransDataBackend(tensors[i], target_backend, outs[i]);
+  }
+}
+
+void TransDataBackend(const phi::SelectedRows* tensor,
+                      Backend target_backend,
+                      phi::SelectedRows* out) {
+  if (tensor) {
+    TransDataBackend(&tensor->value(), target_backend, out->mutable_value());
+  }
+}
+
 }  // namespace experimental
 }  // namespace paddle
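The three overloads above are the transfer helpers the fallback path relies on. Note that passing the same pointer as source and destination is safe: the `DenseTensor` overload constructs a fresh tensor via `TransDataPlace` before assigning through `out`, and the `SelectedRows` overload just recurses on the underlying value tensor. A minimal usage sketch, assuming Paddle's internal headers (`CopyOutputsBackSketch` is a hypothetical helper, not part of this PR):

```cpp
#include "paddle/phi/api/lib/data_transform.h"

namespace paddle {
namespace experimental {

// Hypothetical helper: after a CPU fallback, move kernel outputs back to the
// backend the caller originally requested (in-place aliasing is fine here).
void CopyOutputsBackSketch(phi::DenseTensor* dense_out,
                           phi::SelectedRows* rows_out,
                           Backend requested_backend) {
  TransDataBackend(dense_out, requested_backend, dense_out);
  TransDataBackend(rows_out, requested_backend, rows_out);
}

}  // namespace experimental
}  // namespace paddle
```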
13 changes: 13 additions & 0 deletions paddle/phi/api/lib/data_transform.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/core/kernel_factory.h"
+#include "paddle/phi/core/selected_rows.h"
 
 namespace paddle {
 namespace experimental {
@@ -81,5 +82,17 @@ paddle::optional<std::vector<phi::DenseTensor>> PrepareData(
     const phi::TensorArgDef& target_args_def,
     const TransformFlag& transform_flag);
 
+void TransDataBackend(const phi::DenseTensor* tensor,
+                      Backend target_backend,
+                      phi::DenseTensor* out);
+
+void TransDataBackend(const std::vector<phi::DenseTensor*>& tensor,
+                      Backend target_backend,
+                      std::vector<phi::DenseTensor*> out);
+
+void TransDataBackend(const phi::SelectedRows* tensor,
+                      Backend target_backend,
+                      phi::SelectedRows* out);
+
 }  // namespace experimental
 }  // namespace paddle
9 changes: 6 additions & 3 deletions paddle/phi/api/lib/sparse_api_custom_impl.cc
@@ -38,8 +38,9 @@ Tensor to_sparse_coo_impl(const Tensor& x, const int64_t sparse_dim) {
   auto kernel_key_set = ParseKernelKeyByInputArgs(x);
   auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
 
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      kernel_name, kernel_key);
+  const auto& kernel = kernel_result.kernel;
 
   VLOG(6) << "add API kernel key: " << kernel_key;
   VLOG(6) << "to API kernel: " << kernel;
@@ -95,8 +96,9 @@ Tensor to_sparse_csr_impl(const Tensor& x) {
   auto kernel_key_set = ParseKernelKeyByInputArgs(x);
   auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
 
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      kernel_name, kernel_key);
+  const auto& kernel = kernel_result.kernel;
 
   VLOG(6) << "add API kernel key: " << kernel_key;
   VLOG(6) << "to API kernel: " << kernel;
@@ -157,8 +159,9 @@ Tensor to_dense_impl(const Tensor& x) {
   auto kernel_key_set = ParseKernelKeyByInputArgs(x);
   auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
 
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      kernel_name, kernel_key);
+  const auto& kernel = kernel_result.kernel;
 
   VLOG(6) << "add API kernel key: " << kernel_key;
   VLOG(6) << "to API kernel: " << kernel;
15 changes: 12 additions & 3 deletions paddle/phi/api/yaml/generator/api_base.py
@@ -685,15 +685,21 @@ def gen_kernel_code(self, kernel_name, code_indent, inplace_flag=False):
         outputs_args, kernel_output_names, output_create = self.gene_output(
             self.outputs['types'], out_tensor_type_list, code_indent,
             inplace_flag)
+        fallback_kernel_output_trans = ""
+        for kernel_out in outputs_args:
+            fallback_kernel_output_trans += (f"""
+{code_indent}  TransDataBackend({kernel_out}, kernel_backend, {kernel_out});"""
+                                             )
         cudnn_args = '' if self.kernel[
             'use_gpudnn'] == 'false' else ', ' + self.kernel['use_gpudnn']
         return f"""
 {code_indent}  VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
-{code_indent}  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+{code_indent}  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
 {code_indent}      "{kernel_name}", {{kernel_backend, kernel_layout, kernel_data_type}}{cudnn_args});
+{code_indent}  const auto& kernel = kernel_result.kernel;
 {code_indent}  VLOG(6) << "{kernel_name} kernel: " << kernel;
 
-{code_indent}  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+{code_indent}  auto* dev_ctx = GetDeviceContextByBackend(kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);
 {input_tensors}
 {output_create}
 {self.gene_infer_meta(kernel_output_names, code_indent)}
@@ -702,7 +708,10 @@ def gen_kernel_code(self, kernel_name, code_indent, inplace_flag=False):
 {code_indent}  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
 {code_indent}  {{
 {code_indent}    paddle::platform::RecordEvent kernel_record_event(\"{kernel_name} compute\", paddle::platform::TracerEventType::OperatorInner, 1);
-{code_indent}    (*kernel_fn)({kernel_args}, {outputs_args});
+{code_indent}    (*kernel_fn)({kernel_args}, {", ".join(outputs_args)});
 {code_indent}  }}
+{code_indent}  if (kernel_result.has_fallback_cpu) {{
+{fallback_kernel_output_trans}
+{code_indent}  }}
 
 {code_indent}  {self.gene_return_code()}"""
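For reference, expanding the template above for a hypothetical single-output op named `add` would yield generated C++ roughly like the excerpt below (a reconstruction from the f-string, not actual generator output; `input_x`, `input_y`, and `kernel_out` stand in for the generated variables):

```cpp
  // Reconstructed excerpt of generated code; details differ per op.
  VLOG(6) << "add API kernel key: [" << kernel_backend << ", "
          << kernel_layout << ", " << kernel_data_type << "]";
  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "add", {kernel_backend, kernel_layout, kernel_data_type});
  const auto& kernel = kernel_result.kernel;
  VLOG(6) << "add kernel: " << kernel;

  // Pick the CPU context when the kernel fell back to CPU.
  auto* dev_ctx = GetDeviceContextByBackend(
      kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);

  // ... PrepareData for inputs, SetKernelOutput, InferMeta ...
  {
    paddle::platform::RecordEvent kernel_record_event(
        "add compute", paddle::platform::TracerEventType::OperatorInner, 1);
    (*kernel_fn)(*dev_ctx, input_x, input_y, kernel_out);
  }
  // New: if the kernel ran on CPU as a fallback, move outputs back.
  if (kernel_result.has_fallback_cpu) {
    TransDataBackend(kernel_out, kernel_backend, kernel_out);
  }
```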
7 changes: 3 additions & 4 deletions paddle/phi/api/yaml/generator/api_gen.py
@@ -137,13 +137,13 @@ def gene_output(self,
                     out_tensor_type_list=None,
                     code_indent='',
                     inplace_flag=False):
-        kernel_output = ""
+        kernel_output = []
         output_names = []
         output_create = ""
         return_type = self.get_return_type_with_intermediate(inplace_flag)
 
         if len(out_dtype_list) == 1:
-            kernel_output = 'kernel_out'
+            kernel_output.append('kernel_out')
             output_names.append('kernel_out')
             inplace_assign = " = " + self.inplace_map[
                 self.outputs['names'][0]] if inplace_flag and self.outputs[
@@ -186,7 +186,7 @@
             output_create = output_create[:-2] + '};'
 
             for i in range(len(out_dtype_list)):
-                kernel_output = kernel_output + f'kernel_out_{i}, '
+                kernel_output.append(f'kernel_out_{i}')
                 output_names.append(f'kernel_out_{i}')
                 set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[
                     i] == 'dense' else 'SetSelectedRowsKernelOutput'
@@ -214,7 +214,6 @@ def gene_output(self,
 {code_indent}    kernel_out_{i}->ShareInplaceVersionCounterWith(*{PREFIX_TENSOR_NAME}{self.view_map[self.outputs['names'][i]]});
 {code_indent}    VLOG(3) << "Perform View between Output and Input Tensor, share allocation and inplace version.";"""
 
-            kernel_output = kernel_output[:-2]
         else:
             raise ValueError(
                 "{} : Output error: the output should not be empty.".format(
7 changes: 3 additions & 4 deletions paddle/phi/api/yaml/generator/backward_api_gen.py
@@ -118,12 +118,12 @@ def gene_output(self,
                     out_tensor_type_list=None,
                     code_indent='',
                     inplace_flag=False):
-        kernel_output = ""
+        kernel_output = []
         output_names = []
         output_create = ""
 
         if len(out_dtype_list) == 1:
-            kernel_output = 'kernel_out'
+            kernel_output.append('kernel_out')
             output_names.append('kernel_out')
             inplace_assign = " = " + self.inplace_map[self.outputs['names'][
                 0]] if inplace_flag and self.inplace_map is not None and self.outputs[
@@ -144,7 +144,7 @@ def gene_output(self,
         elif len(out_dtype_list) > 1:
             output_create = ""
             for i, out_type_item in enumerate(out_dtype_list):
-                kernel_output = kernel_output + f'kernel_out_{i}, '
+                kernel_output.append(f'kernel_out_{i}')
                 output_names.append(f'kernel_out_{i}')
                 set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[
                     i] == 'dense' else 'SetSelectedRowsKernelOutput'
@@ -168,7 +168,6 @@ def gene_output(self,
                 output_create = output_create + f"""
 {code_indent}  auto kernel_out_{i} = {set_out_func}(&{self.outputs['names'][i]});"""
 
-            kernel_output = kernel_output[:-2]
         else:
             raise ValueError(
                 "{} : Output error: the output should not be empty.".format(
12 changes: 6 additions & 6 deletions paddle/phi/api/yaml/generator/sparse_api_gen.py
@@ -36,7 +36,7 @@ def gene_output(self,
                     out_tensor_type_list=None,
                     code_indent='',
                     inplace_flag=False):
-        kernel_output = ""
+        kernel_output = []
         output_names = []
         output_create = ""
         return_type = self.get_return_type_with_intermediate(inplace_flag)
@@ -47,7 +47,7 @@
         }
 
         if len(out_dtype_list) == 1:
-            kernel_output = 'kernel_out'
+            kernel_output.append('kernel_out')
             output_names.append('kernel_out')
             inplace_assign = " = " + self.inplace_map[self.outputs['names'][
                 0]] if inplace_flag and self.inplace_map is not None and self.outputs[
@@ -73,12 +73,11 @@ def gene_output(self,
             output_create = output_create[:-2] + '};'
 
             for i in range(len(out_dtype_list)):
-                kernel_output = kernel_output + f'kernel_out_{i}, '
+                kernel_output.append(f'kernel_out_{i}')
                 output_names.append(f'kernel_out_{i}')
                 output_create = output_create + f"""
     auto* kernel_out_{i} = SetSparseKernelOutput(&std::get<{i}>(api_output), {output_type_map[out_dtype_list[i]]});"""
 
-            kernel_output = kernel_output[:-2]
         else:
             raise ValueError(
                 "{} : Output error: the output should not be empty.".format(
@@ -147,11 +146,12 @@ def gen_sparse_kernel_code(self, kernel_name, inplace_flag=False):
             self.gene_return_code()) == 0 else " " + self.gene_return_code()
         return f"""
   VLOG(6) << "{self.api} api sparse kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
-  auto phi_kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "{kernel_name}", {{kernel_backend, kernel_layout, kernel_data_type}});
+  const auto& phi_kernel = kernel_result.kernel;
   VLOG(6) << "{self.api} api sparse kernel: " << phi_kernel;
 
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);
   auto kernel_context = phi::KernelContext(dev_ctx);
 {output_create}
 {kernel_context_code}