[Paddle-TRT][cherry pick] Slice to 2.3 #44757

Merged · 4 commits · Aug 4, 2022
203 changes: 143 additions & 60 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -34,51 +34,97 @@ namespace tensorrt {
class FcOpConverter : public OpConverter {
public:
nvinfer1::ILayer* reshape_before_fc(nvinfer1::ITensor* before_fc,
nvinfer1::Dims x_dim, int x_num_col_dims,
nvinfer1::Dims x_dim,
int x_num_col_dims,
std::string output_name) {
// add shuffle before fc
nvinfer1::Dims reshape_before_fc_dim;
reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
// padding shape "* x q x 1 x 1"
for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
reshape_before_fc_dim.d[i] = 1;
}
for (int i = 0; i < x_dim.nbDims; i++) {
if (i < x_num_col_dims) {
reshape_before_fc_dim.d[i] = 0;
} else {
if (x_dim.d[i] < 0) {
reshape_before_fc_dim.d[x_num_col_dims] = -1;
break;

nvinfer1::ITensor* filal_reshape_before_fc_shape_tensor = nullptr;

if (!engine_->with_dynamic_shape()) {
for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
reshape_before_fc_dim.d[i] = 1;
}
for (int i = 0; i < x_dim.nbDims; i++) {
if (i < x_num_col_dims) {
reshape_before_fc_dim.d[i] = 0;
} else {
reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
}
}
} else {
std::vector<nvinfer1::ITensor*> reshape_before_fc_shape_tensor;
nvinfer1::ITensor* input_shape_tensor = Shape(before_fc);

for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
reshape_before_fc_shape_tensor.push_back(Add1DConstantLayer(1));
}
for (int i = 0; i < x_dim.nbDims; i++) {
if (i < x_num_col_dims) {
reshape_before_fc_shape_tensor[i] =
GetEleTensorOfShape(input_shape_tensor, i);
} else {
reshape_before_fc_shape_tensor[x_num_col_dims] =
Prod(GetEleTensorOfShape(input_shape_tensor, i),
reshape_before_fc_shape_tensor[x_num_col_dims]);
// If not set, test_trt_matmul_quant_dequant in trt 6015 will fail
reshape_before_fc_shape_tensor[x_num_col_dims]->setType(
nvinfer1::DataType::kINT32);
}
reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
}
filal_reshape_before_fc_shape_tensor =
Concat(reshape_before_fc_shape_tensor);
}

auto* reshape_before_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *before_fc);
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
if (!engine_->with_dynamic_shape()) {
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
} else {
reshape_before_fc_layer->setInput(1,
*filal_reshape_before_fc_shape_tensor);
}
reshape_before_fc_layer->setName(
("fc_op_reshape_before_fc: Shuffle (Output: " + output_name + ")")
.c_str());
return reshape_before_fc_layer;
}
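
The dynamic-shape branch above builds the target shape as a runtime tensor rather than a fixed `nvinfer1::Dims`. As a reading aid, here is a minimal sketch of the same computation against the raw TensorRT C++ API; this is not part of the PR (assumptions: TensorRT >= 7, an `IReduceLayer` kPROD in place of the converter's per-dimension `Prod` loop, and `BuildFcInputShape` with its arguments as illustrative names, not Paddle's):

```cpp
#include <NvInfer.h>

// Builds the shape tensor [d0, ..., d_{k-1}, d_k * ... * d_{n-1}, 1, 1] that
// the Shuffle layer consumes (k = x_num_col_dims, n = rank of x, k < n).
// NOTE: memory handed to addConstant must outlive engine building.
static const int kOne[1] = {1};

nvinfer1::ITensor* BuildFcInputShape(nvinfer1::INetworkDefinition* net,
                                     nvinfer1::ITensor* x, int k) {
  const int n = x->getDimensions().nbDims;
  nvinfer1::ITensor* shape = net->addShape(*x)->getOutput(0);  // 1-D kINT32

  // Dims 0..k-1 pass through unchanged.
  auto* head =
      net->addSlice(*shape, {1, {0}}, {1, {k}}, {1, {1}})->getOutput(0);

  // Dims k..n-1 are folded into one axis by multiplying them together.
  auto* tail =
      net->addSlice(*shape, {1, {k}}, {1, {n - k}}, {1, {1}})->getOutput(0);
  auto* q = net->addReduce(*tail, nvinfer1::ReduceOperation::kPROD,
                           /*reduceAxes=*/1U << 0, /*keepDims=*/true)
                ->getOutput(0);

  // Two trailing 1s give the "* x q x 1 x 1" padding the FC layer expects.
  nvinfer1::Weights ones{nvinfer1::DataType::kINT32, kOne, 1};
  auto* one = net->addConstant({1, {1}}, ones)->getOutput(0);

  nvinfer1::ITensor* parts[] = {head, q, one, one};
  return net->addConcatenation(parts, 4)->getOutput(0);
}
```

Note that the converter additionally forces the product tensor to `kINT32`; per its inline comment, this guards against a quant/dequant test failure on TensorRT 6.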

nvinfer1::ILayer* reshape_after_fc(nvinfer1::ITensor* after_fc,
nvinfer1::Dims x_dim, int x_num_col_dims) {
nvinfer1::Dims x_dim,
int x_num_col_dims) {
// add shuffle after fc
nvinfer1::Dims reshape_after_fc_dim;
reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;

nvinfer1::ITensor* filal_reshape_after_fc_shape_tensor = nullptr;
if (!engine_->with_dynamic_shape()) {
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;
}
} else {
std::vector<int> gather_indices(x_num_col_dims + 1);
std::iota(gather_indices.begin(), gather_indices.end(), 0);
filal_reshape_after_fc_shape_tensor =
Gather(Shape(after_fc), gather_indices);
}

auto* reshape_after_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *after_fc);
reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
if (!engine_->with_dynamic_shape()) {
reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
} else {
reshape_after_fc_layer->setInput(1, *filal_reshape_after_fc_shape_tensor);
}
return reshape_after_fc_layer;
}
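
To make the two reshapes concrete, a worked example with illustrative shapes (not taken from the PR):

```cpp
// x = [N, S, 768], x_num_col_dims = 2, n_output = 512. Paddle's fc keeps the
// first x_num_col_dims dimensions as matrix rows and flattens the rest into
// the q axis that the weight's input side must match.
//
//   reshape_before_fc : [N, S, 768]       -> [N, S, 768, 1, 1]  // pad "* x q x 1 x 1"
//   FullyConnected    : [N, S, 768, 1, 1] -> [N, S, 512, 1, 1]
//   reshape_after_fc  : [N, S, 512, 1, 1] -> [N, S, 512]        // gather_indices = {0, 1, 2}
```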

void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
const framework::Scope& scope,
bool test_mode) override {
VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
framework::OpDesc op_desc(op, nullptr);
auto output_name = op_desc.Output("Out").front();
@@ -96,8 +142,9 @@ class FcOpConverter : public OpConverter {
// Declare weights
auto* Y_v = scope.FindVar(op_desc.Input(w_name).front());
PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound(
"Can not find %s presistale var of fc in scope.", w_name));
Y_v,
platform::errors::NotFound(
"Can not find %s presistale var of fc in scope.", w_name));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
int x_num_col_dims =
op_desc.HasAttr("x_num_col_dims")
@@ -128,7 +175,8 @@ class FcOpConverter : public OpConverter {
}
weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);

PADDLE_ENFORCE_EQ(Y_t->dims().size(), 2UL,
PADDLE_ENFORCE_EQ(Y_t->dims().size(),
2UL,
platform::errors::InvalidArgument(
"The fc's weight should be a matrix with 2 dims, but "
"it's %d-dimensional.",
@@ -143,25 +191,31 @@
}
};

auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
auto regist_fc = [&](nvinfer1::ITensor* inputs,
int n_output,
TensorRTEngine::Weight& weight,
TensorRTEngine::Weight& bias) {
if (enable_int8 || support_int8) {
// add conv layer
float out_scale = 0;
if (enable_int8) {
PADDLE_ENFORCE_EQ(
op_desc.HasAttr("out_threshold"), true,
op_desc.HasAttr("out_threshold"),
true,
platform::errors::InvalidArgument(
"must have out threshold in fc layers in int8 mode"));
out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
} else {
out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Out"));
}
nvinfer1::DimsHW nv_ksize(1, 1);
auto* fc_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
nv_ksize, weight.get(), bias.get());
auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
Convolution,
*inputs,
n_output,
nv_ksize,
weight.get(),
bias.get());
fc_layer_int8->setName(
("fc_op_int8_conv1x1: Convolution (Output: " + output_name + ")")
.c_str());
@@ -174,21 +228,29 @@
.c_str());
engine_->SetTensorDynamicRange(fc_after_reshape_int8->getOutput(0),
out_scale);
nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_after_reshape_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
{output_name}, test_mode);
nvinfer1::IActivationLayer* relu_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_after_reshape_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8,
"relu_after_fc_shuffle",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_after_reshape_int8,
"fc_op_int8_reshape_after_fc: Shuffle",
{output_name}, test_mode);
{output_name},
test_mode);
}
} else {
// add fc layer
auto* fc_layer_float =
TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
weight.get(), bias.get());
auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(engine_,
FullyConnected,
*inputs,
n_output,
weight.get(),
bias.get());
fc_layer_float->setName(
("fc_op_float: FullyConnected (Output: " + output_name + ")")
.c_str());
Expand All @@ -198,14 +260,20 @@ class FcOpConverter : public OpConverter {
fc_after_reshape_float->setName(
("float_reshape_after_fc: Shuffle (Output: " + output_name + ")")
.c_str());
nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_after_reshape_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
{output_name}, test_mode);
nvinfer1::IActivationLayer* relu_layer_float =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_after_reshape_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_float,
"relu_after_fc_shuffle",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_after_reshape_float, "shuffle_after_fc",
{output_name}, test_mode);
RreplenishLayerAndOutput(fc_after_reshape_float,
"shuffle_after_fc",
{output_name},
test_mode);
}
}
};
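
Two details of the int8 branch in `regist_fc` are easy to miss: the GEMM is expressed as a 1x1 convolution over the padded `* x q x 1 x 1` tensor (mathematically identical to `FullyConnected`), and `out_threshold` feeds the output tensor's quantization range. A hedged sketch of the underlying TensorRT calls, not part of the PR (assuming `net`, `in`, `w`, `b`, `n_output`, and `out_scale` are already prepared; `SetTensorDynamicRange` is Paddle's wrapper, the raw call is `ITensor::setDynamicRange`):

```cpp
// A 1x1 convolution over a [*, q, 1, 1] input with n_output filters computes
// the same y = Wx + b as an IFullyConnectedLayer with weight [n_output, q].
nvinfer1::DimsHW ksize(1, 1);
auto* conv = net->addConvolution(*in, n_output, ksize, w, b);

// int8 mode: tell TensorRT the calibrated abs-max of the conv output, taken
// from the op's "out_threshold" attribute (or "Out" when only support_int8).
conv->getOutput(0)->setDynamicRange(-out_scale, out_scale);
```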
@@ -255,15 +323,20 @@
if (enable_int8 || support_int8) {
// add conv1x1 layer
nvinfer1::DimsHW nv_ksize(1, 1);
auto* fc_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *X, n_output, nv_ksize,
weight.get(), bias.get());
auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
Convolution,
*X,
n_output,
nv_ksize,
weight.get(),
bias.get());
if (activation_type == "relu") {
fc_layer_int8->setName(
("ernie_fc_op_int8: Convolution (Output: " + output_name + ")")
.c_str());
PADDLE_ENFORCE_EQ(
op_desc.HasAttr("out_threshold"), true,
op_desc.HasAttr("out_threshold"),
true,
platform::errors::InvalidArgument(
"must have out threshold in fc layers in int8 mode"));
float out_scale = 0;
Expand All @@ -275,15 +348,20 @@ class FcOpConverter : public OpConverter {
}
engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0),
out_scale);
nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_layer_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8, "relu_after_ernie_fc_int8",
{output_name}, test_mode);
nvinfer1::IActivationLayer* relu_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_layer_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8,
"relu_after_ernie_fc_int8",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_layer_int8,
"ernie_fc_op_int8: Convolution",
{output_name}, test_mode);
{output_name},
test_mode);
}
} else {
// add fc layer
@@ -292,25 +370,30 @@
if (activation_type == "relu") {
fc_layer_float->setName(
("ernie_fc_op_float: (Output: " + output_name + ")").c_str());
nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_layer_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
nvinfer1::IActivationLayer* relu_layer_float =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_layer_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_float,
"relu_after_ernie_fc_float", {output_name},
"relu_after_ernie_fc_float",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_layer_float, "ernie_fc_op_float",
{output_name}, test_mode);
RreplenishLayerAndOutput(
fc_layer_float, "ernie_fc_op_float", {output_name}, test_mode);
}
}
} else { // need reshape input before and after fc
PADDLE_ENFORCE_GT(
x_dim.nbDims, x_num_col_dims,
x_dim.nbDims,
x_num_col_dims,
platform::errors::InvalidArgument(
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_dim.nbDims > x_num_col_dims, but "
"x_dim.nbDims : %d, x_num_col_dims : %d.",
x_dim.nbDims, x_num_col_dims));
x_dim.nbDims,
x_num_col_dims));
auto* reshape_before_fc_layer =
reshape_before_fc(X, x_dim, x_num_col_dims, output_name);
auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);
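
For orientation, the three stages assemble as follows in a raw-TensorRT sketch of the dynamic-shape float path; again not part of the PR (it reuses the hypothetical `BuildFcInputShape` helper from the earlier sketch, `w`/`b` are prepared `nvinfer1::Weights`, and shapes follow the worked example above):

```cpp
#include <NvInfer.h>

// x: [N, S, 768] -> returns a tensor of shape [N, S, n_output] (k = 2).
nvinfer1::ITensor* ConvertFcDynamic(nvinfer1::INetworkDefinition* net,
                                    nvinfer1::ITensor* x, int n_output,
                                    nvinfer1::Weights w, nvinfer1::Weights b) {
  // 1) reshape_before_fc: pad to [N, S, 768, 1, 1] via a runtime shape tensor.
  auto* pre = net->addShuffle(*x);
  pre->setInput(1, *BuildFcInputShape(net, x, /*k=*/2));

  // 2) the GEMM itself.
  auto* fc = net->addFullyConnected(*pre->getOutput(0), n_output, w, b);

  // 3) reshape_after_fc: keep the first k + 1 = 3 entries of the output
  //    shape, dropping the padded 1 x 1 (the converter's Gather + iota).
  static const int kKeep[3] = {0, 1, 2};
  nvinfer1::Weights idxW{nvinfer1::DataType::kINT32, kKeep, 3};
  auto* idx = net->addConstant({1, {3}}, idxW)->getOutput(0);
  auto* shape = net->addShape(*fc->getOutput(0))->getOutput(0);
  auto* kept = net->addGather(*shape, *idx, /*axis=*/0)->getOutput(0);

  auto* post = net->addShuffle(*fc->getOutput(0));
  post->setInput(1, *kept);
  return post->getOutput(0);
}
```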