Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[inference][trt] add reduce_all and reduce_any #53088

Merged
merged 15 commits into from
May 6, 2023
2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2612,6 +2612,8 @@ USE_TRT_CONVERTER(reduce_max);
USE_TRT_CONVERTER(reduce_min);
USE_TRT_CONVERTER(reduce_sum);
USE_TRT_CONVERTER(reduce_prod);
USE_TRT_CONVERTER(reduce_any);
USE_TRT_CONVERTER(reduce_all);
USE_TRT_CONVERTER(tile);
USE_TRT_CONVERTER(conv3d);
USE_TRT_CONVERTER(conv3d_transpose);
Expand Down
78 changes: 78 additions & 0 deletions paddle/fluid/inference/tensorrt/convert/reduce_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ const std::unordered_map<std::string, std::vector<nvinfer1::ReduceOperation>>
{"reduce_max", {nvinfer1::ReduceOperation::kMAX}},
{"reduce_min", {nvinfer1::ReduceOperation::kMIN}},
{"reduce_prod", {nvinfer1::ReduceOperation::kPROD}},
{"reduce_any", {nvinfer1::ReduceOperation::kMAX}},
{"reduce_all", {nvinfer1::ReduceOperation::kMIN}},
};

class ReduceSumOpConverter : public ReduceOpConverter {
Expand Down Expand Up @@ -122,6 +124,80 @@ class ReduceProdOpConverter : public ReduceOpConverter {
ReduceProdOpConverter() { op_type = "reduce_prod"; }
};

// Converts paddle's reduce_any op into a TensorRT Reduce layer.
// reduce_any is mapped to ReduceOperation::kMAX in ops_ (max over {0,1}
// values is 1 iff any element is true).
class ReduceAnyOpConverter : public ReduceOpConverter {
public:
ReduceAnyOpConverter() { op_type = "reduce_any"; }
// Builds: X -> Identity(cast to INT32) -> Reduce -> Identity(cast back).
// NOTE(review): the int32 round-trip presumably exists because TRT's
// IReduceLayer does not accept kBOOL inputs — confirm for the TRT
// versions this targets.
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer";
framework::OpDesc op_desc(op, nullptr);
// Look up the ReduceOperation (kMAX for reduce_any, kMIN for reduce_all)
// from the shared ops_ table keyed by op_type.
auto reduce_type = ops_.find(op_type);
auto* x = engine_->GetITensor(op_desc.Input("X").front());
// Cast the (bool) input to INT32 so the reduce layer can consume it.
nvinfer1::IReduceLayer* reduce_layer = nullptr;
auto* cast_layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *x);
cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
cast_layer->getOutput(0)->setType(nvinfer1::DataType::kINT32);

nvinfer1::Dims input_shape = x->getDimensions();
int input_dims = input_shape.nbDims;
// Op attributes: which axes to reduce, whether to keep reduced dims,
// and whether to reduce over every axis.
bool keep_dim = PADDLE_GET_CONST(bool, op_desc.GetAttr("keep_dim"));
std::vector<int32_t> dim =
PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("dim"));
bool reduce_all = PADDLE_GET_CONST(bool, op_desc.GetAttr("reduce_all"));

if (reduce_all) {
// Reduce across every visible axis: set all bits of the axis mask.
uint32_t reduce_dim = 0;
for (int i = 0; i < input_dims; ++i) {
reduce_dim |= 1 << i;
}
reduce_layer = TRT_ENGINE_ADD_LAYER(engine_,
Reduce,
*cast_layer->getOutput(0),
reduce_type->second.front(),
reduce_dim,
keep_dim);
} else {
// Translate paddle axis indices into TRT's reduce-axes bitmask.
auto CvtToBitMask = [&](const std::vector<int32_t>& dims) -> uint32_t {
uint32_t res = 0;
for (auto x : dims) {
if (x < 0) {
// Negative axes count from the end, python-style.
res |= 1 << (x + input_dims);
} else {
// NOTE(review): in static-shape (implicit batch) mode axes are
// shifted down by one — presumably because TRT dims exclude the
// batch dim there; confirm against the other reduce converters.
if (!engine_->with_dynamic_shape()) x = x - 1;
res |= 1 << x;
}
}
return res;
};
reduce_layer = TRT_ENGINE_ADD_LAYER(engine_,
Reduce,
*cast_layer->getOutput(0),
reduce_type->second.front(),
CvtToBitMask(dim),
keep_dim);
}

auto output_name = op_desc.Output("Out")[0];

// Cast the int32 reduction result back for the output tensor.
auto* layer =
TRT_ENGINE_ADD_LAYER(engine_, Identity, *reduce_layer->getOutput(0));
layer->setOutputType(0, nvinfer1::DataType::kBOOL);
layer->getOutput(0)->setType(nvinfer1::DataType::kBOOL);
// Ensure that the output type and input type are consistent. (This
// overwrites the kBOOL set just above with the original input's type;
// for reduce_any/reduce_all the input is bool, so they coincide.)
layer->getOutput(0)->setType(cast_layer->getInput(0)->getType());

RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode);
};
};

// Converts reduce_all by reusing ReduceAnyOpConverter's network layout;
// only op_type differs, which selects ReduceOperation::kMIN from ops_
// (min over {0,1} values is 1 iff every element is true).
class ReduceAllOpConverter : public ReduceAnyOpConverter {
public:
// Base ctor first sets op_type = "reduce_any"; this overrides it.
ReduceAllOpConverter() { op_type = "reduce_all"; }
};

} // namespace tensorrt
} // namespace inference
} // namespace paddle
Expand All @@ -131,3 +207,5 @@ REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_max, ReduceMaxOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_min, ReduceMinOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_prod, ReduceProdOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_any, ReduceAnyOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_all, ReduceAllOpConverter);
37 changes: 30 additions & 7 deletions paddle/fluid/inference/tensorrt/op_teller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2175,7 +2175,8 @@ struct SimpleOpTypeSetTeller : public Teller {

if (op_type == "reduce_sum" || op_type == "reduce_mean" ||
op_type == "reduce_max" || op_type == "reduce_min" ||
op_type == "reduce_prod") {
op_type == "reduce_prod" || op_type == "reduce_any" ||
op_type == "reduce_all") {
if (!desc.HasAttr("dim", /*with_attr_var=*/false)) {
VLOG(3) << "Skip to convert into TRT while found Attribute('dim') is "
"Variable type in "
Expand Down Expand Up @@ -2216,14 +2217,28 @@ struct SimpleOpTypeSetTeller : public Teller {
return false;
}

#if IS_TRT_VERSION_LT(7000)
auto dtype = x_var_desc->GetDataType();
if (dtype != framework::proto::VarType::FP32) {
VLOG(3) << "reduce op input data type must be float32 using TensorRT "
"< 7.0";
return false;
}
if (op_type == "reduce_all" || op_type == "reduce_any") {
if (dtype != framework::proto::VarType::BOOL) {
VLOG(3)
<< "reduce_all and reduce_any op input data type must be bool";
return false;
}
} else {
#if IS_TRT_VERSION_GE(7000)
if (dtype != framework::proto::VarType::INT32 &&
dtype != framework::proto::VarType::FP32) {
VLOG(3) << "reduce op input data type must be int32 or float32";
return false;
}
#else
if (dtype != framework::proto::VarType::FP32) {
VLOG(3) << "reduce op input data type must be float32 using TensorRT "
"< 7.0";
return false;
}
#endif
}
}
#if IS_TRT_VERSION_GE(7000)
if (op_type == "tile") {
Expand Down Expand Up @@ -2786,8 +2801,12 @@ struct SimpleOpTypeSetTeller : public Teller {
"nearest_interp",
"anchor_generator",
"reduce_max",
"reduce_min",
"reduce_mean",
"reduce_sum",
"reduce_prod",
"reduce_any",
"reduce_all",
"conv3d",
"conv3d_transpose",
"mish",
Expand Down Expand Up @@ -2942,8 +2961,12 @@ struct SimpleOpTypeSetTeller : public Teller {
"nearest_interp",
"anchor_generator",
"reduce_max",
"reduce_min",
"reduce_mean",
"reduce_sum",
"reduce_prod",
"reduce_any",
"reduce_all",
"conv3d",
"conv3d_transpose",
"mish",
Expand Down
93 changes: 49 additions & 44 deletions test/ir/inference/test_trt_convert_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def generate_input1(dtype, attrs: List[Dict[str, Any]]):
return np.random.random([1, 3, 64, 64]).astype(np.float32)
elif dtype == 2:
return np.random.random([1, 3, 64, 64]).astype(np.int32)
elif dtype == 0:
return np.random.random([1, 3, 64, 64]).astype(np.bool_)

for keep_dim in [True, False]:
for dim in [
Expand All @@ -65,15 +67,24 @@ def generate_input1(dtype, attrs: List[Dict[str, Any]]):
[3, 4, 5],
]:
for reduce_all in [True, False]:
for out_dtype in [-1, 2, 5]:
for op_type in [
"reduce_max",
"reduce_min",
"reduce_mean",
"reduce_sum",
"reduce_prod",
]:
dics1 = [
for out_dtype in [-1, 0, 2, 5]:
if out_dtype != 0:
reduce_type_list = [
"reduce_max",
"reduce_min",
"reduce_mean",
"reduce_sum",
"reduce_prod",
]
else:
reduce_type_list = [
"reduce_all",
"reduce_any",
]

for op_type in reduce_type_list:

dics = [
{
"keep_dim": keep_dim,
"dim": dim,
Expand All @@ -83,46 +94,40 @@ def generate_input1(dtype, attrs: List[Dict[str, Any]]):
},
{},
]
dics2 = [

ops_config = [
{
"keep_dim": keep_dim,
"dim": dim,
"reduce_all": reduce_all,
"out_dtype": out_dtype,
"in_dtype": out_dtype,
},
{},
"op_type": op_type,
"op_inputs": {"X": ["input_data"]},
"op_outputs": {
"Out": ["reduce_output_data"]
},
"op_attrs": dics[0],
}
]
for dics in [dics1, dics2]:
ops_config = [
{
"op_type": op_type,
"op_inputs": {"X": ["input_data"]},
"op_outputs": {
"Out": ["reduce_output_data"]
},
"op_attrs": dics[0],
}
]
ops = self.generate_op_config(ops_config)

program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data": TensorConfig(
data_gen=partial(
generate_input1, out_dtype, dics
)
if op_type in ["reduce_any", "reduce_all"]:
ops_config[0]["outputs_dtype"] = {
"reduce_output_data": np.bool_
}
ops = self.generate_op_config(ops_config)

program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data": TensorConfig(
data_gen=partial(
generate_input1, out_dtype, dics
)
},
outputs=["reduce_output_data"],
)
)
},
outputs=["reduce_output_data"],
)

if not self.is_program_valid(program_config):
continue
if not self.is_program_valid(program_config):
continue

yield program_config
yield program_config

def sample_predictor_configs(
self, program_config
Expand Down