diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index a5526c7443e7d..7d59d0fc4bc7a 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2356,6 +2356,8 @@ USE_TRT_CONVERTER(reduce_max);
 USE_TRT_CONVERTER(reduce_min);
 USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(reduce_prod);
+USE_TRT_CONVERTER(reduce_any);
+USE_TRT_CONVERTER(reduce_all);
 USE_TRT_CONVERTER(tile);
 USE_TRT_CONVERTER(conv3d);
 USE_TRT_CONVERTER(conv3d_transpose);
diff --git a/paddle/fluid/inference/tensorrt/convert/reduce_op.cc b/paddle/fluid/inference/tensorrt/convert/reduce_op.cc
index 637b0a662f79a..ebf482b78d958 100644
--- a/paddle/fluid/inference/tensorrt/convert/reduce_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/reduce_op.cc
@@ -105,6 +105,8 @@ const std::unordered_map<std::string, std::vector<nvinfer1::ReduceOperation>>
       {"reduce_max", {nvinfer1::ReduceOperation::kMAX}},
       {"reduce_min", {nvinfer1::ReduceOperation::kMIN}},
       {"reduce_prod", {nvinfer1::ReduceOperation::kPROD}},
+      {"reduce_any", {nvinfer1::ReduceOperation::kMAX}},
+      {"reduce_all", {nvinfer1::ReduceOperation::kMIN}},
 };
 
 class ReduceSumOpConverter : public ReduceOpConverter {
@@ -132,6 +134,87 @@ class ReduceProdOpConverter : public ReduceOpConverter {
   ReduceProdOpConverter() { op_type = "reduce_prod"; }
 };
 
+// Converts Paddle reduce_any / reduce_all (bool input) into a TensorRT reduce
+// layer. The input is cast to INT32 via an Identity layer, reduced with the
+// operation looked up in ops_ (kMAX for any, kMIN for all), then cast back to
+// BOOL.
+class ReduceAnyOpConverter : public ReduceOpConverter {
+ public:
+  ReduceAnyOpConverter() { op_type = "reduce_any"; }
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer";
+    framework::OpDesc op_desc(op, nullptr);
+    auto reduce_type = ops_.find(op_type);
+    auto* x = engine_->GetITensor(op_desc.Input("X").front());
+    nvinfer1::IReduceLayer* reduce_layer = nullptr;
+    // Cast the input to INT32 (not float) before reducing.
+    auto* cast_layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *x);
+    cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
+    cast_layer->getOutput(0)->setType(nvinfer1::DataType::kINT32);
+
+    nvinfer1::Dims input_shape = x->getDimensions();
+    int input_dims = input_shape.nbDims;
+    // Read the reduction attributes.
+    bool keep_dim = PADDLE_GET_CONST(bool, op_desc.GetAttr("keep_dim"));
+    std::vector<int32_t> dim =
+        PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("dim"));
+    bool reduce_all = PADDLE_GET_CONST(bool, op_desc.GetAttr("reduce_all"));
+
+    if (reduce_all) {
+      uint32_t reduce_dim = 0;
+      for (int i = 0; i < input_dims; ++i) {
+        reduce_dim |= 1U << i;
+      }
+      reduce_layer = TRT_ENGINE_ADD_LAYER(engine_,
+                                          Reduce,
+                                          *cast_layer->getOutput(0),
+                                          reduce_type->second.front(),
+                                          reduce_dim,
+                                          keep_dim);
+    } else {
+      // Translate "dim" into an axis bit mask; negative axes wrap around.
+      auto CvtToBitMask = [&](const std::vector<int32_t>& dims) -> uint32_t {
+        uint32_t res = 0;
+        for (auto axis : dims) {
+          if (axis < 0) {
+            res |= 1U << (axis + input_dims);
+          } else {
+            // Static shape: axis shifts by one (presumably implicit batch).
+            if (!engine_->with_dynamic_shape()) axis = axis - 1;
+            res |= 1U << axis;
+          }
+        }
+        return res;
+      };
+      reduce_layer = TRT_ENGINE_ADD_LAYER(engine_,
+                                          Reduce,
+                                          *cast_layer->getOutput(0),
+                                          reduce_type->second.front(),
+                                          CvtToBitMask(dim),
+                                          keep_dim);
+    }
+
+    auto output_name = op_desc.Output("Out")[0];
+
+    auto* layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Identity, *reduce_layer->getOutput(0));
+    layer->setOutputType(0, nvinfer1::DataType::kBOOL);
+    layer->getOutput(0)->setType(nvinfer1::DataType::kBOOL);
+    // Ensure that the output type and input type are consistent.
+    layer->getOutput(0)->setType(cast_layer->getInput(0)->getType());
+
+    RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode);
+  }
+};
+
+// reduce_all shares the reduce_any conversion; only op_type differs.
+class ReduceAllOpConverter : public ReduceAnyOpConverter {
+ public:
+  ReduceAllOpConverter() { op_type = "reduce_all"; }
+};
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
@@ -141,3 +224,5 @@ REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter);
 REGISTER_TRT_OP_CONVERTER(reduce_max, ReduceMaxOpConverter);
 REGISTER_TRT_OP_CONVERTER(reduce_min, ReduceMinOpConverter);
 REGISTER_TRT_OP_CONVERTER(reduce_prod, ReduceProdOpConverter);
+REGISTER_TRT_OP_CONVERTER(reduce_any, ReduceAnyOpConverter);
+REGISTER_TRT_OP_CONVERTER(reduce_all, ReduceAllOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 6880412e8d63e..fab17348e6948 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2089,7 +2089,8 @@ struct SimpleOpTypeSetTeller : public Teller {
 
     if (op_type == "reduce_sum" || op_type == "reduce_mean" ||
         op_type == "reduce_max" || op_type == "reduce_min" ||
-        op_type == "reduce_prod") {
+        op_type == "reduce_prod" || op_type == "reduce_any" ||
+        op_type == "reduce_all") {
       if (!desc.HasAttr("dim", /*with_attr_var=*/false)) {
         VLOG(3) << "Skip to convert into TRT while found Attribute('dim') is "
                    "Variable type in "
@@ -2131,19 +2132,27 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
 
       auto dtype = x_var_desc->GetDataType();
+      if (op_type == "reduce_all" || op_type == "reduce_any") {
+        if (dtype != framework::proto::VarType::BOOL) {
+          VLOG(3)
+              << "reduce_all and reduce_any op input data type must be bool";
+          return false;
+        }
+      } else {
 #if IS_TRT_VERSION_GE(7000)
-      if (dtype != framework::proto::VarType::INT32 &&
-          dtype != framework::proto::VarType::FP32) {
-        VLOG(3) << "reduce op input data type must be int32 or float32";
-        return false;
-      }
+        if (dtype != framework::proto::VarType::INT32 &&
+            dtype != framework::proto::VarType::FP32) {
+          VLOG(3) << "reduce op input data type must be int32 or float32";
+          return false;
+        }
 #else
-      if (dtype != framework::proto::VarType::FP32) {
-        VLOG(3) << "reduce op input data type must be float32 using TensorRT "
-                   "< 7.0";
-        return false;
-      }
+        if (dtype != framework::proto::VarType::FP32) {
+          VLOG(3) << "reduce op input data type must be float32 using TensorRT "
+                     "< 7.0";
+          return false;
+        }
 #endif
+      }
     }
 #if IS_TRT_VERSION_GE(7000)
     if (op_type == "tile") {
@@ -2534,8 +2543,12 @@ struct SimpleOpTypeSetTeller : public Teller {
       "nearest_interp",
       "anchor_generator",
       "reduce_max",
+      "reduce_min",
       "reduce_mean",
       "reduce_sum",
+      "reduce_prod",
+      "reduce_any",
+      "reduce_all",
       "conv3d",
       "conv3d_transpose",
       "mish",
@@ -2683,8 +2696,12 @@ struct SimpleOpTypeSetTeller : public Teller {
       "nearest_interp",
       "anchor_generator",
       "reduce_max",
+      "reduce_min",
       "reduce_mean",
       "reduce_sum",
+      "reduce_prod",
+      "reduce_any",
+      "reduce_all",
       "conv3d",
       "conv3d_transpose",
       "mish",
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce.py
index 1c04e40fe6d87..5db38121c5f3c 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_reduce.py
@@ -51,6 +51,9 @@ def generate_input1(dtype, attrs: List[Dict[str, Any]]):
                 return np.random.random([1, 3, 64, 64]).astype(np.float32)
             elif dtype == 2:
                 return np.random.random([1, 3, 64, 64]).astype(np.int32)
+            elif dtype == 0:
+                # Mix True/False so reduce_all and reduce_any can differ.
+                return np.random.randint(0, 2, [1, 3, 64, 64]).astype(np.bool_)
 
         for keep_dim in [True, False]:
             for dim in [
@@ -65,15 +68,24 @@ def generate_input1(dtype, attrs: List[Dict[str, Any]]):
                 [3, 4, 5],
             ]:
                 for reduce_all in [True, False]:
-                    for out_dtype in [-1, 2, 5]:
-                        for op_type in [
-                            "reduce_max",
-                            "reduce_min",
-                            "reduce_mean",
-                            "reduce_sum",
-                            "reduce_prod",
-                        ]:
-                            dics1 = [
+                    for out_dtype in [-1, 0, 2, 5]:
+                        if out_dtype != 0:
+                            reduce_type_list = [
+                                "reduce_max",
+                                "reduce_min",
+                                "reduce_mean",
+                                "reduce_sum",
+                                "reduce_prod",
+                            ]
+                        else:
+                            reduce_type_list = [
+                                "reduce_all",
+                                "reduce_any",
+                            ]
+
+                        for op_type in reduce_type_list:
+
+                            dics = [
                                 {
                                     "keep_dim": keep_dim,
                                     "dim": dim,
@@ -83,46 +95,40 @@ def generate_input1(dtype, attrs: List[Dict[str, Any]]):
                                 },
                                 {},
                             ]
-                            dics2 = [
+
+                            ops_config = [
                                 {
-                                    "keep_dim": keep_dim,
-                                    "dim": dim,
-                                    "reduce_all": reduce_all,
-                                    "out_dtype": out_dtype,
-                                    "in_dtype": out_dtype,
-                                },
-                                {},
+                                    "op_type": op_type,
+                                    "op_inputs": {"X": ["input_data"]},
+                                    "op_outputs": {
+                                        "Out": ["reduce_output_data"]
+                                    },
+                                    "op_attrs": dics[0],
+                                }
                             ]
-                            for dics in [dics1, dics2]:
-                                ops_config = [
-                                    {
-                                        "op_type": op_type,
-                                        "op_inputs": {"X": ["input_data"]},
-                                        "op_outputs": {
-                                            "Out": ["reduce_output_data"]
-                                        },
-                                        "op_attrs": dics[0],
-                                    }
-                                ]
-                                ops = self.generate_op_config(ops_config)
-
-                                program_config = ProgramConfig(
-                                    ops=ops,
-                                    weights={},
-                                    inputs={
-                                        "input_data": TensorConfig(
-                                            data_gen=partial(
-                                                generate_input1, out_dtype, dics
-                                            )
+                            if op_type in ["reduce_any", "reduce_all"]:
+                                ops_config[0]["outputs_dtype"] = {
+                                    "reduce_output_data": np.bool_
+                                }
+                            ops = self.generate_op_config(ops_config)
+
+                            program_config = ProgramConfig(
+                                ops=ops,
+                                weights={},
+                                inputs={
+                                    "input_data": TensorConfig(
+                                        data_gen=partial(
+                                            generate_input1, out_dtype, dics
                                         )
-                                    },
-                                    outputs=["reduce_output_data"],
-                                )
+                                    )
+                                },
+                                outputs=["reduce_output_data"],
+                            )
 
-                                if not self.is_program_valid(program_config):
-                                    continue
+                            if not self.is_program_valid(program_config):
+                                continue
 
-                                yield program_config
+                            yield program_config
 
     def sample_predictor_configs(
         self, program_config