From c07117d73ea5276d256100363d6b0f6cdac2cabb Mon Sep 17 00:00:00 2001 From: Zuzanna Gawrysiak Date: Tue, 8 Feb 2022 13:55:14 +0100 Subject: [PATCH 1/3] Quantize elementwise mul op --- .../framework/ir/graph_pattern_detector.cc | 16 +++++ .../framework/ir/graph_pattern_detector.h | 15 ++++ .../framework/ir/mkldnn/cpu_quantize_pass.cc | 70 +++++++++++++++++++ .../framework/ir/mkldnn/cpu_quantize_pass.h | 1 + .../ir/mkldnn/cpu_quantize_pass_tester.cc | 47 ++++++++++++- .../ir/mkldnn/cpu_quantize_placement_pass.cc | 8 +-- 6 files changed, 151 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 18068e22b7f3c..5288c516e158c 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -2068,6 +2068,22 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) { return out_var; } +PDNode *patterns::ElementwiseMul::operator()(PDNode *x_var, PDNode *y_var) { + auto elementwise_mul_op = pattern->NewNode(elementwise_mul_op_repr()) + ->assert_is_op("elementwise_mul"); + + x_var->AsInput()->assert_is_op_input("elementwise_mul", "X"); + y_var->AsInput()->assert_is_op_input("elementwise_mul", "Y"); + auto out_var = pattern->NewNode(elementwise_mul_out_repr()) + ->AsOutput() + ->assert_is_op_output("elementwise_mul", "Out"); + + elementwise_mul_op->LinksFrom({x_var, y_var}); + elementwise_mul_op->LinksTo({out_var}); + + return out_var; +} + PDNode *patterns::Concat::operator()() { auto concat_op = pattern->NewNode(concat_op_repr())->assert_is_op("concat"); diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 062d2f9dedce6..528cd91d12091 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -1032,6 +1032,21 @@ struct ElementwiseAdd : public PatternBase { PATTERN_DECL_NODE(elementwise_add_out); }; +// ElementwiseMul op +// Forward pass for element-wise multiplication +// elementwise_mul_out is the result of the operator +struct ElementwiseMul : public PatternBase { + ElementwiseMul(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "elementwise_mul") {} + + PDNode* operator()(PDNode* x_var, PDNode* y_var); + + PATTERN_DECL_NODE(elementwise_mul_op); + PATTERN_DECL_NODE(elementwise_mul_x); + PATTERN_DECL_NODE(elementwise_mul_y); + PATTERN_DECL_NODE(elementwise_mul_out); +}; + // Transpose op // Forward pass for transpose. // transpose_out is a result of the operator. 
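Note: the pattern added above only describes the subgraph shape (X, Y -> elementwise_mul -> Out); a pass still has to instantiate it, bind the free inputs, and register a handler with the detector. Below is a minimal sketch of that wiring using the names declared above — the scope string is a placeholder and the handler body is elided; the real handler is the quantization rewrite in the next file of this patch:

    GraphPatternDetector gpd;
    auto pattern = gpd.mutable_pattern();
    patterns::ElementwiseMul elementwise_mul_pattern{pattern, "my_scope"};
    // Bind free X/Y nodes so the detector matches any producers of the inputs.
    elementwise_mul_pattern(
        pattern->NewNode(elementwise_mul_pattern.elementwise_mul_x_repr()),
        pattern->NewNode(elementwise_mul_pattern.elementwise_mul_y_repr()));
    gpd(graph, [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) {
      GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_op, elementwise_mul_op,
                                elementwise_mul_pattern);
      // ... inspect or rewrite the matched elementwise_mul subgraph here ...
    });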
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index 371482b5343d6..cb9f48eabf397 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -877,6 +877,75 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const { quantize_elementwise_add_count); } +void CPUQuantizePass::QuantizeElementwiseMul(Graph* graph) const { + GraphPatternDetector gpd; + auto pattern = gpd.mutable_pattern(); + patterns::ElementwiseMul elementwise_mul_pattern{pattern, name_scope_}; + + elementwise_mul_pattern( + pattern->NewNode(elementwise_mul_pattern.elementwise_mul_x_repr()), + pattern->NewNode(elementwise_mul_pattern.elementwise_mul_y_repr())); + + int quantize_elementwise_mul_count = 0; + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + VLOG(4) << "Quantize elementwise_mul op"; + GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_op, elementwise_mul_op, + elementwise_mul_pattern); + + // skip if should not be quantized + if (!platform::HasOpINT8DataType(elementwise_mul_op->Op())) { + LogQuantizationDisabled(elementwise_mul_op); + return; + } + + GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_x, elementwise_mul_x, + elementwise_mul_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_y, elementwise_mul_y, + elementwise_mul_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_out, elementwise_mul_out, + elementwise_mul_pattern); + + if (!AreScalesPresentForNodes( + {elementwise_mul_x, elementwise_mul_y, elementwise_mul_out})) { + LogCannotQuantizeOp(elementwise_mul_op); + return; + } + + bool is_x_unsigned{false}, is_y_unsigned{false}; + auto input_x_scale = + GetScaleValueForNode(elementwise_mul_x, &is_x_unsigned); + auto input_y_scale = + GetScaleValueForNode(elementwise_mul_y, &is_y_unsigned); + + // TODO(sfraczek): add support for different signedness + if (is_x_unsigned != is_y_unsigned) { + LogCannotQuantizeOp(elementwise_mul_op, + "ElementwiseMul inputs must be of the same type."); + return; + } + + QuantizeInput(g, elementwise_mul_op, elementwise_mul_x, "X", input_x_scale, + is_x_unsigned, "Scale_x"); + QuantizeInput(g, elementwise_mul_op, elementwise_mul_y, "Y", input_y_scale, + is_y_unsigned, "Scale_y"); + + bool is_output_unsigned{false}; + auto output_scale = + GetScaleValueForNode(elementwise_mul_out, &is_output_unsigned); + + DequantizeOutput(g, elementwise_mul_op, elementwise_mul_out, "Out", + output_scale, is_output_unsigned, "Scale_out"); + + ++quantize_elementwise_mul_count; + }; + gpd(graph, handler); + AddStatis(quantize_elementwise_mul_count); + + PrettyLogDetail("--- quantized %d elementwise_mul ops", + quantize_elementwise_mul_count); +} + void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const { GraphPatternDetector gpd; patterns::FusionGru pattern{gpd.mutable_pattern(), name_scope_}; @@ -1147,6 +1216,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const { QuantizeReshape(graph); QuantizeMatmul(graph); QuantizeElementwiseAdd(graph); + QuantizeElementwiseMul(graph); QuantizeFusionGru(graph); QuantizeMultiGru(graph); QuantizeFusionLSTM(graph); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h index 412c4e40a01d5..ebf3c3e65495e 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h @@ -58,6 +58,7 @@ class CPUQuantizePass : public FusePassBase { void
QuantizeReshape(Graph* graph) const; void QuantizeMatmul(Graph* graph) const; void QuantizeElementwiseAdd(Graph* graph) const; + void QuantizeElementwiseMul(Graph* graph) const; void QuantizeFusionGru(Graph* graph) const; void QuantizeMultiGru(Graph* graph) const; void QuantizeFusionLSTM(Graph* graph) const; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 889417b78c864..0ce5aba476e30 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -90,7 +90,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetAttr("Scale_x", 1.0f); op->SetAttr("Scale_y", 1.0f); op->SetAttr("Scale_out", 1.0f); - } else if (type == "elementwise_add") { + } else if (type == "elementwise_add" || type == "elementwise_mul") { op->SetInput("X", {inputs[0]}); if (inputs.size() > 1) op->SetInput("Y", {inputs[1]}); op->SetOutput("Out", {outputs[0]}); @@ -167,7 +167,8 @@ void CheckScales(const OpDesc* op, float scale, float shift) { scale); scale_names.push_back("Scale_in"); scale_names.push_back("Scale_out"); - } else if (type == "matmul" || type == "elementwise_add") { + } else if (type == "matmul" || type == "elementwise_add" || + type == "elementwise_mul") { scale_names.push_back("Scale_x"); scale_names.push_back("Scale_y"); scale_names.push_back("Scale_out"); @@ -588,6 +589,48 @@ TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) { expected_operators, added_nodes, 1.f, 1.f, "", "b"); } +static const std::initializer_list<std::string> variable_names_elementwise_mul = {"a", "b", "c", "d", "e", "f"}; + +ProgramDesc BuildProgramDescElementwiseMul() { + ProgramDesc prog; + for (auto& v : variable_names_elementwise_mul) { + prog.MutableBlock(0)->Var(v); + } + SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); + SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); + SetOp(&prog, "elementwise_mul", "ElementwiseMul", {"b", "d"}, {"e"}, true, + "int8"); + SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); + + return prog; +} + +TEST(CpuQuantizePass, elementwise_mul) { + // 2 Quant + 2 IN + 1 DeQuant + 1 OUT + int added_nodes = 6; + std::unordered_map<std::string, int> expected_operators = { + {"elementwise_mul", 1}, {"quantize", 2}, {"dequantize", 3}}; + MainTest(BuildProgramDescElementwiseMul(), variable_names_elementwise_mul, + expected_operators, added_nodes, SCALE * S8_MAX); +} + +TEST(CpuQuantizePass, elementwise_mul_output_scale_missing) { + int added_nodes = 0; + std::unordered_map<std::string, int> expected_operators = { + {"elementwise_mul", 1}, {"quantize", 0}, {"dequantize", 2}}; + MainTest(BuildProgramDescElementwiseMul(), variable_names_elementwise_mul, + expected_operators, added_nodes, 1.f, 1.f, "e"); +} + +TEST(CpuQuantizePass, elementwise_mul_unsigned_and_signed_input) { + int added_nodes = 0; + std::unordered_map<std::string, int> expected_operators = { + {"elementwise_mul", 1}, {"quantize", 0}, {"dequantize", 2}}; + MainTest(BuildProgramDescElementwiseMul(), variable_names_elementwise_mul, + expected_operators, added_nodes, 1.f, 1.f, "", "b"); +} + const std::vector<std::string> churn_out_vars(ProgramDesc* prog, const std::string& prefix, int number) { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc index 5f74b61ee86aa..3b883dac9782a 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc +++
b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc @@ -26,10 +26,10 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Marks operators which are to be quantized."; std::unordered_set<std::string> supported_op_types = std::unordered_set<std::string>( - {"concat", "conv2d", "depthwise_conv2d", "elementwise_add", "fc", - "matmul", "nearest_interp", "nearest_interp_v2", "pool2d", - "prior_box", "reshape2", "transpose2", "fusion_gru", "fusion_lstm", - "multi_gru", "slice"}); + {"concat", "conv2d", "depthwise_conv2d", "elementwise_add", + "elementwise_mul", "fc", "matmul", "nearest_interp", + "nearest_interp_v2", "pool2d", "prior_box", "reshape2", "transpose2", + "fusion_gru", "fusion_lstm", "multi_gru", "slice"}); const auto& excluded_ids_list = Get<std::unordered_set<int>>("quantize_excluded_op_ids"); const auto& op_types_list =
From c78cb67fa4994cd26565d9c222726155ddda7c11 Mon Sep 17 00:00:00 2001 From: Zuzanna Gawrysiak Date: Mon, 14 Mar 2022 22:08:03 +0100 Subject: [PATCH 2/3] Parametrize elementwise functions --- .../framework/ir/graph_pattern_detector.cc | 37 ++--- .../framework/ir/graph_pattern_detector.h | 37 ++--- .../conv_elementwise_add_mkldnn_fuse_pass.cc | 90 +++++------ .../framework/ir/mkldnn/cpu_quantize_pass.cc | 141 +++++------------- .../framework/ir/mkldnn/cpu_quantize_pass.h | 4 +- .../ir/mkldnn/cpu_quantize_pass_tester.cc | 81 +++++----- 6 files changed, 141 insertions(+), 249 deletions(-) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 5288c516e158c..164a13d1560f4 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -2052,34 +2052,19 @@ PDNode *patterns::Pool::operator()() { return output_var; } -PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) { - auto elementwise_add_op = pattern->NewNode(elementwise_add_op_repr()) - ->assert_is_op("elementwise_add"); - - x_var->AsInput()->assert_is_op_input("elementwise_add", "X"); - y_var->AsInput()->assert_is_op_input("elementwise_add", "Y"); - auto out_var = pattern->NewNode(elementwise_add_out_repr()) - ->AsOutput() - ->assert_is_op_output("elementwise_add", "Out"); - - elementwise_add_op->LinksFrom({x_var, y_var}); - elementwise_add_op->LinksTo({out_var}); - - return out_var; -} - -PDNode *patterns::ElementwiseMul::operator()(PDNode *x_var, PDNode *y_var) { - auto elementwise_mul_op = pattern->NewNode(elementwise_mul_op_repr()) - ->assert_is_op("elementwise_mul"); - - x_var->AsInput()->assert_is_op_input("elementwise_mul", "X"); - y_var->AsInput()->assert_is_op_input("elementwise_mul", "Y"); - auto out_var = pattern->NewNode(elementwise_mul_out_repr()) +PDNode *patterns::Elementwise::operator()(PDNode *x_var, PDNode *y_var, + const std::string elementwise_type) { + auto elementwise_op = + pattern->NewNode(elementwise_op_repr())->assert_is_op(elementwise_type); + + x_var->AsInput()->assert_is_op_input(elementwise_type, "X"); + y_var->AsInput()->assert_is_op_input(elementwise_type, "Y"); + auto out_var = pattern->NewNode(elementwise_out_repr()) ->AsOutput() - ->assert_is_op_output("elementwise_mul", "Out"); + ->assert_is_op_output(elementwise_type, "Out"); - elementwise_mul_op->LinksFrom({x_var, y_var}); - elementwise_mul_op->LinksTo({out_var}); + elementwise_op->LinksFrom({x_var, y_var}); + elementwise_op->LinksTo({out_var}); return out_var; } diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 528cd91d12091..17c70ace301d3 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -1016,35 +1016,20 @@ struct Pool : public PatternBase { PATTERN_DECL_NODE(pool_output); }; -// ElementwiseAdd used in residual connections. -// y_var is used and convolution output. -// The operator is removed, when residual -// connection fusion is on. -struct ElementwiseAdd : public PatternBase { - ElementwiseAdd(PDPattern* pattern, const std::string& name_scope) - : PatternBase(pattern, name_scope, "elementwise_add") {} - - PDNode* operator()(PDNode* x_var, PDNode* y_var); - - PATTERN_DECL_NODE(elementwise_add_op); - PATTERN_DECL_NODE(elementwise_add_x); - PATTERN_DECL_NODE(elementwise_add_y); - PATTERN_DECL_NODE(elementwise_add_out); -}; - -// ElementwiseMul op -// Forward pass for element-wise multiplication +// Elementwise ops +// Forward pass for element-wise operators (add, mul) // elementwise_out is the result of the operator -struct ElementwiseMul : public PatternBase { - ElementwiseMul(PDPattern* pattern, const std::string& name_scope) - : PatternBase(pattern, name_scope, "elementwise_mul") {} +struct Elementwise : public PatternBase { + Elementwise(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "elementwise") {} - PDNode* operator()(PDNode* x_var, PDNode* y_var); + PDNode* operator()(PDNode* x_var, PDNode* y_var, + const std::string elementwise_type); - PATTERN_DECL_NODE(elementwise_mul_op); - PATTERN_DECL_NODE(elementwise_mul_x); - PATTERN_DECL_NODE(elementwise_mul_y); - PATTERN_DECL_NODE(elementwise_mul_out); + PATTERN_DECL_NODE(elementwise_op); + PATTERN_DECL_NODE(elementwise_x); + PATTERN_DECL_NODE(elementwise_y); + PATTERN_DECL_NODE(elementwise_out); }; // Transpose op diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc index 2403e60df3918..ade4fc179b940 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc @@ -145,10 +145,11 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsX( patterns::Conv conv_pattern{pattern, name_scope}; auto conv_output = conv_pattern(); - patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope}; - elementwise_add_pattern( + patterns::Elementwise elementwise_pattern{pattern, name_scope}; + elementwise_pattern( conv_output, - pattern->NewNode(elementwise_add_pattern.elementwise_add_y_repr())); + pattern->NewNode(elementwise_pattern.elementwise_y_repr()), + "elementwise_add"); conv_output->AsIntermediate(); int found_conv_as_x_count = 0; @@ -160,16 +161,16 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsX( GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_identity, elementwise_add_y, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, - elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_op, elementwise_op, + elementwise_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_identity, elementwise_y, + elementwise_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_out, elementwise_out, + elementwise_pattern); - if
(FindFuseOption(*conv_op, *elementwise_add_op) != FUSE_MKLDNN) return; + if (FindFuseOption(*conv_op, *elementwise_op) != FUSE_MKLDNN) return; - if (!IsReachable(g, elementwise_add_identity, conv_output)) return; + if (!IsReachable(g, elementwise_identity, conv_output)) return; if (HasFusedActivation(conv_op)) return; @@ -179,14 +180,14 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsX( return; } - conv_op->Op()->SetInput("ResidualData", {elementwise_add_identity->Name()}); - conv_op->Op()->SetOutput("Output", {elementwise_add_out->Name()}); + conv_op->Op()->SetInput("ResidualData", {elementwise_identity->Name()}); + conv_op->Op()->SetOutput("Output", {elementwise_out->Name()}); conv_op->Op()->SetAttr("fuse_residual_connection", true); - GraphSafeRemoveNodes(g, {conv_output, elementwise_add_op}); + GraphSafeRemoveNodes(g, {conv_output, elementwise_op}); - IR_NODE_LINK_TO(elementwise_add_identity, conv_op); - IR_NODE_LINK_TO(conv_op, elementwise_add_out); + IR_NODE_LINK_TO(elementwise_identity, conv_op); + IR_NODE_LINK_TO(conv_op, elementwise_out); found_conv_as_x_count++; }; @@ -212,10 +213,11 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsY( patterns::Conv conv_pattern{pattern, name_scope}; auto conv_output = conv_pattern(); - patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope}; - elementwise_add_pattern( - pattern->NewNode(elementwise_add_pattern.elementwise_add_x_repr()), - conv_output); + patterns::Elementwise elementwise_pattern{pattern, name_scope}; + elementwise_pattern( + pattern->NewNode(elementwise_pattern.elementwise_x_repr()), + conv_output, + "elementwise_add"); conv_output->AsIntermediate(); int found_conv_as_y_count = 0; @@ -227,16 +229,16 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsY( GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, - elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_op, elementwise_op, + elementwise_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_x, elementwise_x, + elementwise_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_out, elementwise_out, + elementwise_pattern); - if (FindFuseOption(*conv_op, *elementwise_add_op) != FUSE_MKLDNN) return; + if (FindFuseOption(*conv_op, *elementwise_op) != FUSE_MKLDNN) return; - if (!IsReachable(g, elementwise_add_x, conv_output)) return; + if (!IsReachable(g, elementwise_x, conv_output)) return; if (HasFusedActivation(conv_op)) return; @@ -246,14 +248,14 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsY( return; } - conv_op->Op()->SetInput("ResidualData", {elementwise_add_x->Name()}); - conv_op->Op()->SetOutput("Output", {elementwise_add_out->Name()}); + conv_op->Op()->SetInput("ResidualData", {elementwise_x->Name()}); + conv_op->Op()->SetOutput("Output", {elementwise_out->Name()}); conv_op->Op()->SetAttr("fuse_residual_connection", true); - GraphSafeRemoveNodes(g, {conv_output, elementwise_add_op}); + GraphSafeRemoveNodes(g, {conv_output, elementwise_op}); - IR_NODE_LINK_TO(elementwise_add_x, conv_op); - IR_NODE_LINK_TO(conv_op, elementwise_add_out); + IR_NODE_LINK_TO(elementwise_x, conv_op); + IR_NODE_LINK_TO(conv_op, elementwise_out); found_conv_as_y_count++; }; @@ -282,8 +284,8 
@@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseProjectionConv( patterns::Conv conv_y_pattern{pattern, name_scope}; auto conv_y_output = conv_y_pattern(); - patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope}; - elementwise_add_pattern(conv_x_output, conv_y_output); + patterns::Elementwise elementwise_pattern{pattern, name_scope}; + elementwise_pattern(conv_x_output, conv_y_output, "elementwise_add"); conv_x_output->AsIntermediate(); conv_y_output->AsIntermediate(); @@ -301,10 +303,10 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseProjectionConv( GET_IR_NODE_FROM_SUBGRAPH(conv_y_filter, conv_filter, conv_y_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_y_output, conv_output, conv_y_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, - elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_op, elementwise_op, + elementwise_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_out, elementwise_out, + elementwise_pattern); if (!IsCompat(subgraph, g)) { LOG(WARNING) @@ -312,8 +314,8 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseProjectionConv( return; } - if (FindFuseOption(*conv_x_op, *elementwise_add_op) != FUSE_MKLDNN) return; - if (FindFuseOption(*conv_y_op, *elementwise_add_op) != FUSE_MKLDNN) return; + if (FindFuseOption(*conv_x_op, *elementwise_op) != FUSE_MKLDNN) return; + if (FindFuseOption(*conv_y_op, *elementwise_op) != FUSE_MKLDNN) return; Node* projection_node; Node* residual_conv_op; @@ -333,14 +335,14 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseProjectionConv( if (HasFusedActivation(residual_conv_op)) return; residual_conv_op->Op()->SetInput("ResidualData", {projection_node->Name()}); - residual_conv_op->Op()->SetOutput("Output", {elementwise_add_out->Name()}); + residual_conv_op->Op()->SetOutput("Output", {elementwise_out->Name()}); residual_conv_op->Op()->SetAttr("fuse_residual_connection", true); - GraphSafeRemoveNodes(g, {residual_conv_output, elementwise_add_op}); + GraphSafeRemoveNodes(g, {residual_conv_output, elementwise_op}); IR_NODE_LINK_TO(projection_node, residual_conv_op); - IR_NODE_LINK_TO(residual_conv_op, elementwise_add_out); + IR_NODE_LINK_TO(residual_conv_op, elementwise_out); found_projection_conv_count++; }; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index cb9f48eabf397..f4358fb243f20 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -807,143 +807,74 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const { PrettyLogDetail("--- quantized %d matmul ops", quantize_matmul_count); } -void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const { +void CPUQuantizePass::QuantizeElementwise( + Graph* graph, const std::string elementwise_type) const { GraphPatternDetector gpd; auto pattern = gpd.mutable_pattern(); - patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_}; + patterns::Elementwise elementwise_pattern{pattern, name_scope_}; - elementwise_add_pattern( - pattern->NewNode(elementwise_add_pattern.elementwise_add_x_repr()), - pattern->NewNode(elementwise_add_pattern.elementwise_add_y_repr())); + elementwise_pattern( + pattern->NewNode(elementwise_pattern.elementwise_x_repr()), + pattern->NewNode(elementwise_pattern.elementwise_y_repr()), + elementwise_type); - int quantize_elementwise_add_count = 0; 
+ int quantize_elementwise_count = 0; auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { - VLOG(4) << "Quantize elementwise_add op"; - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, - elementwise_add_pattern); + VLOG(4) << "Quantize " + elementwise_type + " op"; + GET_IR_NODE_FROM_SUBGRAPH(elementwise_op, elementwise_op, + elementwise_pattern); // skip if should not be quantized - if (!platform::HasOpINT8DataType(elementwise_add_op->Op())) { - LogQuantizationDisabled(elementwise_add_op); + if (!platform::HasOpINT8DataType(elementwise_op->Op())) { + LogQuantizationDisabled(elementwise_op); return; } - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_y, elementwise_add_y, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, - elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_x, elementwise_x, + elementwise_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_y, elementwise_y, + elementwise_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_out, elementwise_out, + elementwise_pattern); if (!AreScalesPresentForNodes( - {elementwise_add_x, elementwise_add_y, elementwise_add_out})) { - LogCannotQuantizeOp(elementwise_add_op, + {elementwise_x, elementwise_y, elementwise_out})) { + LogCannotQuantizeOp(elementwise_op, "No scale available for the operator"); return; } bool is_x_unsigned{false}, is_y_unsigned{false}; - auto input_x_scale = - GetScaleValueForNode(elementwise_add_x, &is_x_unsigned); - auto input_y_scale = - GetScaleValueForNode(elementwise_add_y, &is_y_unsigned); + auto input_x_scale = GetScaleValueForNode(elementwise_x, &is_x_unsigned); + auto input_y_scale = GetScaleValueForNode(elementwise_y, &is_y_unsigned); // TODO(sfraczek): add support for different signedness if (is_x_unsigned != is_y_unsigned) { - LogCannotQuantizeOp(elementwise_add_op, - "ElementwiseAdd inputs must be of the same type."); + LogCannotQuantizeOp(elementwise_op, + "Elementwise inputs must be of the same type."); return; } - QuantizeInput(g, elementwise_add_op, elementwise_add_x, "X", input_x_scale, + QuantizeInput(g, elementwise_op, elementwise_x, "X", input_x_scale, is_x_unsigned, "Scale_x"); - QuantizeInput(g, elementwise_add_op, elementwise_add_y, "Y", input_y_scale, + QuantizeInput(g, elementwise_op, elementwise_y, "Y", input_y_scale, is_y_unsigned, "Scale_y"); bool is_output_unsigned{false}; auto output_scale = - GetScaleValueForNode(elementwise_add_out, &is_output_unsigned); + GetScaleValueForNode(elementwise_out, &is_output_unsigned); - DequantizeOutput(g, elementwise_add_op, elementwise_add_out, "Out", - output_scale, is_output_unsigned, "Scale_out"); + DequantizeOutput(g, elementwise_op, elementwise_out, "Out", output_scale, + is_output_unsigned, "Scale_out"); - ++quantize_elementwise_add_count; + ++quantize_elementwise_count; }; gpd(graph, handler); - AddStatis(quantize_elementwise_add_count); + AddStatis(quantize_elementwise_count); - PrettyLogDetail("--- quantized %d elementwise_add ops", - quantize_elementwise_add_count); -} - -void CPUQuantizePass::QuantizeElementwiseMul(Graph* graph) const { - GraphPatternDetector gpd; - auto pattern = gpd.mutable_pattern(); - patterns::ElementwiseMul elementwise_mul_pattern{pattern, name_scope_}; - - elementwise_mul_pattern( - pattern->NewNode(elementwise_mul_pattern.elementwise_mul_x_repr()), - pattern->NewNode(elementwise_mul_pattern.elementwise_mul_y_repr())); - - int
quantize_elementwise_mul_count = 0; - auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, - Graph* g) { - VLOG(4) << "Quantize elementwise_mul op"; - GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_op, elementwise_mul_op, - elementwise_mul_pattern); - - // skip if should not be quantized - if (!platform::HasOpINT8DataType(elementwise_mul_op->Op())) { - LogQuantizationDisabled(elementwise_mul_op); - return; - } - - GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_x, elementwise_mul_x, - elementwise_mul_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_y, elementwise_mul_y, - elementwise_mul_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_mul_out, elementwise_mul_out, - elementwise_mul_pattern); - - if (!AreScalesPresentForNodes( - {elementwise_mul_x, elementwise_mul_y, elementwise_mul_out})) { - LogCannotQuantizeOp(elementwise_mul_op); - return; - } - - bool is_x_unsigned{false}, is_y_unsigned{false}; - auto input_x_scale = - GetScaleValueForNode(elementwise_mul_x, &is_x_unsigned); - auto input_y_scale = - GetScaleValueForNode(elementwise_mul_y, &is_y_unsigned); - - // TODO(sfraczek): add support for different signedness - if (is_x_unsigned != is_y_unsigned) { - LogCannotQuantizeOp(elementwise_mul_op, - "ElementwiseMul inputs must be of the same type."); - return; - } - - QuantizeInput(g, elementwise_mul_op, elementwise_mul_x, "X", input_x_scale, - is_x_unsigned, "Scale_x"); - QuantizeInput(g, elementwise_mul_op, elementwise_mul_y, "Y", input_y_scale, - is_y_unsigned, "Scale_y"); - - bool is_output_unsigned{false}; - auto output_scale = - GetScaleValueForNode(elementwise_mul_out, &is_output_unsigned); - - DequantizeOutput(g, elementwise_mul_op, elementwise_mul_out, "Out", - output_scale, is_output_unsigned, "Scale_out"); - - ++quantize_elementwise_mul_count; - }; - gpd(graph, handler); - AddStatis(quantize_elementwise_mul_count); - - PrettyLogDetail("--- quantized %d elementwise_mul ops", - quantize_elementwise_mul_count); + PrettyLogDetail("--- quantized %d %s ops", quantize_elementwise_count, + elementwise_type); } void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const { @@ -1215,8 +1146,8 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const { QuantizeFc(graph); QuantizeReshape(graph); QuantizeMatmul(graph); - QuantizeElementwiseAdd(graph); - QuantizeElementwiseMul(graph); + QuantizeElementwise(graph, "elementwise_add"); + QuantizeElementwise(graph, "elementwise_mul"); QuantizeFusionGru(graph); QuantizeMultiGru(graph); QuantizeFusionLSTM(graph); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h index ebf3c3e65495e..3a286264e41ff 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h @@ -57,8 +57,8 @@ class CPUQuantizePass : public FusePassBase { void QuantizeTranspose(Graph* graph) const; void QuantizeReshape(Graph* graph) const; void QuantizeMatmul(Graph* graph) const; - void QuantizeElementwiseAdd(Graph* graph) const; - void QuantizeElementwiseMul(Graph* graph) const; + void QuantizeElementwise(Graph* graph, + const std::string elementwise_type) const; void QuantizeFusionGru(Graph* graph) const; void QuantizeMultiGru(Graph* graph) const; void QuantizeFusionLSTM(Graph* graph) const; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 0ce5aba476e30..22000865948d6 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++
b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -547,88 +547,77 @@ TEST(CpuQuantizePass, matmul_not_quantized) { expected_operators, added_nodes, 1.0f); } -static const std::initializer_list<std::string> variable_names_elementwise_add = {"a", "b", "c", "d", "e", "f"}; +static const std::initializer_list<std::string> variable_names_elementwise = { "a", "b", "c", "d", "e", "f"}; -ProgramDesc BuildProgramDescElementwiseAdd() { +ProgramDesc BuildProgramDescElementwise(const std::string elementwise_type, + const std::string elementwise_name) { ProgramDesc prog; - for (auto& v : variable_names_elementwise_add) { + for (auto& v : variable_names_elementwise) { prog.MutableBlock(0)->Var(v); } SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); - SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true, + SetOp(&prog, elementwise_type, elementwise_name, {"b", "d"}, {"e"}, true, "int8"); SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); return prog; } -TEST(CpuQuantizePass, elementwise_add) { +void TestElementwise(const std::string elementwise_type, + const std::string elementwise_name) { // 2 Quant + 2 IN + 1 DeQuant + 1 OUT int added_nodes = 6; std::unordered_map<std::string, int> expected_operators = { - {"elementwise_add", 1}, {"quantize", 2}, {"dequantize", 3}}; - MainTest(BuildProgramDescElementwiseAdd(), variable_names_elementwise_add, - expected_operators, added_nodes, SCALE * S8_MAX); + {elementwise_type, 1}, {"quantize", 2}, {"dequantize", 3}}; + MainTest(BuildProgramDescElementwise(elementwise_type, elementwise_name), + variable_names_elementwise, expected_operators, added_nodes, + SCALE * S8_MAX); } -TEST(CpuQuantizePass, elementwise_add_output_scale_missing) { +void TestElementwiseOutputScaleMissing(const std::string elementwise_type, + const std::string elementwise_name) { int added_nodes = 0; std::unordered_map<std::string, int> expected_operators = { - {"elementwise_add", 1}, {"quantize", 0}, {"dequantize", 2}}; - MainTest(BuildProgramDescElementwiseAdd(), variable_names_elementwise_add, - expected_operators, added_nodes, 1.f, 1.f, "e"); + {elementwise_type, 1}, {"quantize", 0}, {"dequantize", 2}}; + MainTest(BuildProgramDescElementwise(elementwise_type, elementwise_name), + variable_names_elementwise, expected_operators, added_nodes, 1.f, + 1.f, "e"); } -TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) { +void TestElementwiseUnsignedAndSignedInput(const std::string elementwise_type, + const std::string elementwise_name) { int added_nodes = 0; std::unordered_map<std::string, int> expected_operators = { - {"elementwise_add", 1}, {"quantize", 0}, {"dequantize", 2}}; - MainTest(BuildProgramDescElementwiseAdd(), variable_names_elementwise_add, - expected_operators, added_nodes, 1.f, 1.f, "", "b"); + {elementwise_type, 1}, {"quantize", 0}, {"dequantize", 2}}; + MainTest(BuildProgramDescElementwise(elementwise_type, elementwise_name), + variable_names_elementwise, expected_operators, added_nodes, 1.f, + 1.f, "", "b"); } -static const std::initializer_list<std::string> variable_names_elementwise_mul = {"a", "b", "c", "d", "e", "f"}; +TEST(CpuQuantizePass, elementwise_add) { + TestElementwise("elementwise_add", "ElementwiseAdd"); +} -ProgramDesc BuildProgramDescElementwiseMul() { - ProgramDesc prog; - for (auto& v : variable_names_elementwise_mul) { - prog.MutableBlock(0)->Var(v); - } - SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); - SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); - SetOp(&prog,
"elementwise_mul", "ElementwiseMul", {"b", "d"}, {"e"}, true, - "int8"); - SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); +TEST(CpuQuantizePass, elementwise_add_output_scale_missing) { + TestElementwiseOutputScaleMissing("elementwise_add", "ElementwiseAdd"); +} - return prog; +TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) { + TestElementwiseUnsignedAndSignedInput("elementwise_add", "ElementwiseAdd"); } TEST(CpuQuantizePass, elementwise_mul) { - // 2 Quant + 2 IN + 1 DeQuant + 1 OUT - int added_nodes = 6; - std::unordered_map expected_operators = { - {"elementwise_mul", 1}, {"quantize", 2}, {"dequantize", 3}}; - MainTest(BuildProgramDescElementwiseMul(), variable_names_elementwise_mul, - expected_operators, added_nodes, SCALE * S8_MAX); + TestElementwise("elementwise_mul", "ElementwiseMul"); } TEST(CpuQuantizePass, elementwise_mul_output_scale_missing) { - int added_nodes = 0; - std::unordered_map expected_operators = { - {"elementwise_mul", 1}, {"quantize", 0}, {"dequantize", 2}}; - MainTest(BuildProgramDescElementwiseMul(), variable_names_elementwise_mul, - expected_operators, added_nodes, 1.f, 1.f, "e"); + TestElementwiseOutputScaleMissing("elementwise_mul", "ElementwiseMul"); } TEST(CpuQuantizePass, elementwise_mul_unsigned_and_signed_input) { - int added_nodes = 0; - std::unordered_map expected_operators = { - {"elementwise_mul", 1}, {"quantize", 0}, {"dequantize", 2}}; - MainTest(BuildProgramDescElementwiseMul(), variable_names_elementwise_mul, - expected_operators, added_nodes, 1.f, 1.f, "", "b"); + TestElementwiseUnsignedAndSignedInput("elementwise_mul", "ElementwiseMul"); } const std::vector churn_out_vars(ProgramDesc* prog, From b841c7108e14d09a8f7fa4ee553ac6e0856a3fb5 Mon Sep 17 00:00:00 2001 From: Zuzanna Gawrysiak Date: Tue, 15 Mar 2022 10:22:47 +0100 Subject: [PATCH 3/3] Fix code formatting --- .../ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc index ade4fc179b940..e8264a783c835 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc @@ -147,8 +147,7 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsX( patterns::Elementwise elementwise_pattern{pattern, name_scope}; elementwise_pattern( - conv_output, - pattern->NewNode(elementwise_pattern.elementwise_y_repr()), + conv_output, pattern->NewNode(elementwise_pattern.elementwise_y_repr()), "elementwise_add"); conv_output->AsIntermediate(); @@ -215,8 +214,7 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsY( patterns::Elementwise elementwise_pattern{pattern, name_scope}; elementwise_pattern( - pattern->NewNode(elementwise_pattern.elementwise_x_repr()), - conv_output, + pattern->NewNode(elementwise_pattern.elementwise_x_repr()), conv_output, "elementwise_add"); conv_output->AsIntermediate();