From a167f3123145fd0dccab9747936f1a599a1ffb62 Mon Sep 17 00:00:00 2001 From: Kalle Raiskila Date: Wed, 26 Jul 2023 11:56:18 +0200 Subject: [PATCH] Remove local tensor copies: matmul, matmulinteger --- src/nodes/matmul.h | 14 +++------ src/nodes/matmulinteger.h | 64 ++++++++++++--------------------------- 2 files changed, 24 insertions(+), 54 deletions(-) diff --git a/src/nodes/matmul.h b/src/nodes/matmul.h index c05c30d..2ea18d3 100644 --- a/src/nodes/matmul.h +++ b/src/nodes/matmul.h @@ -5,17 +5,12 @@ class MatMul : public Node { public: MatMul() { op_name = "MatMul"; - A=B=Y=NULL; } - // inputs - const Tensor *A; - const Tensor *B; - // outputs - const Tensor *Y; - virtual void print(std::ostream &dst) const override { + Tensor *A = inputs[0]; + Tensor *B = inputs[1]; std::string type = A->data_type_str(); if( A->data_dim.size() != 2 ) @@ -44,8 +39,8 @@ class MatMul : public Node { } virtual void resolve(void) override { - A = inputs[0]; - B = inputs[1]; + Tensor *A = inputs[0]; + Tensor *B = inputs[1]; register_input(A, "A"); register_input(B, "B"); if( typeConstraint_highPrecisionNumeric(A) == false ) @@ -61,7 +56,6 @@ class MatMul : public Node { rv->data_dim.push_back(rows); rv->data_dim.push_back(cols); rv->data_type = A->data_type; - Y=rv; register_output(rv, "Y"); } diff --git a/src/nodes/matmulinteger.h b/src/nodes/matmulinteger.h index 6e5a7a3..eb8cf60 100644 --- a/src/nodes/matmulinteger.h +++ b/src/nodes/matmulinteger.h @@ -15,38 +15,13 @@ class MatMulInteger : public Node { public: MatMulInteger() { op_name = "MatMulInteger"; - A=B=Y=NULL; - a_zero_point=b_zero_point=NULL; } - // inputs - const Tensor *A; - const Tensor *B; - // optional inputs - const Tensor *a_zero_point; - const Tensor *b_zero_point; - // outputs - const Tensor *Y; - - virtual void print_parameters(std::ostream &dst, bool decorate ) const override - { - A->print_tensor_as_const(dst, !decorate); - dst << ", "; - B->print_tensor_as_const(dst, !decorate); - dst << ", "; - if( a_zero_point ) { - a_zero_point->print_tensor_as_const(dst, !decorate); - dst << ", "; - } - if( b_zero_point ) { - b_zero_point->print_tensor_as_const(dst, !decorate); - dst << ", "; - } - Y->print_tensor(dst, !decorate); - } - virtual void print(std::ostream &dst) const override { + Tensor *A = inputs[0]; + Tensor *B = inputs[1]; + Tensor *Y = outputs[0]; std::string intype = A->data_type_str(); std::string outtype = Y->data_type_str(); std::string weighttype = B->data_type_str(); @@ -68,24 +43,27 @@ class MatMulInteger : public Node { if( inner != inner2 ) ERROR("MatMulInteger input's inner dimensions don't match"); - if( a_zero_point ) - a_zero = a_zero_point->cname() + "[0]"; + if( inputs.size() > 2) + a_zero = "a_zero_point[0]"; else a_zero = "0"; - if( b_zero_point ) - b_zero = b_zero_point->cname() + "[0]"; + if( inputs.size() > 3) + b_zero = "b_zero_point[0]"; else b_zero = "0"; INDT_1 "/*MatMulInteger*/" << std::endl; - INDT_1 << intype << " *A = (" << intype << "*)" << A->cname() << ";" << std::endl; - INDT_1 << weighttype << " *B = (" << weighttype << "*)" << B->cname() << ";" << std::endl; - INDT_1 << outtype << " *Y = (" << outtype << "*)" << Y->cname() << ";" << std::endl; + INDT_1 << intype << " *A = (" << intype << "*)input_A;" << std::endl; + INDT_1 << weighttype << " *B = (" << weighttype << "*)input_B;" << std::endl; + INDT_1 << outtype << " *Y = (" << outtype << "*)output_Y;" << std::endl; INDT_1 << "for( uint32_t r=0; r<" << rows << "; r++ )" << std::endl; INDT_2 << "for( uint32_t c=0; c<" << cols << "; c++ ) {" << std::endl; + // NB: quantization here is the experimental ONNXC quantization + // that is not only integers, but also scales the output to 8bits. + // This quantization terribly kludgy, and really should be removed if( options.quantize ) INDT_3 << "int32_t sum = 0;" << std::endl; else @@ -106,23 +84,22 @@ class MatMulInteger : public Node { } INDT_2 "}" << std::endl; - } virtual void resolve(void) override { - A = inputs[0]; - B = inputs[1]; + register_input(inputs[0], "input_A"); + register_input(inputs[1], "input_B"); if( inputs.size() > 2 ) { - a_zero_point = inputs[2]; + register_input(inputs[2], "a_zero_point"); /* There is no backend reference test for this case */ - if( a_zero_point->data_dim[0] != 1 ) + if( inputs[2]->data_dim[0] != 1 ) ERROR("Unimplemented: 1D zero_point input"); } if( inputs.size() > 3 ) { - b_zero_point = inputs[3]; - if( b_zero_point->data_dim[0] != 1 ) + register_input(inputs[3], "b_zero_point"); + if( inputs[3]->data_dim[0] != 1 ) ERROR("Unimplemented: 1D zero_point input"); } @@ -137,8 +114,7 @@ class MatMulInteger : public Node { rv->data_type = onnx::TensorProto_DataType_INT8; else rv->data_type = onnx::TensorProto_DataType_INT32; - Y=rv; - outputs.push_back(rv); + register_output(rv, "output_Y"); } void result_dim( const std::vector< Tensor*> &inputs, int32_t &rows, int32_t &cols) const