Refactor the cpu ops (#669)

* refactor the ref relu, ref pooling * refactor x86 * add op test, equal, neg
OAID · May 14, 2021 · e675d0b · e675d0b
1 parent 0938e25
commit e675d0b
Show file tree

Hide file tree

Showing 50 changed files with 3,875 additions and 3,783 deletions.
diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml
@@ -44,6 +44,9 @@ jobs:
         cd build
         lcov -d ./source -c -o lcov.info
         lcov -r lcov.info '/usr/*' -o lcov.info
+        lcov -r lcov.info '*fp16*' -o lcov.info
+        lcov -r lcov.info '*int8*' -o lcov.info
+        lcov -r lcov.info '*uint8*' -o lcov.info
         lcov --list lcov.info
     - name: codecov
       uses: codecov/codecov-action@v1.0.11

diff --git a/source/device/cpu/op/clip/clip_kernel_ref.h b/source/device/cpu/op/clip/clip_kernel_ref.h
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author:
+ */
+
+#ifndef __CLIP_KERNEL_REF_H__
+#define __CLIP_KERNEL_REF_H__
+
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+
+
+/*
+ * Reference fp32 clip kernel.
+ *
+ * Copies input_tensor->elem_num float elements from input_tensor->data to
+ * output_tensor->data, replacing values above max with max and values
+ * below min with min.
+ *
+ * Note the argument order: the upper bound (max) comes BEFORE the lower
+ * bound (min).
+ *
+ * Returns 0.
+ */
+int ref_clip_fp32(struct tensor* input_tensor, struct tensor* output_tensor, float max, float min);
+
+/*
+ * Reference uint8 (quantized) clip kernel.
+ *
+ * Dequantizes each input element with the input tensor's scale and
+ * zero_point, clips it against the float bounds max / min, and requantizes
+ * the result with the output tensor's scale and zero_point.
+ *
+ * Same argument order as ref_clip_fp32: max first, then min.
+ *
+ * Returns 0.
+ */
+int ref_clip_uint8(struct tensor* input_tensor, struct tensor* output_tensor, float max, float min);
+
+#endif
diff --git a/source/device/cpu/op/clip/clip_kernel_ref_fp32.c b/source/device/cpu/op/clip/clip_kernel_ref_fp32.c
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author:
+ */
+
+#include "clip_kernel_ref.h"
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "module/module.h"
+#include "operator/op.h"
+#include "utility/float.h"
+#include "utility/sys_port.h"
+#include "utility/log.h"
+#include "device/cpu/cpu_node.h"
+#include "device/cpu/cpu_graph.h"
+#include "device/cpu/cpu_module.h"
+
+
+/*
+ * Reference fp32 clip kernel.
+ *
+ * Reads input_tensor->elem_num floats from input_tensor->data and writes the
+ * clipped values to output_tensor->data.
+ *
+ *   input_tensor   source tensor (data is read as float)
+ *   output_tensor  destination tensor (data is written as float)
+ *   max            upper clip bound
+ *   min            lower clip bound
+ *
+ * Always returns 0.
+ */
+int ref_clip_fp32(struct tensor* input_tensor, struct tensor* output_tensor, float max, float min)
+{
+    const float* src = input_tensor->data;
+    float* dst = output_tensor->data;
+    const int count = input_tensor->elem_num;
+
+    for (int idx = 0; idx < count; ++idx)
+    {
+        float value = src[idx];
+
+        /* The upper bound is applied first, then the lower bound, so if
+         * min > max every element ends up equal to min. */
+        if (value > max)
+            value = max;
+        if (value < min)
+            value = min;
+
+        dst[idx] = value;
+    }
+
+    return 0;
+}
diff --git a/source/device/cpu/op/clip/clip_kernel_ref_uint8.c b/source/device/cpu/op/clip/clip_kernel_ref_uint8.c
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author:
+ */
+
+#include "clip_kernel_ref.h"
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+#include "module/module.h"
+#include "operator/op.h"
+#include "utility/float.h"
+#include "utility/sys_port.h"
+#include "utility/log.h"
+#include "device/cpu/cpu_node.h"
+#include "device/cpu/cpu_graph.h"
+#include "device/cpu/cpu_module.h"
+
+#include <math.h>
+
+
+/*
+ * Reference uint8 (quantized) clip kernel.
+ *
+ * Each element is dequantized with the input tensor's scale / zero_point,
+ * clipped against max / min in float, and requantized with the output
+ * tensor's scale / zero_point.
+ *
+ *   input_tensor   source tensor; data is read as uint8_t and elem_num
+ *                  gives the element count
+ *   output_tensor  destination tensor; data is written as uint8_t
+ *   max            upper clip bound (float domain)
+ *   min            lower clip bound (float domain)
+ *
+ * The requantized value is saturated to the uint8 range [0, 255] on both
+ * sides, so a result below zero is stored as 0 instead of wrapping around.
+ *
+ * Always returns 0.
+ */
+int ref_clip_uint8(struct tensor* input_tensor, struct tensor* output_tensor, float max, float min)
+{
+    const int total_size = input_tensor->elem_num;
+    const uint8_t* input_uint8 = ( const uint8_t* )input_tensor->data;
+    uint8_t* output_uint8 = ( uint8_t* )output_tensor->data;
+
+    const float input_scale = input_tensor->scale;
+    const float output_scale = output_tensor->scale;
+    const float input_zero = ( float )input_tensor->zero_point;
+    const float output_zero = ( float )output_tensor->zero_point;
+
+    /*
+     * Elements are independent of each other, so dequantize, clip and
+     * requantize in a single pass. This avoids staging the whole tensor in
+     * two temporary float buffers (and the unchecked allocations that came
+     * with them).
+     */
+    for (int i = 0; i < total_size; i++)
+    {
+        /* input dequant */
+        float value = (( float )input_uint8[i] - input_zero) * input_scale;
+
+        /* upper bound first, then lower bound (same order as the fp32 kernel) */
+        if (value > max)
+            value = max;
+        if (value < min)
+            value = min;
+
+        /*
+         * output quant
+         * Saturate while still in float: converting an out-of-range or NaN
+         * float to an integer type is undefined behaviour, and a negative
+         * integer stored into uint8_t would wrap around to a large value.
+         */
+        float quantized = roundf(value / output_scale) + output_zero;
+        if (quantized > 255.0f)
+            quantized = 255.0f;
+        else if (!(quantized > 0.0f))
+            quantized = 0.0f; /* negative values and NaN both land here */
+
+        output_uint8[i] = ( uint8_t )quantized;
+    }
+
+    return 0;
+}
diff --git a/source/device/cpu/op/clip/clip_ref.c b/source/device/cpu/op/clip/clip_ref.c
@@ -34,69 +34,9 @@
 #include "device/cpu/cpu_graph.h"
 #include "device/cpu/cpu_module.h"
 
-#include <math.h>
+#include "clip_kernel_ref.h"
 
 
-int ref_clip_fp32(struct tensor* input_tensor, struct tensor* output_tensor, float max, float min, int num_thread)
-{
-    int total_size = input_tensor->elem_num;
-    float* input_data = input_tensor->data;
-    float* out_data = output_tensor->data;
-
-    for (int i = 0; i < total_size; i++)
-    {
-        out_data[i] = input_data[i];
-
-        if (out_data[i] > max)
-            out_data[i] = max;
-        if (out_data[i] < min)
-            out_data[i] = min;
-    }
-
-    return 0;
-}
-
-int ref_clip_uint8(struct tensor* input_tensor, struct tensor* output_tensor, float max, float min, int num_thread)
-{
-    int total_size = input_tensor->elem_num;
-    uint8_t* input_uint8 = ( uint8_t* )input_tensor->data;
-    uint8_t* output_uint8 = ( uint8_t* )output_tensor->data;
-
-    float input_scale = input_tensor->scale;
-    float output_scale = output_tensor->scale;
-    int input_zero = input_tensor->zero_point;
-    int output_zero = output_tensor->zero_point;
-
-    /* input dequant */
-    float* input_fp32 = ( float* )sys_malloc(total_size * sizeof(float));
-    float* output_fp32 = ( float* )sys_malloc(total_size * sizeof(float));
-
-    for (uint32_t i = 0; i < input_tensor->elem_num; i++)
-        input_fp32[i] = ((float )input_uint8[i] - (float )input_zero) * input_scale;
-
-    for (int i = 0; i < total_size; i++)
-    {
-        output_fp32[i] = input_fp32[i];
-
-        if (output_fp32[i] > max)
-            output_fp32[i] = max;
-        if (output_fp32[i] < min)
-            output_fp32[i] = min;
-    }
-
-    /* output quant */
-    for (int i = 0; i < total_size; i++)
-    {
-        int output_data = (int)roundf(output_fp32[i] / output_scale) + output_zero;
-        output_uint8[i] = output_data > 255 ? 255 : output_data;
-    }
-
-    sys_free(input_fp32);
-    sys_free(output_fp32); 
-
-    return 0;
-}
-
 static int init_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
 {
     return 0;
@@ -116,12 +56,10 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
 {
     struct node* ir_node = exec_node->ir_node;
     struct graph* ir_graph = ir_node->graph;
-    struct tensor* input_tensor;
-    struct tensor* output_tensor;
+    struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
+    struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
     int layout = ir_graph->graph_layout;
 
-    input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
-    output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
     struct clip_param* clip_param = ( struct clip_param* )ir_node->op.param_mem;
 
     int in_size = input_tensor->elem_num;
@@ -130,9 +68,14 @@ static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct ex
 
     int ret = -1;
     if (input_tensor->data_type == TENGINE_DT_FP32)
-        ret = ref_clip_fp32(input_tensor, output_tensor, max, min, exec_graph->num_thread);
+        ret = ref_clip_fp32(input_tensor, output_tensor, max, min);
+    else if (input_tensor->data_type == TENGINE_DT_UINT8)
+        ret = ref_clip_uint8(input_tensor, output_tensor, max, min);
     else
-        ret = ref_clip_uint8(input_tensor, output_tensor, max, min, exec_graph->num_thread);
+    {
+        TLOG_ERR("Input data type %d not to be supported.\n", input_tensor->data_type);
+        return -1;
+    }
 
     return ret;
 }

diff --git a/source/device/cpu/op/conv/conv_kernel_ref.h b/source/device/cpu/op/conv/conv_kernel_ref.h
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Copyright (c) 2021, OPEN AI LAB
+ * Author:
+ */
+
+#ifndef __CONV_KERNEL_REF_H__
+#define __CONV_KERNEL_REF_H__
+
+#include "convolution_param.h"
+
+#include "graph/tensor.h"
+#include "graph/node.h"
+#include "graph/graph.h"
+
+
+/*
+ * Reference convolution kernels, one entry point per tensor data type
+ * (fp32, fp16, int8, uint8). All four take the same parameter list:
+ *
+ *   input_tensor   input tensor
+ *   output_tensor  output tensor
+ *   kernel         kernel (weight) tensor
+ *   bias           bias tensor
+ *                  NOTE(review): presumably NULL when the layer has no
+ *                  bias -- confirm against the implementations.
+ *   conv_param     convolution parameters (struct conv_param)
+ *
+ * NOTE(review): the implementations are not visible from this header; the
+ * int return value is presumably 0 on success like the other ref kernels --
+ * confirm before relying on it.
+ */
+int ref_conv_fp32(struct tensor* input_tensor, struct tensor* output_tensor, struct tensor* kernel,
+                         struct tensor* bias, struct conv_param* conv_param);
+
+/* fp16 variant; see the parameter description on ref_conv_fp32. */
+int ref_conv_fp16(struct tensor* input_tensor, struct tensor* output_tensor, struct tensor* kernel,
+                         struct tensor* bias, struct conv_param* conv_param);
+
+/* int8 variant; see the parameter description on ref_conv_fp32. */
+int ref_conv_int8(struct tensor* input_tensor, struct tensor* output_tensor, struct tensor* kernel,
+                         struct tensor* bias, struct conv_param* conv_param);
+
+/* uint8 variant; see the parameter description on ref_conv_fp32. */
+int ref_conv_uint8(struct tensor* input_tensor, struct tensor* output_tensor, struct tensor* kernel,
+                         struct tensor* bias, struct conv_param* conv_param);
+
+#endif