[OpenCL] Enable OpenCL for GPU tests #12490

Merged (32 commits, Sep 9, 2022)
Commits:
6d147c3  Add opencl target in test build script (valmat07, Aug 18, 2022)
2730ecc  Fix fp16 test and compile test for opencl (valmat07, Aug 18, 2022)
bb48962  fix lint (valmat07, Aug 18, 2022)
3a01e41  Fix relay OpenCL texture tests (valmat07, Aug 19, 2022)
2d30a73  Fix lint (valmat07, Aug 19, 2022)
53d8042  Enable relay OpenCL tests (valmat07, Aug 19, 2022)
4c981a1  Fix opencl relay texture tests (valmat07, Aug 22, 2022)
61bf976  fix lint (valmat07, Aug 22, 2022)
19a1153  Remove OpenCL gtest variable (valmat07, Aug 22, 2022)
213620c  Fix unbound variable (valmat07, Aug 22, 2022)
2ceddbc  Merge https://github.com/Deelvin/tvm into enable_cl_tests (valmat07, Aug 23, 2022)
2107de0  Skip tests that are not supported in CI (valmat07, Aug 23, 2022)
be5fb16  fix lint (valmat07, Aug 23, 2022)
757f9d3  Add path for opencl gtest directory (valmat07, Aug 23, 2022)
b17219d  Fix opencl gtests include directory (valmat07, Aug 24, 2022)
ad16842  Enable OpenCL googletest. Fix bug in opencl timer test (valmat07, Aug 26, 2022)
4b82982  testing fix for build cpp tests (valmat07, Aug 28, 2022)
a5903ba  update googletest git version for opencl tests build (valmat07, Aug 29, 2022)
650c20e  update cmakelist (valmat07, Aug 29, 2022)
e9990e9  Update CMakeList (valmat07, Aug 29, 2022)
afb2c98  Update CMakeList (valmat07, Aug 29, 2022)
48b5606  Disable opencl googletests (valmat07, Aug 29, 2022)
b238280  update Opecnl.cmake (valmat07, Aug 30, 2022)
4f132bd  fix Opecnl.cmake (valmat07, Aug 30, 2022)
ba350f0  Merge https://github.com/apache/tvm into enable_cl_tests (valmat07, Sep 5, 2022)
f007b8e  Apply comments. Remove xfail decerator for opencl tests. Now specific… (valmat07, Sep 5, 2022)
269e491  minor code changes (valmat07, Sep 5, 2022)
560219c  apply comments (valmat07, Sep 6, 2022)
17e49ec  apply comment (valmat07, Sep 6, 2022)
42df63a  skip test in ci by decorator (valmat07, Sep 7, 2022)
307ba3c  fix pytest skipif warnings (valmat07, Sep 7, 2022)
dd6aa8b  Fix skipif for opencl gtests (valmat07, Sep 8, 2022)
src/runtime/opencl/opencl_common.h (2 changes: 1 addition & 1 deletion)

@@ -439,9 +439,9 @@ class OpenCLTimerNode : public TimerNode {
  public:
   // Timer start
   virtual void Start() {
-    this->duration = 0;
     if (count_timer_execs == 0) {
       cl::OpenCLWorkspace::Global()->GetEventQueue(dev_).clear();
+      this->duration = 0;
       // Very first call of Start() leads to the recreation of
       // OpenCL command queue in profiling mode. This allows to run profile after inference.
       recreateCommandQueue();
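Note: after this change `duration` is zeroed only on the outermost `Start()` call, together with clearing the event queue and recreating the command queue, instead of on every (possibly nested) start. A minimal sketch of driving this timer from Python, assuming an OpenCL-enabled TVM build with a reachable device; the kernel, sizes, and names are illustrative, not from the PR. `time_evaluator` is routed through the per-device timer, which on OpenCL should be the OpenCLTimerNode patched above:

    import numpy as np
    import tvm
    from tvm import te

    # Build a trivial element-wise kernel for the OpenCL target.
    n = 1024
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)
    bx, tx = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(bx, te.thread_axis("blockIdx.x"))
    s[B].bind(tx, te.thread_axis("threadIdx.x"))
    fadd = tvm.build(s, [A, B], target="opencl")

    dev = tvm.opencl(0)
    a = tvm.nd.array(np.random.rand(n).astype("float32"), dev)
    b = tvm.nd.empty((n,), "float32", dev)

    # Profiling after a prior run exercises the recreated command queue.
    timer = fadd.time_evaluator(fadd.entry_name, dev, number=10)
    print(timer(a, b).mean)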
tests/cpp-runtime/opencl/opencl_timer_test.cc (1 change: 1 addition & 0 deletions)

@@ -46,6 +46,7 @@ TEST(OpenCLTimerNode, nested_timers) {
   cl_mem cl_buf = clCreateBuffer(workspace->context, CL_MEM_READ_ONLY, BUFF_SIZE * sizeof(cl_int),
                                  NULL, &err);
   OPENCL_CHECK_ERROR(err);
+  queue = workspace->GetQueue(thr->device);
   OPENCL_CALL(clEnqueueWriteBuffer(queue, cl_buf, false, 0, BUFF_SIZE * sizeof(cl_int), tmp_buf,
                                    0, NULL, &ev));
   OPENCL_CALL(clReleaseMemObject(cl_buf));
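Note: the queue handle is presumably re-read here because the profiling timer recreates the OpenCL command queue (see the opencl_common.h change above), which would leave a handle captured earlier in the test stale.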
tests/cpp-runtime/opencl/run_gtests.cc (2 changes: 1 addition & 1 deletion)

@@ -40,7 +40,7 @@ TVM_REGISTER_GLOBAL("opencl.run_gtests").set_body([](TVMArgs args, TVMRetValue*
   argv.push_back(const_cast<char*>("opencl_run_gtests"));

   // add parsed arguments
-  for (int i = 0; i < parsed_args.size(); ++i) {
+  for (size_t i = 0; i < parsed_args.size(); ++i) {
     argv.push_back(const_cast<char*>(parsed_args[i].data()));
   }
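Note: `std::vector::size()` returns `size_t`, so widening the loop index from `int` silences the signed/unsigned comparison warning without changing behavior.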
tests/python/driver/tvmc/test_compiler.py (3 changes: 2 additions & 1 deletion)

@@ -367,8 +367,9 @@ def test_compile_opencl(tflite_mobilenet_v1_0_25_128):
     tvmc_model = tvmc.load(tflite_mobilenet_v1_0_25_128)
     tvmc_package = tvmc.compile(
         tvmc_model,
-        target="opencl --host=llvm",
+        target="opencl -host=llvm",
         desired_layout="NCHW",
+        dump_code="asm",
     )
     dumps_path = tvmc_package.package_path + ".asm"

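TVM target strings take single-dash flags, which is why `--host=llvm` becomes `-host=llvm` here. A minimal sketch of the equivalent, more explicit way to attach the host target (assumes only an importable `tvm`; the Adreno flag mirrors the tests below):

    import tvm

    # Attach the host compiler via the keyword argument instead of a string flag.
    gpu_target = tvm.target.Target("opencl -device=adreno", host="llvm")
    print(gpu_target.kind.name)       # "opencl"
    print(gpu_target.host.kind.name)  # "llvm"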
@@ -22,13 +22,15 @@
 from tvm.relay import testing
 from tvm.contrib import utils
 from utils.adreno_utils import gpu_preprocess, build_run_compare
+import pytest


-@tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+dtype = tvm.testing.parameter("float32")
+
+
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(target, dtype):
     input_shape = (1, 32, 42, 42)
     filter_shape = (96, 32, 3, 3)
     bias_shape = (1, 96, 1, 1)
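The recurring edit in this file replaces hardcoded `target`/`dtype` locals with pytest fixtures. A minimal sketch of the pattern, assuming an OpenCL-enabled build (`test_example` and its body are illustrative, not part of the PR):

    import tvm
    import tvm.testing

    # tvm.testing.parameter registers a module-level pytest fixture;
    # parametrize_targets feeds the target string into the test.
    dtype = tvm.testing.parameter("float32")

    @tvm.testing.requires_opencl
    @tvm.testing.parametrize_targets("opencl -device=adreno")
    def test_example(target, dtype):
        # One invocation per (target, dtype) combination; skipped when
        # no OpenCL device is available.
        assert tvm.target.Target(target).kind.name == "opencl"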
@@ -67,10 +69,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad():


 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(target, dtype):
     input_shape = (1, 32, 40, 40)
     filter_shape = (96, 32, 2, 2)
     bias_shape = (1, 96, 1, 1)
@@ -109,10 +109,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass():


 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_35_35_strides():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_35_35_strides(target, dtype):
     input_shape = (1, 48, 35, 35)
     filter_shape = (64, 48, 5, 5)
     bias_shape = (1, 64, 1, 1)
@@ -151,10 +149,8 @@ def test_conv2d_inceptionv3_35_35_strides():


 @tvm.testing.requires_opencl
-def test_conv2d_resnet50_v2_nchw_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_resnet50_v2_nchw_3c(target, dtype):
     input_shape = (1, 3, 224, 224)
     filter_shape = (64, 3, 7, 7)
     bias_shape = (1, 64, 1, 1)
@@ -194,10 +190,8 @@ def test_conv2d_resnet50_v2_nchw_3c():


 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_nchw_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_nchw_3c(target, dtype):
     input_shape = (1, 3, 299, 299)
     filter_shape = (64, 3, 3, 3)
     bias_shape = (1, 64, 1, 1)
@@ -236,10 +230,8 @@ def test_conv2d_inceptionv3_nchw_3c():


 @tvm.testing.requires_opencl
-def test_conv2d_1x1_16c16spatial():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_1x1_16c16spatial(target, dtype):
     input_shape = (1, 16, 256, 256)
     filter_shape = (32, 16, 4, 4)
     bias_shape = (1, 32, 1, 1)
@@ -278,10 +270,8 @@ def test_conv2d_1x1_16c16spatial():


 @tvm.testing.requires_opencl
-def test_conv2d_4x4_16c16pad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_4x4_16c16pad(target, dtype):
     input_shape = (1, 32, 256, 256)
     filter_shape = (32, 32, 4, 4)
     bias_shape = (1, 32, 1, 1)
@@ -320,10 +310,8 @@ def test_conv2d_4x4_16c16pad():


 @tvm.testing.requires_opencl
-def test_conv2d_4x4x4_16c16pad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_4x4x4_16c16pad(target, dtype):
     input_shape = (1, 32, 256, 256)
     filter_shape = (4, 32, 4, 4)
     bias_shape = (1, 4, 1, 1)
@@ -362,10 +350,8 @@ def test_conv2d_4x4x4_16c16pad():


 @tvm.testing.requires_opencl
-def test_conv2d_yolov3_v2_nchw_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_yolov3_v2_nchw_3c(target, dtype):
     input_shape = (1, 1024, 13, 13)
     filter_shape = (255, 1024, 1, 1)
     A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -397,10 +383,8 @@ def test_conv2d_yolov3_v2_nchw_3c():


 @tvm.testing.requires_opencl
-def test_conv2d_vgg16_winograd_4d():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_vgg16_winograd_4d(target, dtype):
     input_shape = (1, 512, 28, 28)
     filter_shape = (512, 512, 3, 3)
     bias_shape = (1, 512, 1, 1)
@@ -437,7 +421,7 @@ def test_conv2d_vgg16_winograd_4d():
     stat_file = temp.relpath("stat.log")
     with open(stat_file, "w") as f:
         f.write(
-            '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 512, 28, 28], "float16"], ["TENSOR", [512, 512, 3, 3], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], {}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}\n'
+            f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 512, 28, 28], "{dtype}"], ["TENSOR", [512, 512, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n'
         )
     graph = build_run_compare(
         mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file
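Here the tuning-log record becomes an f-string so the tensor dtype follows the parametrized fixture; braces that must survive into the JSON are doubled. A minimal standalone sketch of that escaping rule (values are illustrative):

    # In an f-string, "{{" and "}}" emit literal braces, so the JSON record
    # stays valid while {dtype} is interpolated.
    dtype = "float32"
    record = f'{{"input": ["opencl", "{dtype}"]}}'
    assert record == '{"input": ["opencl", "float32"]}'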
@@ -447,10 +431,8 @@ def test_conv2d_vgg16_winograd_4d():


 @tvm.testing.requires_opencl
-def test_conv2d_winograd_conv():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_winograd_conv(target, dtype):
     input_shape = (1, 4, 3, 3)
     A = relay.var("data", shape=input_shape, dtype=dtype)
     filter_shape3 = (8, 4, 3, 3)
@@ -486,7 +468,7 @@ def test_conv2d_winograd_conv():
     stat_file = temp.relpath("stat.log")
     with open(stat_file, "w") as f:
         f.write(
-            '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 4, 3, 3], "float16"], ["TENSOR", [8, 4, 3, 3], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], {}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}\n'
+            f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 4, 3, 3], "{dtype}"], ["TENSOR", [8, 4, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n'
         )
     graph = build_run_compare(
         mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file
@@ -496,7 +478,8 @@ def test_conv2d_winograd_conv():


 @tvm.testing.requires_opencl
-def test_residual_block():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_residual_block(target, dtype):
     """
     - some kind of residual block followed by convolution to have texture after residual block
     - scalar data type verification which should be mapped to global memory scope
@@ -515,9 +498,6 @@ def test_residual_block():
         | <- buffer
     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape1 = (32, 32, 2, 2)
     filter_shape2 = (32, 32, 1, 1)
@@ -555,7 +535,7 @@ def test_residual_block():
         kernel_size=(1, 1),
     )
     D = relay.op.add(conv2, D)
-    D = D * relay.const(0.15, "float16")
+    D = D * relay.const(0.15, dtype)
     D = relay.op.nn.relu(D)

     conv3 = relay.nn.conv2d(
@@ -607,7 +587,8 @@ def test_residual_block():


 @tvm.testing.requires_opencl
-def test_concat():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_concat(target, dtype):
     """
     layout_transform (NCHW->NCHW4c)
         | <- buffer
@@ -619,9 +600,6 @@ def test_concat():
         | <- buffer
     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape1 = (96, 32, 2, 2)
     filter_shape2 = (32, 96, 2, 2)
@@ -721,7 +699,8 @@ def test_concat():


 @tvm.testing.requires_opencl
-def test_pooling_branching_texture_params():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_pooling_branching_texture_params(target, dtype):
     """
     Verification of the pooling and many branches having textures
     layout_transform (NCHW->NCHW4c)
@@ -738,9 +717,6 @@ def test_pooling_branching_texture_params():
         | <- buffer
     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape0 = (32, 32, 1, 1)
     filter_shape1 = (32, 32, 2, 2)
@@ -849,7 +825,8 @@ def test_pooling_branching_texture_params():


 @tvm.testing.requires_opencl
-def test_branching_texture_params():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_branching_texture_params(target, dtype):
     """
     Verification of passing texture to several consumers markup of relay variables in
     primary functions + on_device
@@ -866,9 +843,6 @@ def test_branching_texture_params():
         | <- buffer
     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape0 = (32, 32, 1, 1)
     filter_shape1 = (32, 32, 2, 2)
@@ -976,7 +950,8 @@ def test_branching_texture_params():

 # function repeat, params scope are different in reused functions
 @tvm.testing.requires_opencl
-def test_conv2d_different_lowering_same_op():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_different_lowering_same_op(target, dtype):
     """
     Use case for verification of caching compiled functions
     Three convolutions following by each other in this case should be
@@ -993,9 +968,6 @@ def test_conv2d_different_lowering_same_op():
         | <- buffer
     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape1 = (32, 32, 1, 1)
     A = relay.var("data", shape=input_shape, dtype=dtype)