apache · roywei · Jul 3, 2019 · Jul 3, 2019
diff --git a/benchmark/opperf/nd_operations/binary_operators.py b/benchmark/opperf/nd_operations/binary_operators.py
@@ -38,7 +38,7 @@
     get_all_elemen_wise_binary_operators
 
 
-def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     """Runs benchmarks with the given context and precision (dtype)for all the binary
     broadcast operators in MXNet.
 
@@ -59,13 +59,13 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
 
     """
     # Fetch all Binary Broadcast Operators
-    mx_binary_broadcast_ops = get_all_broadcast_binary_operators()
+    mx_binary_broadcast_ops = get_all_broadcast_binary_operators(inference_mode=inference_mode)
     # Run benchmarks
     mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, warmup, runs)
     return mx_binary_op_results
 
 
-def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     """Runs benchmarks with the given context and precision (dtype)for all the binary
     element_wise operators in MXNet.
 
@@ -86,7 +86,7 @@ def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
 
     """
     # Fetch all Binary Element_wise Operators
-    mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()
+    mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators(inference_mode)
     # Run benchmarks
     mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, warmup, runs)
     return mx_binary_op_results
diff --git a/benchmark/opperf/nd_operations/gemm_operators.py b/benchmark/opperf/nd_operations/gemm_operators.py
@@ -34,7 +34,7 @@
 """
 
 
-def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     """Runs benchmarks with the given context and precision (dtype)for all the GEMM
     operators (dot, batch_dot) in MXNet.
 
@@ -56,7 +56,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs
     """
     # Benchmark tests for dot and batch_dot operators
     dot_benchmark_res = run_performance_test(
-        [getattr(MX_OP_MODULE, "dot")], run_backward=True,
+        [getattr(MX_OP_MODULE, "dot")], run_backward=not inference_mode,
         dtype=dtype, ctx=ctx,
         inputs=[{"lhs": (1024, 1024),
                  "rhs": (1024, 1024)},
@@ -70,7 +70,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs
         warmup=warmup, runs=runs)
 
     batch_dot_benchmark_res = run_performance_test(
-        [getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
+        [getattr(MX_OP_MODULE, "batch_dot")], run_backward=not inference_mode,
         dtype=dtype, ctx=ctx,
         inputs=[{"lhs": (32, 1024, 1024),
                  "rhs": (32, 1024, 1024)},

diff --git a/benchmark/opperf/nd_operations/nn_activation_operators.py b/benchmark/opperf/nd_operations/nn_activation_operators.py
@@ -35,7 +35,7 @@
 """
 
 
-def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     """Runs benchmarks with the given context and precision (dtype)for all the activation
     operators (relu, sigmoid, softmax) in MXNet.
 
@@ -57,7 +57,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
     """
     # Relu and its variation
     relu_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "LeakyReLU")],
-                                              run_backward=True,
+                                              run_backward=not inference_mode,
                                               dtype=dtype,
                                               ctx=ctx,
                                               inputs=[{"data": (1024, 1024), "act_type": "leaky", "slope": 0.1},
@@ -79,7 +79,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
     # Sigmoid => Covered as part of Unary ops
     # Hard_Sigmoid
     hard_sigmoid_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "hard_sigmoid")],
-                                                      run_backward=True,
+                                                      run_backward=not inference_mode,
                                                       dtype=dtype,
                                                       ctx=ctx,
                                                       inputs=[{"data": (1024, 1024), "alpha": 0.25, "beta": 0.5},
@@ -92,7 +92,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
     # Softmax, LogSoftmax
     softmax_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "softmax"),
                                                   getattr(MX_OP_MODULE, "log_softmax")],
-                                                 run_backward=True,
+                                                 run_backward=not inference_mode,
                                                  dtype=dtype,
                                                  ctx=ctx,
                                                  inputs=[{"data": (1024, 1024), "axis": -1, "temperature": 0.5},

diff --git a/benchmark/opperf/nd_operations/nn_basic_operators.py b/benchmark/opperf/nd_operations/nn_basic_operators.py
@@ -29,10 +29,10 @@
 """
 
 
-def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     # FullyConnnected operator benchmarks
     fc_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "FullyConnected")],
-                                            run_backward=True,
+                                            run_backward=not inference_mode,
                                             dtype=dtype,
                                             ctx=ctx,
                                             inputs=[{"data": (32, 3, 256, 256),
@@ -50,7 +50,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
 
     # Dropout benchmarks
     dropout_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "Dropout")],
-                                                 run_backward=True,
+                                                 run_backward=not inference_mode,
                                                  dtype=dtype,
                                                  ctx=ctx,
                                                  inputs=[{"data": (32, 3, 256, 256),
@@ -63,7 +63,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
                                                  runs=runs)
     # BatchNorm benchmarks
     batchnorm_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "BatchNorm")],
-                                                   run_backward=True,
+                                                   run_backward=not inference_mode,
                                                    dtype=dtype,
                                                    ctx=ctx,
                                                    inputs=[{"data": (32, 3, 256, 256),

diff --git a/benchmark/opperf/nd_operations/nn_conv_operators.py b/benchmark/opperf/nd_operations/nn_conv_operators.py
@@ -51,7 +51,7 @@
 """
 
 
-def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     pool_types = ['avg', 'max', 'sum']
     global_pool_types = [0, 1]
 
@@ -62,7 +62,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
         for global_pool in global_pool_types:
             for pool1d_data in [(32, 3, 256), (32, 3, 64)]:
                 pool1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
-                                                             run_backward=True,
+                                                             run_backward=not inference_mode,
                                                              dtype=dtype,
                                                              ctx=ctx,
                                                              inputs=[{"data": pool1d_data,
@@ -76,7 +76,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
                                                              runs=runs)
             for pool2d_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
                 pool2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
-                                                             run_backward=True,
+                                                             run_backward=not inference_mode,
                                                              dtype=dtype,
                                                              ctx=ctx,
                                                              inputs=[{"data": pool2d_data,
@@ -93,12 +93,12 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
     return mx_pooling_op_results
 
 
-def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     # Conv1D Benchmarks
     conv1d_benchmark_res = []
     for conv_data in [(32, 3, 256), (32, 3, 64)]:
         conv1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
-                                                     run_backward=True,
+                                                     run_backward=not inference_mode,
                                                      dtype=dtype,
                                                      ctx=ctx,
                                                      inputs=[{"data": conv_data,
@@ -117,7 +117,7 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=2
     conv2d_benchmark_res = []
     for conv_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
         conv2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
-                                                     run_backward=True,
+                                                     run_backward=not inference_mode,
                                                      dtype=dtype,
                                                      ctx=ctx,
                                                      inputs=[{"data": conv_data,

diff --git a/benchmark/opperf/nd_operations/random_sampling_operators.py b/benchmark/opperf/nd_operations/random_sampling_operators.py
@@ -34,7 +34,7 @@
 from benchmark.opperf.utils.op_registry_utils import get_all_random_sampling_operators
 
 
-def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     """Runs benchmarks with the given context and precision (dtype)for all the random sampling
     operators in MXNet.
 
@@ -55,7 +55,7 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', w
 
     """
     # Fetch all Random Sampling Operators
-    mx_random_sample_ops = get_all_random_sampling_operators()
+    mx_random_sample_ops = get_all_random_sampling_operators(inference_mode)
     # Run benchmarks
     mx_random_sample_op_results = run_op_benchmarks(mx_random_sample_ops, dtype, ctx, warmup, runs)
     return mx_random_sample_op_results
diff --git a/benchmark/opperf/nd_operations/reduction_operators.py b/benchmark/opperf/nd_operations/reduction_operators.py
@@ -31,7 +31,7 @@
 from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
 
 
-def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     """Runs benchmarks with the given context and precision (dtype)for all the reduction
     operators in MXNet.
 
@@ -52,7 +52,7 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=
 
     """
     # Fetch all Reduction Operators
-    mx_reduction_broadcast_ops = get_all_reduction_operators()
+    mx_reduction_broadcast_ops = get_all_reduction_operators(inference_mode)
     # Run benchmarks
     mx_reduction_op_results = run_op_benchmarks(mx_reduction_broadcast_ops, dtype, ctx, warmup, runs)
     return mx_reduction_op_results
diff --git a/benchmark/opperf/nd_operations/unary_operators.py b/benchmark/opperf/nd_operations/unary_operators.py
@@ -35,7 +35,7 @@
 from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
 
 
-def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
     """Runs benchmarks with the given context and precision (dtype)for all the unary
     operators in MXNet.
 
@@ -56,7 +56,7 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
 
     """
     # Fetch all Unary Operators
-    mx_unary_broadcast_ops = get_all_unary_operators()
+    mx_unary_broadcast_ops = get_all_unary_operators(inference_mode=inference_mode)
     # Run benchmarks
     mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype, ctx, warmup, runs)
     return mx_unary_op_results
diff --git a/benchmark/opperf/opperf.py b/benchmark/opperf/opperf.py
@@ -44,7 +44,7 @@
     get_current_runtime_features
 
 
-def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32'):
+def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', inference_mode=False):
     """Run all the MXNet operators (NDArray) benchmarks.
 
     Returns
@@ -56,37 +56,46 @@ def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32'):
     # *************************MXNET TENSOR OPERATOR BENCHMARKS*****************************
 
     # Run all Unary operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype))
+    mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype, inference_mode=inference_mode))
 
     # Run all Binary Broadcast, element_wise operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_mx_binary_broadcast_operators_benchmarks(ctx=ctx,
-                                                                                         dtype=dtype))
+                                                                                         dtype=dtype,
+                                                                                         inference_mode=inference_mode))
     mxnet_operator_benchmark_results.append(run_mx_binary_element_wise_operators_benchmarks(ctx=ctx,
-                                                                                            dtype=dtype))
+                                                                                            dtype=dtype,
+                                                                                            inference_mode=inference_mode))
 
     # Run all GEMM operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_gemm_operators_benchmarks(ctx=ctx,
-                                                                          dtype=dtype))
+                                                                          dtype=dtype,
+                                                                          inference_mode=inference_mode))
 
     # Run all Random sampling operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype))
+    mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype,
+                                                                                        inference_mode=inference_mode))
 
     # Run all Reduction operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype))
+    mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype,
+                                                                                  inference_mode=inference_mode))
 
     # ************************ MXNET NN OPERATOR BENCHMARKS ****************************
 
     # Run all basic NN operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype))
+    mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype,
+                                                                              inference_mode=inference_mode))
 
     # Run all Activation operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype))
+    mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype,
+                                                                                inference_mode=inference_mode))
 
     # Run all Pooling operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype))
+    mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype,
+                                                                             inference_mode=inference_mode))
 
     # Run all Convolution operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype))
+    mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype,
+                                                                                 inference_mode=inference_mode))
 
     # ****************************** PREPARE FINAL RESULTS ********************************
     final_benchmark_result_map = merge_map_list(mxnet_operator_benchmark_results)
@@ -124,18 +133,24 @@ def main():
                         help='Name and path for the '
                              'output file.')
 
+    parser.add_argument('-i', '--inference', type=lambda v: v.lower() in ('true', '1', 'yes'), default=False,
+                        help='Run benchmarks in inference mode')  # type=bool would map any non-empty text, even "False", to True
+
     args = parser.parse_args()
     logging.info(f"Running MXNet operator benchmarks with the following options: {args}")
     assert not os.path.isfile(args.output_file), f"Output file {args.output_file} already exists."
 
     # 2. RUN BENCHMARKS
     ctx = _parse_mxnet_context(args.ctx)
     dtype = args.dtype
-    final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype)
+    final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype, inference_mode=args.inference)
 
     # 3. PREPARE OUTPUTS
-    run_time_features = get_current_runtime_features()
-    save_to_file(final_benchmark_results, args.output_file, args.output_format, run_time_features)
+    if tuple(int(part) for part in mx.__version__.split('.')[:2]) < (1, 5):  # numeric compare: as strings '1.10.0' < '1.5.0'
+        save_to_file(final_benchmark_results, args.output_file, args.output_format)
+    else:
+        run_time_features = get_current_runtime_features()
+        save_to_file(final_benchmark_results, args.output_file, args.output_format, run_time_features)
 
     # 4. Generate list of MXNet operators not covered in benchmarks
     ops_not_covered = get_operators_with_no_benchmark(final_benchmark_results.keys())