Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[WIP] Add inference mode for op perf benchmark #15453

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions benchmark/opperf/nd_operations/binary_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
get_all_elemen_wise_binary_operators


def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
"""Runs benchmarks with the given context and precision (dtype)for all the binary
broadcast operators in MXNet.

Expand All @@ -59,13 +59,13 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',

"""
# Fetch all Binary Broadcast Operators
mx_binary_broadcast_ops = get_all_broadcast_binary_operators()
mx_binary_broadcast_ops = get_all_broadcast_binary_operators(inference_mode=inference_mode)
# Run benchmarks
mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, warmup, runs)
return mx_binary_op_results


def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
"""Runs benchmarks with the given context and precision (dtype)for all the binary
element_wise operators in MXNet.

Expand All @@ -86,7 +86,7 @@ def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32

"""
# Fetch all Binary Element_wise Operators
mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()
mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators(inference_mode)
# Run benchmarks
mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, warmup, runs)
return mx_binary_op_results
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/gemm_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"""


def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
"""Runs benchmarks with the given context and precision (dtype)for all the GEMM
operators (dot, batch_dot) in MXNet.

Expand All @@ -56,7 +56,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs
"""
# Benchmark tests for dot and batch_dot operators
dot_benchmark_res = run_performance_test(
[getattr(MX_OP_MODULE, "dot")], run_backward=True,
[getattr(MX_OP_MODULE, "dot")], run_backward=False if inference_mode else True,
dtype=dtype, ctx=ctx,
inputs=[{"lhs": (1024, 1024),
"rhs": (1024, 1024)},
Expand All @@ -70,7 +70,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs
warmup=warmup, runs=runs)

batch_dot_benchmark_res = run_performance_test(
[getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
[getattr(MX_OP_MODULE, "batch_dot")], run_backward=False if inference_mode else True,
dtype=dtype, ctx=ctx,
inputs=[{"lhs": (32, 1024, 1024),
"rhs": (32, 1024, 1024)},
Expand Down
8 changes: 4 additions & 4 deletions benchmark/opperf/nd_operations/nn_activation_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
"""


def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
"""Runs benchmarks with the given context and precision (dtype)for all the activation
operators (relu, sigmoid, softmax) in MXNet.

Expand All @@ -57,7 +57,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
"""
# Relu and its variation
relu_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "LeakyReLU")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": (1024, 1024), "act_type": "leaky", "slope": 0.1},
Expand All @@ -79,7 +79,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
# Sigmoid => Covered as part of Unary ops
# Hard_Sigmoid
hard_sigmoid_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "hard_sigmoid")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": (1024, 1024), "alpha": 0.25, "beta": 0.5},
Expand All @@ -92,7 +92,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
# Softmax, LogSoftmax
softmax_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "softmax"),
getattr(MX_OP_MODULE, "log_softmax")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": (1024, 1024), "axis": -1, "temperature": 0.5},
Expand Down
8 changes: 4 additions & 4 deletions benchmark/opperf/nd_operations/nn_basic_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
"""


def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
# FullyConnected operator benchmarks
fc_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "FullyConnected")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": (32, 3, 256, 256),
Expand All @@ -50,7 +50,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,

# Dropout benchmarks
dropout_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "Dropout")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": (32, 3, 256, 256),
Expand All @@ -63,7 +63,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
runs=runs)
# BatchNorm benchmarks
batchnorm_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "BatchNorm")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": (32, 3, 256, 256),
Expand Down
12 changes: 6 additions & 6 deletions benchmark/opperf/nd_operations/nn_conv_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"""


def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
pool_types = ['avg', 'max', 'sum']
global_pool_types = [0, 1]

Expand All @@ -62,7 +62,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
for global_pool in global_pool_types:
for pool1d_data in [(32, 3, 256), (32, 3, 64)]:
pool1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": pool1d_data,
Expand All @@ -76,7 +76,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
runs=runs)
for pool2d_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
pool2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": pool2d_data,
Expand All @@ -93,12 +93,12 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
return mx_pooling_op_results


def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
# Conv1D Benchmarks
conv1d_benchmark_res = []
for conv_data in [(32, 3, 256), (32, 3, 64)]:
conv1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": conv_data,
Expand All @@ -117,7 +117,7 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=2
conv2d_benchmark_res = []
for conv_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
conv2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
run_backward=True,
run_backward=False if inference_mode else True,
dtype=dtype,
ctx=ctx,
inputs=[{"data": conv_data,
Expand Down
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/random_sampling_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from benchmark.opperf.utils.op_registry_utils import get_all_random_sampling_operators


def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
"""Runs benchmarks with the given context and precision (dtype)for all the random sampling
operators in MXNet.

Expand All @@ -55,7 +55,7 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', w

"""
# Fetch all Random Sampling Operators
mx_random_sample_ops = get_all_random_sampling_operators()
mx_random_sample_ops = get_all_random_sampling_operators(inference_mode)
# Run benchmarks
mx_random_sample_op_results = run_op_benchmarks(mx_random_sample_ops, dtype, ctx, warmup, runs)
return mx_random_sample_op_results
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/reduction_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
"""Runs benchmarks with the given context and precision (dtype)for all the reduction
operators in MXNet.

Expand All @@ -52,7 +52,7 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=

"""
# Fetch all Reduction Operators
mx_reduction_broadcast_ops = get_all_reduction_operators()
mx_reduction_broadcast_ops = get_all_reduction_operators(inference_mode)
# Run benchmarks
mx_reduction_op_results = run_op_benchmarks(mx_reduction_broadcast_ops, dtype, ctx, warmup, runs)
return mx_reduction_op_results
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/unary_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100, inference_mode=False):
"""Runs benchmarks with the given context and precision (dtype)for all the unary
operators in MXNet.

Expand All @@ -56,7 +56,7 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,

"""
# Fetch all Unary Operators
mx_unary_broadcast_ops = get_all_unary_operators()
mx_unary_broadcast_ops = get_all_unary_operators(inference_mode=inference_mode)
# Run benchmarks
mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype, ctx, warmup, runs)
return mx_unary_op_results
43 changes: 29 additions & 14 deletions benchmark/opperf/opperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
get_current_runtime_features


def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32'):
def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', inference_mode=False):
"""Run all the MXNet operators (NDArray) benchmarks.

Returns
Expand All @@ -56,37 +56,46 @@ def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32'):
# *************************MXNET TENSOR OPERATOR BENCHMARKS*****************************

# Run all Unary operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype))
mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype, inference_mode=inference_mode))

# Run all Binary Broadcast, element_wise operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_binary_broadcast_operators_benchmarks(ctx=ctx,
dtype=dtype))
dtype=dtype,
inference_mode=inference_mode))
mxnet_operator_benchmark_results.append(run_mx_binary_element_wise_operators_benchmarks(ctx=ctx,
dtype=dtype))
dtype=dtype,
inference_mode=inference_mode))

# Run all GEMM operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_gemm_operators_benchmarks(ctx=ctx,
dtype=dtype))
dtype=dtype,
inference_mode=inference_mode))

# Run all Random sampling operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype))
mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype,
inference_mode=inference_mode))

# Run all Reduction operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype))
mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype,
inference_mode=inference_mode))

# ************************ MXNET NN OPERATOR BENCHMARKS ****************************

# Run all basic NN operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype))
mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype,
inference_mode=inference_mode))

# Run all Activation operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype))
mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype,
inference_mode=inference_mode))

# Run all Pooling operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype))
mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype,
inference_mode=inference_mode))

# Run all Convolution operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype))
mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype,
inference_mode=inference_mode))

# ****************************** PREPARE FINAL RESULTS ********************************
final_benchmark_result_map = merge_map_list(mxnet_operator_benchmark_results)
Expand Down Expand Up @@ -124,18 +133,24 @@ def main():
help='Name and path for the '
'output file.')

parser.add_argument('-i', '--inference', type=bool, default=False,
help='Run benchmarks in inference mode')

args = parser.parse_args()
logging.info(f"Running MXNet operator benchmarks with the following options: {args}")
assert not os.path.isfile(args.output_file), f"Output file {args.output_file} already exists."

# 2. RUN BENCHMARKS
ctx = _parse_mxnet_context(args.ctx)
dtype = args.dtype
final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype)
final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype, inference_mode=args.inference)

# 3. PREPARE OUTPUTS
run_time_features = get_current_runtime_features()
save_to_file(final_benchmark_results, args.output_file, args.output_format, run_time_features)
if mx.__version__ < '1.5.0':
save_to_file(final_benchmark_results, args.output_file, args.output_format)
else:
run_time_features = get_current_runtime_features()
save_to_file(final_benchmark_results, args.output_file, args.output_format, run_time_features)

# 4. Generate list of MXNet operators not covered in benchmarks
ops_not_covered = get_operators_with_no_benchmark(final_benchmark_results.keys())
Expand Down
Loading