diff --git a/benchmark/opperf/README.md b/benchmark/opperf/README.md
index 241734fdd655..e0ece1b70107 100644
--- a/benchmark/opperf/README.md
+++ b/benchmark/opperf/README.md
@@ -50,7 +50,8 @@ Hence, in this utility, we will build the functionality to allow users and devel
 Provided you have MXNet installed (any version >= 1.5.1), all you need to use opperf utility is to add path to your cloned MXNet repository to the PYTHONPATH.
 
 Note: 
-To install MXNet, refer [Installing MXNet page](https://mxnet.apache.org/versions/master/install/index.html)
+1. Currently, the opperf utility requires a cloned MXNet repository. It is not yet supported with the PyPI binary. [Work in Progress]
+2. To install MXNet, refer to the [Installing MXNet page](https://mxnet.apache.org/versions/master/install/index.html)
 
 ```
 export PYTHONPATH=$PYTHONPATH:/path/to/incubator-mxnet/
@@ -72,6 +73,8 @@ python incubator-mxnet/benchmark/opperf/opperf.py --output-format json --output-
 
 3. **dtype** : By default, `float32`. You can override and set the global dtype for all operator benchmarks. Example: --dtype float64.
 
+4. **profiler** : By default, `native`. You can override and set the global profiler for all operator benchmarks. Example: --profiler python.
+
 ## Usecase 2 - Run benchmarks for all the operators in a specific category
 
 For example, you want to run benchmarks for all NDArray Broadcast Binary Operators, Ex: broadcast_add, broadcast_mod, broadcast_pow etc., You just run the following python script.
@@ -117,6 +120,7 @@ add_res = run_performance_test(nd.add, run_backward=True, dtype='float32', ctx=m
                                inputs=[{"lhs": (1024, 1024),
                                         "rhs": (1024, 1024)}],
                                warmup=10, runs=25)
+print(add_res)
 ```
 
 Output for the above benchmark run, on a CPU machine, would look something like below:
@@ -143,6 +147,7 @@ add_res = run_performance_test([nd.add, nd.subtract], run_backward=True, dtype='
                                inputs=[{"lhs": (1024, 1024),
                                         "rhs": (1024, 1024)}],
                                warmup=10, runs=25)
+print(add_res)
 ```
 
 Output for the above benchmark run, on a CPU machine, would look something like below:
diff --git a/benchmark/opperf/rules/default_params.py b/benchmark/opperf/rules/default_params.py
index 615e2e31b63d..23a595ee8561 100644
--- a/benchmark/opperf/rules/default_params.py
+++ b/benchmark/opperf/rules/default_params.py
@@ -92,12 +92,21 @@
 DEFAULT_CLIP_WEIGHTS = [-1.0,0.8]
 DEFAULT_LAZY_UPDATE = [0,1]
 
-# For rearrange operators
-# NOTE: Data needs to be a 4D tensor for  operators like space_to_depth and depth_to_space
+# For array manipulation operators
+# NOTE: Data needs to be a 4D tensor for operators like space_to_depth, depth_to_space, etc.
 # Hence below we append 4d to mark the difference.
 # For depth_to_space, dimension 3 needs to be a multiple of 'block' and 1 should be a multiple of `block^2`
 DEFAULT_DATA_4d = [(1, 4, 2, 4), (10,25,10,100)]
 DEFAULT_BLOCK_SIZE = [2, 5]
+DEFAULT_NUM_OUTPUTS = [1]
+DEFAULT_PAD_WIDTH_4d = [(0, 0, 0, 0, 1, 1, 1, 1)]
+DEFAULT_MODE_4d = ["constant"]
+DEFAULT_REPEATS = [2]
+
+# broadcast_axis needs an input array with at least 1 dim of size 1.
+# Since the default axis is 0, dimension 0 has size 1 in each shape below.
+DEFAULT_DATA_DIM1 = [(1, 1024), (1, 1), (1, 100)]
+DEFAULT_SIZE = [2]
 
 # For swapaxis operator
 DEFAULT_DIM_1 = [0]
@@ -162,7 +171,14 @@
                    "data_smce": DEFAULT_DATA_SMCE,
                    "data_3d": DEFAULT_DATA_3d,
                    "label_smce": DEFAULT_LABEL_SMCE,
-                   "label": DEFAULT_LABEL}
+                   "label": DEFAULT_LABEL,
+                   "num_outputs": DEFAULT_NUM_OUTPUTS,
+                   "data_dim1": DEFAULT_DATA_DIM1,
+                   "size": DEFAULT_SIZE,
+                   "mode_4d": DEFAULT_MODE_4d,
+                   "pad_width_4d": DEFAULT_PAD_WIDTH_4d,
+                   "repeats": DEFAULT_REPEATS,
+                   "reps": DEFAULT_REPEATS}
 
 
 # These are names of MXNet operator parameters that is of type NDArray.
diff --git a/benchmark/opperf/utils/op_registry_utils.py b/benchmark/opperf/utils/op_registry_utils.py
index d4ddc1153816..9cdc6ea95b20 100644
--- a/benchmark/opperf/utils/op_registry_utils.py
+++ b/benchmark/opperf/utils/op_registry_utils.py
@@ -116,32 +116,47 @@ def prepare_op_inputs(arg_params, arg_values):
 def prepare_op_inputs(op, arg_params):
     inputs = []
 
-    # 4d tensor is needed only by following two ops
-    ops_4d = ['depth_to_space','space_to_depth']
+    # 4d tensor is needed by following ops
+    ops_4d = ['depth_to_space', 'space_to_depth', 'pad']
 
     # 3d tensor is needed by following ops
     ops_3d = ['CTCLoss', 'ctc_loss']
 
+    # The following ops need at least 1 dim of size 1
+    ops_dim1 = ['broadcast_axis', 'broadcast_like', 'broadcast_to', 'broadcast_axes']
+
     # Prepare op to default input mapping
     arg_values = {}
     for arg_name, arg_type in zip(arg_params["params"]["arg_names"],
                                   arg_params["params"]["arg_types"]):
-        if "NDArray" in arg_type and arg_name + "_nd" in DEFAULTS_INPUTS:
-            arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_nd"]
-        elif "NDArray" in arg_type and op in ops_4d and arg_name + "_4d" in DEFAULTS_INPUTS:
-            arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_4d"]
-        elif "NDArray" in arg_type and op in ops_3d and arg_name + "_3d" in DEFAULTS_INPUTS:
-            arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_3d"]
-        elif "NDArray" in arg_type and op == 'softmax_cross_entropy':
-            arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_smce"]
-        elif arg_name in DEFAULTS_INPUTS:
-            arg_values[arg_name] = DEFAULTS_INPUTS[arg_name]
-        elif "float" in arg_type and arg_name + "_float" in DEFAULTS_INPUTS:
-            arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_float"]
-        elif "Shape" in arg_type and arg_name + "_shape" in DEFAULTS_INPUTS:
-            # This is for cases where in some ops 'axis' is Int in some ops a shape tuple.
-            # Ex: axis in sum is shape, axis in sort is int.
-            arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_shape"]
+        if "NDArray" in arg_type:
+            if arg_name + "_nd" in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_nd"]
+            elif op in ops_3d and arg_name + "_3d" in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_3d"]
+            elif op == 'softmax_cross_entropy':
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_smce"]
+            elif op in ops_4d and arg_name + "_4d" in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_4d"]
+            elif op in ops_dim1 and arg_name + "_dim1" in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_dim1"]
+            elif arg_name in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name]
+        else:
+            # arg_type is not NDArray
+            if op in ops_4d and arg_name + "_4d" in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_4d"]
+            elif op in ops_dim1 and arg_name + "_dim1" in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_dim1"]
+            # default case
+            elif arg_name in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name]
+            elif "float" in arg_type and arg_name + "_float" in DEFAULTS_INPUTS:
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_float"]
+            elif "Shape" in arg_type and arg_name + "_shape" in DEFAULTS_INPUTS:
+                # This handles the case where 'axis' is an Int in some ops and a shape tuple in others.
+                # Ex: axis in sum is a shape, axis in sort is an int.
+                arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_shape"]
 
     # Number of different inputs we want to use to test
     # the operator
diff --git a/benchmark/opperf/utils/profiler_utils.py b/benchmark/opperf/utils/profiler_utils.py
index 45322c1066cf..d3ec38bc65d1 100644
--- a/benchmark/opperf/utils/profiler_utils.py
+++ b/benchmark/opperf/utils/profiler_utils.py
@@ -49,7 +49,8 @@ def _get_operator_profile(operator_name, operator_profile_results):
     # allows to retrieve alias operator profile from the profiler results
     # TODO handling - "identity" : "_copy"
     alias_map = {"broadcast_plus": "broadcast_add", "broadcast_minus": "broadcast_sub", "flatten": "Flatten", "max_axis": "max",
-                 "swapaxes": "SwapAxis", "flip": "reverse", "reshape": "Reshape", "crop": "slice", "sum_axis": "sum", "min_axis": "min", "CTCLoss": "ctc_loss"}
+                 "swapaxes": "SwapAxis", "flip": "reverse", "reshape": "Reshape", "crop": "slice", "sum_axis": "sum", "min_axis": "min",
+                 "CTCLoss": "ctc_loss", "broadcast_axes": "broadcast_axis"}
 
     op_name = None