Skip to content

Commit

Permalink
Apply comments
Browse files Browse the repository at this point in the history
  • Loading branch information
echuraev committed Aug 12, 2022
1 parent 8067b1c commit 08ffe3a
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 82 deletions.
8 changes: 4 additions & 4 deletions include/tvm/runtime/profiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,8 @@ PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, i
* minimum duration requirement of one `repeat`.
* i.e., When the run time of one `repeat` falls below this time,
* the `number` parameter will be automatically increased.
* \param max_repeat_ms The maximum number of repeats when measured time is equal to 0.
* It helps to avoid hanging during measurements.
* \param limit_zero_time_iterations The maximum number of measurement attempts that may
* report a time of exactly 0 while trying to satisfy `min_repeat_ms`. Once this limit is
* reached the attempts stop, which avoids hanging during measurements.
* \param cooldown_interval_ms The cooldown interval in milliseconds between the number of repeats
* defined by `repeats_to_cooldown`.
* \param repeats_to_cooldown The number of repeats before the
Expand All @@ -584,8 +584,8 @@ PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, i
* \return f_timer A timer function.
*/
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
int max_repeat_num, int cooldown_interval_ms, int repeats_to_cooldown,
PackedFunc f_preproc = nullptr);
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown, PackedFunc f_preproc = nullptr);

} // namespace profiling
} // namespace runtime
Expand Down
27 changes: 16 additions & 11 deletions python/tvm/contrib/debugger/debug_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def _run_debug(
number,
repeat,
min_repeat_ms,
max_repeat_num,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
):
Expand All @@ -241,7 +241,7 @@ def _run_debug(
number=number,
repeat=repeat,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)
Expand Down Expand Up @@ -281,7 +281,7 @@ def run(
number=10,
repeat=1,
min_repeat_ms=1,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
**input_dict,
Expand Down Expand Up @@ -309,7 +309,7 @@ def run(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of measurement attempts that may report a time of exactly 0
while trying to satisfy `min_repeat_ms`. Once this limit is reached the attempts
stop, which avoids hanging during measurements.
Expand All @@ -331,7 +331,7 @@ def run(
number=number,
repeat=repeat,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)
Expand All @@ -347,7 +347,7 @@ def run_individual(
number,
repeat=1,
min_repeat_ms=0,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
):
Expand All @@ -372,7 +372,7 @@ def run_individual(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of measurement attempts that may report a time of exactly 0
while trying to satisfy `min_repeat_ms`. Once this limit is reached the attempts
stop, which avoids hanging during measurements.
Expand All @@ -389,7 +389,12 @@ def run_individual(
the repeat of the measurement.
"""
res = self._run_individual(
number, repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms, repeats_to_cooldown
number,
repeat,
min_repeat_ms,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
)
results = []
offset = 0
Expand All @@ -409,7 +414,7 @@ def run_individual_node(
number=10,
repeat=1,
min_repeat_ms=0,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
):
Expand Down Expand Up @@ -441,7 +446,7 @@ def run_individual_node(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of measurement attempts that may report a time of exactly 0
while trying to satisfy `min_repeat_ms`. Once this limit is reached the attempts
stop, which avoids hanging during measurements.
Expand All @@ -462,7 +467,7 @@ def run_individual_node(
number,
repeat,
min_repeat_ms,
max_repeat_num,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
)
Expand Down
8 changes: 4 additions & 4 deletions python/tvm/contrib/graph_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def benchmark(
repeat=5,
number=5,
min_repeat_ms=None,
max_repeat_num=100,
limit_zero_time_iterations=100,
end_to_end=False,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
Expand Down Expand Up @@ -403,7 +403,7 @@ def benchmark(
milliseconds. This can be used to ensure that the function is run enough to get an
accurate measurement.
max_repeat_num : Optional[int]
limit_zero_time_iterations : Optional[int]
The maximum number of measurement attempts that may report a time of exactly 0
while trying to satisfy `min_repeat_ms`. Once this limit is reached the attempts
stop, which avoids hanging during measurements.
Expand Down Expand Up @@ -442,7 +442,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
)(device.device_type % rpc_base.RPC_SESS_MASK, device.device_id, *args)
if kwargs:
self.set_input(**kwargs)
Expand All @@ -452,7 +452,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)()
6 changes: 3 additions & 3 deletions python/tvm/runtime/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def time_evaluator(
number=10,
repeat=1,
min_repeat_ms=0,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
f_preproc="",
Expand Down Expand Up @@ -311,7 +311,7 @@ def time_evaluator(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of measurement attempts that may report a time of exactly 0
while trying to satisfy `min_repeat_ms`. Once this limit is reached the attempts
stop, which avoids hanging during measurements.
Expand Down Expand Up @@ -345,7 +345,7 @@ def time_evaluator(
number,
repeat,
min_repeat_ms,
max_repeat_num,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
f_preproc,
Expand Down
8 changes: 4 additions & 4 deletions python/tvm/runtime/vm.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def benchmark(
repeat=5,
number=5,
min_repeat_ms=None,
max_repeat_num=100,
limit_zero_time_iterations=100,
end_to_end=False,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
Expand Down Expand Up @@ -631,7 +631,7 @@ def benchmark(
milliseconds. This can be used to ensure that the function is run enough to get an
accurate measurement.
max_repeat_num : Optional[int]
limit_zero_time_iterations : Optional[int]
The maximum number of measurement attempts that may report a time of exactly 0
while trying to satisfy `min_repeat_ms`. Once this limit is reached the attempts
stop, which avoids hanging during measurements.
Expand Down Expand Up @@ -677,7 +677,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
)(func_name, device.device_type % RPC_SESS_MASK, device.device_id, *packed_args)
if args or kwargs:
self.set_input(func_name, *args, **kwargs)
Expand All @@ -687,7 +687,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)(func_name)
11 changes: 6 additions & 5 deletions src/runtime/crt/common/crt_runtime_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
Expand Down Expand Up @@ -477,7 +478,7 @@ typedef struct {
int number;
int repeat;
int min_repeat_ms;
int max_repeat_num;
int limit_zero_time_iterations;
int cooldown_interval_ms;
int repeats_to_cooldown;
} time_evaluator_state_t;
Expand Down Expand Up @@ -507,7 +508,7 @@ int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* re
g_time_evaluator_state.number = args[4].v_int64;
g_time_evaluator_state.repeat = args[5].v_int64;
g_time_evaluator_state.min_repeat_ms = args[6].v_int64;
g_time_evaluator_state.min_repeat_num = args[7].v_int64;
g_time_evaluator_state.limit_zero_time_iterations = args[7].v_int64;
g_time_evaluator_state.cooldown_interval_ms = args[8].v_int64;
g_time_evaluator_state.repeats_to_cooldown = args[9].v_int64;

Expand Down Expand Up @@ -591,9 +592,9 @@ tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue*
if (err != kTvmErrorNoError) {
goto release_and_return;
}
if (std::fpclassify(curr_res_seconds) == FP_ZERO) absolute_zero_times++;
if (absolute_zero_times >= max_repeat_num) break;
} while (curr_res_seconds < min_repeat_seconds);
if (fpclassify(curr_res_seconds) == FP_ZERO) absolute_zero_times++;
} while (curr_res_seconds < min_repeat_seconds &&
absolute_zero_times < g_time_evaluator_state.limit_zero_time_iterations);
double mean_exec_seconds = curr_res_seconds / g_time_evaluator_state.number;
*iter = mean_exec_seconds;
iter++;
Expand Down
45 changes: 25 additions & 20 deletions src/runtime/graph_executor/debug/graph_executor_debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ class GraphExecutorDebug : public GraphExecutor {
* By default, one `repeat` contains `number` runs. If this parameter is set,
* the parameters `number` will be dynamically adjusted to meet the
* minimum duration requirement of one `repeat`.
* \param max_repeat_ms The maximum number of repeats when measured time is equal to 0.
* It helps to avoid hanging during measurements.
* \param limit_zero_time_iterations The maximum number of measurement attempts that may
* report a time of exactly 0 while trying to satisfy `min_repeat_ms`. Once this limit is
* reached the attempts stop, which avoids hanging during measurements.
* \param cooldown_interval_ms The cooldown interval in milliseconds between the number of repeats
* defined by `repeats_to_cooldown`.
* \param repeats_to_cooldown The number of repeats before the
Expand All @@ -66,23 +67,25 @@ class GraphExecutorDebug : public GraphExecutor {
* representing the number of layers. Next the encoded real numbers are float32_t in the number of
* repeat multiplied by the number of layers.
*/
std::string RunIndividual(int number, int repeat, int min_repeat_ms, int max_repeat_num,
int cooldown_interval_ms, int repeats_to_cooldown) {
std::string RunIndividual(int number, int repeat, int min_repeat_ms,
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown) {
// warmup run
GraphExecutor::Run();
std::string tkey = module_->type_key();
std::vector<std::vector<double>> time_sec_per_op(op_execs_.size());
if (tkey == "rpc") {
// RPC modules rely on remote timing which implements the logic from the else branch.
for (size_t index = 0; index < op_execs_.size(); ++index) {
time_sec_per_op[index] = RunOpRPC(index, number, repeat, min_repeat_ms, max_repeat_num,
cooldown_interval_ms, repeats_to_cooldown);
time_sec_per_op[index] =
RunOpRPC(index, number, repeat, min_repeat_ms, limit_zero_time_iterations,
cooldown_interval_ms, repeats_to_cooldown);
}
} else {
int op = 0;
for (size_t index = 0; index < op_execs_.size(); ++index) {
std::string result_str =
RunIndividualNode(index, number, repeat, min_repeat_ms, max_repeat_num,
RunIndividualNode(index, number, repeat, min_repeat_ms, limit_zero_time_iterations,
cooldown_interval_ms, repeats_to_cooldown);
const double* blob_ptr = reinterpret_cast<const double*>(result_str.data());
for (int i = 0; i < repeat; ++i, ++blob_ptr) {
Expand Down Expand Up @@ -113,7 +116,7 @@ class GraphExecutorDebug : public GraphExecutor {
}

std::string RunIndividualNode(int node_index, int number, int repeat, int min_repeat_ms,
int max_repeat_num, int cooldown_interval_ms,
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown) {
std::string tkey = module_->type_key();

Expand All @@ -135,12 +138,13 @@ class GraphExecutorDebug : public GraphExecutor {
Device& d = devices_[0];
PackedFunc time_evaluator = profiling::WrapTimeEvaluator(
TypedPackedFunc<void()>([this, node_index]() { this->RunOpHost(node_index); }), d, number,
repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms, repeats_to_cooldown);
repeat, min_repeat_ms, limit_zero_time_iterations, cooldown_interval_ms,
repeats_to_cooldown);
return time_evaluator();
}

std::vector<double> RunOpRPC(int index, int number, int repeat, int min_repeat_ms,
int max_repeat_num, int cooldown_interval_ms,
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown) {
std::vector<double> results(repeat, 0);
// Right now we expect either "tvm_op" for nodes which run PackedFunc or "null" for nodes
Expand All @@ -167,7 +171,7 @@ class GraphExecutorDebug : public GraphExecutor {
runtime::Registry::Get("runtime.RPCTimeEvaluator")
->
operator()(module_, name, static_cast<int>(dev.device_type), dev.device_id, number,
repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms,
repeat, min_repeat_ms, limit_zero_time_iterations, cooldown_interval_ms,
repeats_to_cooldown, "");

int num_flat_args = num_inputs + num_outputs;
Expand Down Expand Up @@ -391,17 +395,18 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
int number = args[0];
int repeat = args[1];
int min_repeat_ms = args[2];
int max_repeat_num = args[3];
int limit_zero_time_iterations = args[3];
int cooldown_interval_ms = args[4];
int repeats_to_cooldown = args[5];
ICHECK_GT(number, 0);
ICHECK_GT(repeat, 0);
ICHECK_GE(min_repeat_ms, 0);
ICHECK_GE(max_repeat_num, 0);
ICHECK_GE(limit_zero_time_iterations, 0);
ICHECK_GE(cooldown_interval_ms, 0);
ICHECK_GT(repeats_to_cooldown, 0);
std::string blob = this->RunIndividual(number, repeat, min_repeat_ms, max_repeat_num,
cooldown_interval_ms, repeats_to_cooldown);
std::string blob =
this->RunIndividual(number, repeat, min_repeat_ms, limit_zero_time_iterations,
cooldown_interval_ms, repeats_to_cooldown);
TVMByteArray arr;
arr.size = blob.length();
arr.data = blob.data();
Expand All @@ -413,20 +418,20 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
int number = args[1];
int repeat = args[2];
int min_repeat_ms = args[3];
int max_repeat_num = args[4];
int limit_zero_time_iterations = args[4];
int cooldown_interval_ms = args[5];
int repeats_to_cooldown = args[6];
ICHECK_GE(node_index, 0);
ICHECK_LT(node_index, nodes_.size());
ICHECK_GT(number, 0);
ICHECK_GT(repeat, 0);
ICHECK_GE(min_repeat_ms, 0);
ICHECK_GE(max_repeat_num, 0);
ICHECK_GE(limit_zero_time_iterations, 0);
ICHECK_GE(cooldown_interval_ms, 0);
ICHECK_GT(repeats_to_cooldown, 0);
std::string blob =
this->RunIndividualNode(node_index, number, repeat, min_repeat_ms, max_repeat_num,
cooldown_interval_ms, repeats_to_cooldown);
std::string blob = this->RunIndividualNode(node_index, number, repeat, min_repeat_ms,
limit_zero_time_iterations, cooldown_interval_ms,
repeats_to_cooldown);
TVMByteArray arr;
arr.size = blob.length();
arr.data = blob.data();
Expand Down
Loading

0 comments on commit 08ffe3a

Please sign in to comment.