Skip to content

Commit

Permalink
Aggregating device timing by kernel or kernel + shape in device timing…
Browse files Browse the repository at this point in the history
… summary (#39)
  • Loading branch information
zma2 authored Dec 12, 2023
1 parent 6a85c1d commit 132e274
Showing 1 changed file with 49 additions and 0 deletions.
49 changes: 49 additions & 0 deletions tools/unitrace/src/levelzero/ze_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,9 @@ class ZeCollector {
std::vector<std::string> knames;
size_t max_name_size = 0;
global_device_time_stats_mutex_.lock();

AggregateDeviceTimeStats();

std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list(
global_device_time_stats_->begin(), global_device_time_stats_->end());

Expand Down Expand Up @@ -1197,6 +1200,9 @@ class ZeCollector {
std::vector<std::string> knames;
size_t max_name_size = 0;
global_device_time_stats_mutex_.lock();

AggregateDeviceTimeStats();

std::set<std::pair<ZeKernelCommandNameKey, ZeKernelCommandTime>, utils::Comparator> sorted_list(
global_device_time_stats_->begin(), global_device_time_stats_->end());

Expand Down Expand Up @@ -3980,6 +3986,49 @@ class ZeCollector {
local_device_submissions_.CollectHostFunctionTimeStats(id, time);
}

void AggregateDeviceTimeStats() const {
// do not acquire global_device_time_stats_mutex_. caller dos it.
for (auto it = global_device_time_stats_->begin(); it != global_device_time_stats_->end(); it++) {
std::string kname;
if (it->first.tile_ >= 0) {
kname = "Tile #" + std::to_string(it->first.tile_) + ": " + GetZeKernelCommandName(it->first.kernel_command_id_, it->first.group_count_, it->first.mem_size_, options_.verbose);
}
else {
kname = GetZeKernelCommandName(it->first.kernel_command_id_, it->first.group_count_, it->first.mem_size_, options_.verbose);
}

auto it2 = it;
it2++;

for (; it2 != global_device_time_stats_->end();) {
std::string kname2;
if (it2->first.tile_ >= 0) {
kname2 = "Tile #" + std::to_string(it2->first.tile_) + ": " + GetZeKernelCommandName(it2->first.kernel_command_id_, it2->first.group_count_, it2->first.mem_size_, options_.verbose);
}
else {
kname2 = GetZeKernelCommandName(it2->first.kernel_command_id_, it2->first.group_count_, it2->first.mem_size_, options_.verbose);
}

if (kname2 == kname) {
it->second.append_time_ += it2->second.append_time_;
it->second.submit_time_ += it2->second.submit_time_;
it->second.execute_time_ += it2->second.execute_time_;
if (it->second.min_time_ > it2->second.min_time_) {
it->second.min_time_ = it2->second.min_time_;
}
if (it->second.max_time_ < it2->second.max_time_) {
it->second.max_time_ = it2->second.max_time_;
}
it->second.call_count_ += it2->second.call_count_;
it2 = global_device_time_stats_->erase(it2);
}
else {
it2++;
}
}
}
}

private: // Data
zel_tracer_handle_t tracer_ = nullptr;
CollectorOptions options_;
Expand Down

0 comments on commit 132e274

Please sign in to comment.