Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
fix missing gpu metrics (#2194)
Browse files: browse the repository at this point in the history
* fix missing gpu metrics

* fix format
  • Loading branch information
xudifsd authored and fanyangCS committed Feb 25, 2019
1 parent dd37f6c commit b64ae5f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
11 changes: 7 additions & 4 deletions src/job-exporter/src/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,9 @@ def __init__(self, name, sleep_time, atomic_ref, iteration_counter):
histogram_key = "collector_%s_iteration_lantecy_seconds" % self.name
histogram_desc = "latency for execute one interation of %s collector (seconds)" % \
self.name
self.collector_histogram = Histogram(histogram_key, histogram_desc)
self.collector_histogram = Histogram(histogram_key, histogram_desc,
buckets=(.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0,
7.5, 10.0, 12.5, 15.0, 17.5, 20.0, float("inf")))

logger.debug("init %s with sleep_time %d", self.name, self.sleep_time)

Expand All @@ -198,7 +200,7 @@ def collect(self):
logger.debug("finished collect metrcis from %s, will sleep for %s",
self.name, self.sleep_time)

time.sleep(self.sleep_time)
time.sleep(self.sleep_time)

def collect_impl(self):
""" implementations are expected to return an array of
Expand Down Expand Up @@ -363,8 +365,9 @@ def collect_impl(self):
ContainerCollector.stats_timeout)
self.stats_info_ref.get_and_set(stats_obj)

logger.debug("all_conns is %s, gpu_info is %s, stats_obj is %s",
all_conns, gpu_infos, stats_obj)
logger.debug("all_conns is %s", all_conns)
logger.debug("gpu_info is %s", gpu_infos)
logger.debug("stats_obj is %s", stats_obj)

return self.collect_container_metrics(stats_obj, gpu_infos, all_conns)

Expand Down
2 changes: 1 addition & 1 deletion src/job-exporter/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def main(args):
# should only sleep 10s to adapt to scrape interval
collector_args = [
("docker_daemon_collector", interval, collector.DockerCollector),
("gpu_collector", interval, collector.GpuCollector, gpu_info_ref),
("gpu_collector", interval / 2, collector.GpuCollector, gpu_info_ref),
("container_collector", interval - 18, collector.ContainerCollector,
gpu_info_ref, stats_info_ref, args.interface),
("zombie_collector", interval, collector.ZombieCollector, stats_info_ref),
Expand Down

0 comments on commit b64ae5f

Please sign in to comment.