diff --git a/src/job-exporter/src/collector.py b/src/job-exporter/src/collector.py index 6e85e68fa9..e6846457e0 100644 --- a/src/job-exporter/src/collector.py +++ b/src/job-exporter/src/collector.py @@ -180,7 +180,9 @@ def __init__(self, name, sleep_time, atomic_ref, iteration_counter): histogram_key = "collector_%s_iteration_lantecy_seconds" % self.name histogram_desc = "latency for execute one interation of %s collector (seconds)" % \ self.name - self.collector_histogram = Histogram(histogram_key, histogram_desc) + self.collector_histogram = Histogram(histogram_key, histogram_desc, + buckets=(.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, + 7.5, 10.0, 12.5, 15.0, 17.5, 20.0, float("inf"))) logger.debug("init %s with sleep_time %d", self.name, self.sleep_time) @@ -198,7 +200,7 @@ def collect(self): logger.debug("finished collect metrcis from %s, will sleep for %s", self.name, self.sleep_time) - time.sleep(self.sleep_time) + time.sleep(self.sleep_time) def collect_impl(self): """ implementations are expected to return an array of @@ -363,8 +365,9 @@ def collect_impl(self): ContainerCollector.stats_timeout) self.stats_info_ref.get_and_set(stats_obj) - logger.debug("all_conns is %s, gpu_info is %s, stats_obj is %s", - all_conns, gpu_infos, stats_obj) + logger.debug("all_conns is %s", all_conns) + logger.debug("gpu_info is %s", gpu_infos) + logger.debug("stats_obj is %s", stats_obj) return self.collect_container_metrics(stats_obj, gpu_infos, all_conns) diff --git a/src/job-exporter/src/main.py b/src/job-exporter/src/main.py index a73d913d88..3d1b47f49c 100644 --- a/src/job-exporter/src/main.py +++ b/src/job-exporter/src/main.py @@ -142,7 +142,7 @@ def main(args): # should only sleep 10s to adapt to scrape interval collector_args = [ ("docker_daemon_collector", interval, collector.DockerCollector), - ("gpu_collector", interval, collector.GpuCollector, gpu_info_ref), + ("gpu_collector", interval / 2, collector.GpuCollector, gpu_info_ref), ("container_collector", interval - 18, collector.ContainerCollector, gpu_info_ref, stats_info_ref, args.interface), ("zombie_collector", interval, collector.ZombieCollector, stats_info_ref),