Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Total backfill stats #29

Merged
merged 2 commits into from
Jul 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 54 additions & 18 deletions scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,18 @@ import (

// SchedulerMetrics holds the scheduler statistics parsed out of sdiag output,
// one float64 per statistic. ParseSchedulerMetrics returns a pointer to this
// type; in the parts of it visible here, each field is set by running
// strconv.ParseFloat on the trimmed second ':'-separated piece of a matching
// line, with the parse error discarded.
//
// NOTE(review): the first nine fields appear twice below. This text looks like
// a rendered diff of scheduler.go (old lines followed by the re-aligned new
// lines), so the first group is presumably the removed side of the diff and
// not a real duplicate declaration — confirm against the actual file.
type SchedulerMetrics struct {
threads float64
queue_size float64
dbd_queue_size float64
last_cycle float64
mean_cycle float64
cycle_per_minute float64
backfill_last_cycle float64
backfill_mean_cycle float64
backfill_depth_mean float64
threads float64
queue_size float64
dbd_queue_size float64
last_cycle float64
mean_cycle float64
cycle_per_minute float64
backfill_last_cycle float64
backfill_mean_cycle float64
// Value of the "Depth Mean" line.
backfill_depth_mean float64
// Value of the "Total backfilled jobs (since last slurm start)" line.
total_backfilled_jobs_since_start float64
// Value of the "Total backfilled jobs (since last stats cycle start)" line.
total_backfilled_jobs_since_cycle float64
// Value of the "Total backfilled heterogeneous job components" line.
total_backfilled_heterogeneous float64
}

// Execute the sdiag command and return its output
Expand Down Expand Up @@ -79,6 +82,9 @@ func ParseSchedulerMetrics(input []byte) *SchedulerMetrics {
mc := regexp.MustCompile(`^[\s]+Mean cycle$`)
cpm := regexp.MustCompile(`^[\s]+Cycles per`)
dpm := regexp.MustCompile(`^[\s]+Depth Mean$`)
tbs := regexp.MustCompile(`^[\s]+Total backfilled jobs \(since last slurm start\)`)
tbc := regexp.MustCompile(`^[\s]+Total backfilled jobs \(since last stats cycle start\)`)
tbh := regexp.MustCompile(`^[\s]+Total backfilled heterogeneous job components`)
switch {
case st.MatchString(state) == true:
sm.threads, _ = strconv.ParseFloat(strings.TrimSpace(strings.Split(line, ":")[1]), 64)
Expand Down Expand Up @@ -106,6 +112,12 @@ func ParseSchedulerMetrics(input []byte) *SchedulerMetrics {
sm.cycle_per_minute, _ = strconv.ParseFloat(strings.TrimSpace(strings.Split(line, ":")[1]), 64)
case dpm.MatchString(state) == true:
sm.backfill_depth_mean, _ = strconv.ParseFloat(strings.TrimSpace(strings.Split(line, ":")[1]), 64)
case tbs.MatchString(state) == true:
sm.total_backfilled_jobs_since_start, _ = strconv.ParseFloat(strings.TrimSpace(strings.Split(line, ":")[1]), 64)
case tbc.MatchString(state) == true:
sm.total_backfilled_jobs_since_cycle, _ = strconv.ParseFloat(strings.TrimSpace(strings.Split(line, ":")[1]), 64)
case tbh.MatchString(state) == true:
sm.total_backfilled_heterogeneous, _ = strconv.ParseFloat(strings.TrimSpace(strings.Split(line, ":")[1]), 64)
}
}
}
Expand All @@ -125,15 +137,18 @@ func SchedulerGetMetrics() *SchedulerMetrics {

// SchedulerCollector is the collector structure for the scheduler metrics. It
// holds one *prometheus.Desc per metric, named after the corresponding
// SchedulerMetrics field. Describe sends these descriptors on its channel and
// Collect pairs each one with the matching SchedulerMetrics value as a gauge.
//
// NOTE(review): the first nine fields appear twice below. This text looks like
// a rendered diff of scheduler.go (old lines followed by the re-aligned new
// lines), so the first group is presumably the removed side of the diff and
// not a real duplicate declaration — confirm against the actual file.
type SchedulerCollector struct {
threads *prometheus.Desc
queue_size *prometheus.Desc
dbd_queue_size *prometheus.Desc
last_cycle *prometheus.Desc
mean_cycle *prometheus.Desc
cycle_per_minute *prometheus.Desc
backfill_last_cycle *prometheus.Desc
backfill_mean_cycle *prometheus.Desc
backfill_depth_mean *prometheus.Desc
threads *prometheus.Desc
queue_size *prometheus.Desc
dbd_queue_size *prometheus.Desc
last_cycle *prometheus.Desc
mean_cycle *prometheus.Desc
cycle_per_minute *prometheus.Desc
backfill_last_cycle *prometheus.Desc
backfill_mean_cycle *prometheus.Desc
backfill_depth_mean *prometheus.Desc
// Descriptors for the three backfill totals.
total_backfilled_jobs_since_start *prometheus.Desc
total_backfilled_jobs_since_cycle *prometheus.Desc
total_backfilled_heterogeneous *prometheus.Desc
}

// Send all metric descriptions
Expand All @@ -147,6 +162,9 @@ func (c *SchedulerCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.backfill_last_cycle
ch <- c.backfill_mean_cycle
ch <- c.backfill_depth_mean
ch <- c.total_backfilled_jobs_since_start
ch <- c.total_backfilled_jobs_since_cycle
ch <- c.total_backfilled_heterogeneous
}

// Send the values of all metrics
Expand All @@ -161,6 +179,9 @@ func (sc *SchedulerCollector) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(sc.backfill_last_cycle, prometheus.GaugeValue, sm.backfill_last_cycle)
ch <- prometheus.MustNewConstMetric(sc.backfill_mean_cycle, prometheus.GaugeValue, sm.backfill_mean_cycle)
ch <- prometheus.MustNewConstMetric(sc.backfill_depth_mean, prometheus.GaugeValue, sm.backfill_depth_mean)
ch <- prometheus.MustNewConstMetric(sc.total_backfilled_jobs_since_start, prometheus.GaugeValue, sm.total_backfilled_jobs_since_start)
ch <- prometheus.MustNewConstMetric(sc.total_backfilled_jobs_since_cycle, prometheus.GaugeValue, sm.total_backfilled_jobs_since_cycle)
ch <- prometheus.MustNewConstMetric(sc.total_backfilled_heterogeneous, prometheus.GaugeValue, sm.total_backfilled_heterogeneous)
}

// Returns the Slurm scheduler collector, used to register with the prometheus client
Expand Down Expand Up @@ -211,5 +232,20 @@ func NewSchedulerCollector() *SchedulerCollector {
"Information provided by the Slurm sdiag command, scheduler backfill mean depth",
nil,
nil),
total_backfilled_jobs_since_start: prometheus.NewDesc(
"slurm_scheduler_backfilled_jobs_since_start_total",
"Information provided by the Slurm sdiag command, number of jobs started thanks to backfilling since last slurm start",
nil,
nil),
total_backfilled_jobs_since_cycle: prometheus.NewDesc(
"slurm_scheduler_backfilled_jobs_since_cycle_total",
"Information provided by the Slurm sdiag command, number of jobs started thanks to backfilling since last time stats where reset",
nil,
nil),
total_backfilled_heterogeneous: prometheus.NewDesc(
"slurm_scheduler_backfilled_heterogeneous_total",
"Information provided by the Slurm sdiag command, number of heterogeneous job components started thanks to backfilling since last Slurm start",
nil,
nil),
}
}
1 change: 1 addition & 0 deletions test_data/sdiag.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Main schedule statistics (microseconds):
Backfilling stats
Total backfilled jobs (since last slurm start): 111544
Total backfilled jobs (since last stats cycle start): 793
Total backfilled heterogeneous job components: 10
Total cycles: 529
Last cycle when: Wed Apr 12 11:03:21 2017
Last cycle: 1942890
Expand Down