From 7e4408b446a0149e9452272b7f3bd21670c6ed66 Mon Sep 17 00:00:00 2001 From: Dave Tucker Date: Thu, 16 May 2024 18:47:04 +0100 Subject: [PATCH] wip: Fix it? Signed-off-by: Dave Tucker --- pkg/bpf/exporter.go | 27 ++++++++++--------- pkg/bpf/test_utils.go | 6 ++--- pkg/bpf/types.go | 2 -- .../bpf/process_bpf_collector.go | 27 +++++++++++++------ pkg/config/config.go | 4 --- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/pkg/bpf/exporter.go b/pkg/bpf/exporter.go index e3781447a2..367e500a44 100644 --- a/pkg/bpf/exporter.go +++ b/pkg/bpf/exporter.go @@ -181,21 +181,22 @@ func (e *exporter) attach() error { } if config.ExposeIRQCounterMetrics { - // attach softirq_entry tracepoint to kepler_irq_trace function - irq_prog, err := e.module.GetProgram("kepler_irq_trace") - if err != nil { - klog.Warningf("could not get kepler_irq_trace: %v", err) - // disable IRQ metric - config.ExposeIRQCounterMetrics = false - } else { - if _, err := irq_prog.AttachGeneric(); err != nil { - klog.Warningf("could not attach irq/softirq_entry: %v", err) - // disable IRQ metric - config.ExposeIRQCounterMetrics = false + err := func() error { + // attach softirq_entry tracepoint to kepler_irq_trace function + irq_prog, err := e.module.GetProgram("kepler_irq_trace") + if err != nil { + return fmt.Errorf("could not get kepler_irq_trace: %v", err) } - for _, event := range SoftIRQEvents { - e.enabledSoftwareCounters[event] = struct{}{} + if _, err := irq_prog.AttachGeneric(); err != nil { + return fmt.Errorf("could not attach irq/softirq_entry: %v", err) } + e.enabledSoftwareCounters[config.IRQNetTXLabel] = struct{}{} + e.enabledSoftwareCounters[config.IRQNetRXLabel] = struct{}{} + e.enabledSoftwareCounters[config.IRQBlockLabel] = struct{}{} + return nil + }() + if err != nil { + klog.Warningf("IRQ tracing disabled: %v", err) } } diff --git a/pkg/bpf/test_utils.go b/pkg/bpf/test_utils.go index ec0a4493ad..12dc33701e 100644 --- a/pkg/bpf/test_utils.go +++ b/pkg/bpf/test_utils.go @@ -21,9 +21,9 @@ func defaultHardwareCounters() map[string]struct{} { func defaultSoftwareCounters() map[string]struct{} { swCounters := map[string]struct{}{config.CPUTime: {}, config.PageCacheHit: {}} if config.ExposeIRQCounterMetrics { - for _, event := range SoftIRQEvents { - swCounters[event] = struct{}{} - } + swCounters[config.IRQNetTXLabel] = struct{}{} + swCounters[config.IRQNetRXLabel] = struct{}{} + swCounters[config.IRQBlockLabel] = struct{}{} } return swCounters } diff --git a/pkg/bpf/types.go b/pkg/bpf/types.go index 5feccf1c04..b780dd1ba2 100644 --- a/pkg/bpf/types.go +++ b/pkg/bpf/types.go @@ -20,8 +20,6 @@ import ( "github.com/sustainable-computing-io/kepler/pkg/config" ) -var SoftIRQEvents = []string{config.IRQNetTXLabel, config.IRQNetRXLabel, config.IRQBlockLabel} - type Exporter interface { SupportedMetrics() SupportedMetrics Detach() diff --git a/pkg/collector/resourceutilization/bpf/process_bpf_collector.go b/pkg/collector/resourceutilization/bpf/process_bpf_collector.go index 9d9ed7a946..befc5fe967 100644 --- a/pkg/collector/resourceutilization/bpf/process_bpf_collector.go +++ b/pkg/collector/resourceutilization/bpf/process_bpf_collector.go @@ -33,15 +33,26 @@ import ( type ProcessBPFMetrics = bpf.ProcessBPFMetrics // update software counter metrics -func updateSWCounters(key uint64, ct *ProcessBPFMetrics, processStats map[uint64]*stats.ProcessStats) { +func updateSWCounters(key uint64, ct *ProcessBPFMetrics, processStats map[uint64]*stats.ProcessStats, bpfSupportedMetrics bpf.SupportedMetrics) { // update ebpf metrics // first update CPU time and Page Cache Hit - processStats[key].ResourceUsage[config.CPUTime].AddDeltaStat(utils.GenericSocketID, ct.ProcessRunTime) - processStats[key].ResourceUsage[config.TaskClock].AddDeltaStat(utils.GenericSocketID, ct.TaskClockTime) - processStats[key].ResourceUsage[config.PageCacheHit].AddDeltaStat(utils.GenericSocketID, ct.PageCacheHit/(1000*1000)) - // update IRQ vector. Soft IRQ events has the events ordered - for i, event := range bpf.SoftIRQEvents { - processStats[key].ResourceUsage[event].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNR[i])) + for counterKey := range bpfSupportedMetrics.SoftwareCounters { + switch counterKey { + case config.CPUTime: + processStats[key].ResourceUsage[config.CPUTime].AddDeltaStat(utils.GenericSocketID, ct.ProcessRunTime) + case config.TaskClock: + processStats[key].ResourceUsage[config.TaskClock].AddDeltaStat(utils.GenericSocketID, ct.TaskClockTime) + case config.PageCacheHit: + processStats[key].ResourceUsage[config.PageCacheHit].AddDeltaStat(utils.GenericSocketID, ct.PageCacheHit/(1000*1000)) + case config.IRQNetTXLabel: + processStats[key].ResourceUsage[config.IRQNetTXLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNR[bpf.IRQNetTX])) + case config.IRQNetRXLabel: + processStats[key].ResourceUsage[config.IRQNetRXLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNR[bpf.IRQNetRX])) + case config.IRQBlockLabel: + processStats[key].ResourceUsage[config.IRQBlockLabel].AddDeltaStat(utils.GenericSocketID, uint64(ct.VecNR[bpf.IRQBlock])) + default: + klog.Errorf("counter %s is not supported\n", counterKey) + } } } @@ -126,7 +137,7 @@ func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]* // when the process metrics are updated, reset the idle counter pStat.IdleCounter = 0 - updateSWCounters(mapKey, &ct, processStats) + updateSWCounters(mapKey, &ct, processStats, bpfSupportedMetrics) updateHWCounters(mapKey, &ct, processStats, bpfSupportedMetrics) } } diff --git a/pkg/config/config.go b/pkg/config/config.go index f83cd3b32d..132488ccf4 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -457,10 +457,6 @@ func IsCgroupMetricsEnabled() bool { return ExposeCgroupMetrics } -func IsIRQCounterMetricsEnabled() bool { - return ExposeIRQCounterMetrics -} - func SetGpuUsageMetric(metric string) { GpuUsageMetric = metric }