Skip to content

Commit

Permalink
feat(inputs.linux_cpu): Add plugin to collect CPU metrics on Linux (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
fabianishere authored Aug 24, 2022
1 parent 45abba8 commit 7f3395f
Show file tree
Hide file tree
Showing 6 changed files with 481 additions and 0 deletions.
5 changes: 5 additions & 0 deletions plugins/inputs/all/linux_cpu.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//go:build !custom || inputs || inputs.linux_cpu

package all

import _ "github.com/influxdata/telegraf/plugins/inputs/linux_cpu" // register plugin
64 changes: 64 additions & 0 deletions plugins/inputs/linux_cpu/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Linux CPU Input Plugin

The `linux_cpu` plugin gathers CPU metrics exposed on Linux-based systems.

## Configuration

```toml @sample.conf
# Collects CPU metrics exposed on Linux
[[inputs.linux_cpu]]
## Path for sysfs filesystem.
## See https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt
## Defaults:
# host_sys = "/sys"

## CPU metrics collected by the plugin.
## Supported options:
## "cpufreq", "thermal"
## Defaults:
# metrics = ["cpufreq"]
```

## Metrics

The following tags are emitted by the plugin under the name `linux_cpu`:

| Tag | Description |
|-------|-----------------------|
| `cpu` | Identifier of the CPU |

The following fields are emitted by the plugin when selecting `cpufreq`:

| Metric name (field) | Description | Units |
|---------------------|------------------------------------------------------------|-------|
| `scaling_cur_freq` | Current frequency of the CPU as determined by CPUFreq | KHz |
| `scaling_min_freq` | Minimum frequency the governor can scale to | KHz |
| `scaling_max_freq` | Maximum frequency the governor can scale to | KHz |
| `cpuinfo_cur_freq` | Current frequency of the CPU as determined by the hardware | KHz |
| `cpuinfo_min_freq` | Minimum operating frequency of the CPU | KHz |
| `cpuinfo_max_freq` | Maximum operating frequency of the CPU | KHz |

The following fields are emitted by the plugin when selecting `thermal`:

| Metric name (field) | Description | Units |
|-----------------------|-------------------------------------------------------------|-------|
| `throttle_count` | Number of thermal throttle events reported by the CPU | |
| `throttle_max_time` | Maximum amount of time CPU was in throttled state | ms |
| `throttle_total_time` | Cumulative time during which the CPU was in throttled state | ms |

## Example Output

```shell
> linux_cpu,cpu=0,host=go scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=803157i,scaling_min_freq=400000i 1617621150000000000
> linux_cpu,cpu=1,host=go throttle_total_time=0i,scaling_cur_freq=802939i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i 1617621150000000000
> linux_cpu,cpu=10,host=go throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=838343i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i 1617621150000000000
> linux_cpu,cpu=11,host=go cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=800054i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i 1617621150000000000
> linux_cpu,cpu=2,host=go throttle_total_time=0i,scaling_cur_freq=800404i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i 1617621150000000000
> linux_cpu,cpu=3,host=go throttle_total_time=0i,scaling_cur_freq=800126i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i 1617621150000000000
> linux_cpu,cpu=4,host=go cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=800359i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i 1617621150000000000
> linux_cpu,cpu=5,host=go throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=800093i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i 1617621150000000000
> linux_cpu,cpu=6,host=go cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=741646i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i 1617621150000000000
> linux_cpu,cpu=7,host=go scaling_cur_freq=700006i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i 1617621150000000000
> linux_cpu,cpu=8,host=go throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=700046i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i 1617621150000000000
> linux_cpu,cpu=9,host=go throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=700075i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i 1617621150000000000
```
214 changes: 214 additions & 0 deletions plugins/inputs/linux_cpu/linux_cpu.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
//go:build linux

package linux_cpu

import (
_ "embed"

"fmt"
"io"
"os"
"path"
"path/filepath"
"strconv"
"strings"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal/choice"
"github.com/influxdata/telegraf/plugins/inputs"
)

const (
// defaultHostSys is the sysfs root used by Init when host_sys is left empty.
defaultHostSys = "/sys"
// cpufreq is the metrics option that enables the cpufreq/* properties.
cpufreq = "cpufreq"
// thermal is the metrics option that enables the thermal_throttle/* properties.
thermal = "thermal"
)

// LinuxCPU is the linux_cpu input plugin. It discovers CPUs below the sysfs
// root during Init and reads their numeric property files on every Gather.
type LinuxCPU struct {
// Log is the plugin logger; it is excluded from the TOML configuration.
Log telegraf.Logger `toml:"-"`
// PathSysfs is the sysfs root (option host_sys); Init substitutes "/sys" when it is empty.
PathSysfs string `toml:"host_sys"`
// Metrics lists the enabled metric groups (option metrics): "cpufreq" and/or "thermal".
Metrics []string `toml:"metrics"`
// cpus holds the CPUs found by Init; Gather iterates over them.
cpus []cpu
}

// cpu describes one discovered CPU directory and the property files read for it.
type cpu struct {
// id is the directory name with the "cpu" prefix removed; it is emitted as the "cpu" tag.
id string
// path is the CPU's directory as returned by the discovery glob.
path string
// props maps a field name to the full path of the file holding its value.
props map[string]string
}

// prop describes a candidate property file that discovery looks for in each CPU directory.
type prop struct {
// name is the field name the value is reported under.
name string
// path is the file location relative to the CPU directory.
path string
// optional marks properties that are silently skipped when the file cannot
// be opened; a CPU missing a non-optional property is not tracked at all.
optional bool
}

// sampleConfig holds the contents of sample.conf, embedded at build time.
//
//go:embed sample.conf
var sampleConfig string

// SampleConfig returns the embedded sample configuration of the plugin.
func (g *LinuxCPU) SampleConfig() string {
return sampleConfig
}

// Init applies the default sysfs root, checks that at least one metric group
// is configured and discovers the CPUs that Gather will read.
//
// It returns an error when no metric group is selected, when discovery itself
// fails, or when discovery yields no CPU that can be tracked.
func (g *LinuxCPU) Init() error {
	if len(g.PathSysfs) == 0 {
		g.PathSysfs = defaultHostSys
	}

	// An empty metrics list means there is nothing to collect.
	if len(g.Metrics) == 0 {
		return fmt.Errorf("no metrics selected")
	}

	detected, err := g.discoverCpus()
	switch {
	case err != nil:
		return err
	case len(detected) == 0:
		// CPU directories exist, but none of them offers the required
		// property files for the selected metrics.
		return fmt.Errorf("no CPUs detected to track")
	}

	g.cpus = detected
	return nil
}

// Gather reads every registered property file of every discovered CPU and
// emits one "linux_cpu" metric per CPU, tagged with the CPU id.
//
// If any property of a CPU cannot be read, the error is reported to the
// accumulator and no metric is emitted for that CPU in this round; the other
// CPUs are still processed. Gather itself always returns nil.
func (g *LinuxCPU) Gather(acc telegraf.Accumulator) error {
nextCPU:
	for _, core := range g.cpus {
		values := make(map[string]interface{}, len(core.props))

		for field, file := range core.props {
			value, err := readUintFromFile(file)
			if err != nil {
				// Drop the whole CPU for this round rather than
				// emitting a partial set of fields.
				acc.AddError(err)
				continue nextCPU
			}
			values[field] = value
		}

		acc.AddFields("linux_cpu", values, map[string]string{"cpu": core.id})
	}

	return nil
}

// discoverCpus globs <PathSysfs>/devices/system/cpu/cpu[0-9]* and, for each
// match, records the property files belonging to the enabled metric groups.
//
// A property is registered only if its file can be opened. Optional
// properties that cannot be opened are skipped silently; a non-optional one
// causes a warning and the CPU is left out of the result. CPUs that end up
// without any registered property are warned about and left out as well.
//
// It returns an error if the glob fails or matches nothing. The returned
// slice may be empty (nil) when every matched CPU was left out.
func (g *LinuxCPU) discoverCpus() ([]cpu, error) {
	pattern := path.Join(g.PathSysfs, "devices/system/cpu/cpu[0-9]*")
	matches, err := filepath.Glob(pattern)
	if err != nil {
		return nil, err
	}
	if len(matches) == 0 {
		return nil, fmt.Errorf("no CPUs detected at: %s", pattern)
	}

	// The wanted properties depend only on the configuration, so the list is
	// assembled once and reused for every CPU.
	var wanted []prop
	if choice.Contains(cpufreq, g.Metrics) {
		wanted = append(wanted,
			prop{name: "scaling_cur_freq", path: "cpufreq/scaling_cur_freq", optional: false},
			prop{name: "scaling_min_freq", path: "cpufreq/scaling_min_freq", optional: false},
			prop{name: "scaling_max_freq", path: "cpufreq/scaling_max_freq", optional: false},
			prop{name: "cpuinfo_cur_freq", path: "cpufreq/cpuinfo_cur_freq", optional: true},
			prop{name: "cpuinfo_min_freq", path: "cpufreq/cpuinfo_min_freq", optional: true},
			prop{name: "cpuinfo_max_freq", path: "cpufreq/cpuinfo_max_freq", optional: true},
		)
	}
	if choice.Contains(thermal, g.Metrics) {
		wanted = append(wanted,
			prop{name: "throttle_count", path: "thermal_throttle/core_throttle_count", optional: false},
			prop{name: "throttle_max_time", path: "thermal_throttle/core_throttle_max_time_ms", optional: false},
			prop{name: "throttle_total_time", path: "thermal_throttle/core_throttle_total_time_ms", optional: false},
		)
	}

	var found []cpu
	for _, cpuDir := range matches {
		_, dirName := filepath.Split(cpuDir)
		number := strings.TrimPrefix(dirName, "cpu")

		entry := cpu{
			id:    number,
			path:  cpuDir,
			props: make(map[string]string),
		}

		usable := true
		for _, candidate := range wanted {
			file := filepath.Join(cpuDir, candidate.path)
			if checkErr := validatePath(file); checkErr != nil {
				if candidate.optional {
					continue
				}

				// A required property is missing: give up on this CPU.
				g.Log.Warnf("Failed to load property %s: %v", file, checkErr)
				usable = false
				break
			}

			entry.props[candidate.name] = file
		}

		if len(entry.props) == 0 {
			g.Log.Warnf("No properties enabled/loaded for CPU %s", number)
			usable = false
		}

		if usable {
			found = append(found, entry)
		}
	}

	return found, nil
}

func init() {
inputs.Add("linux_cpu", func() telegraf.Input {
return &LinuxCPU{
Metrics: []string{"cpufreq"},
}
})
}

// validatePath checks that propPath can be opened for reading.
//
// It returns nil when the file opens successfully, a "does not exist" error
// when the path is missing, and a generic error carrying the underlying
// cause for any other failure to open it.
func validatePath(propPath string) error {
	handle, openErr := os.Open(propPath)

	switch {
	case os.IsNotExist(openErr):
		return fmt.Errorf("CPU property does not exist: [%s]", propPath)
	case openErr != nil:
		return fmt.Errorf("cannot get system information for CPU property: [%s] - %v", propPath, openErr)
	}

	// The file was only opened for reading, so a Close error carries no
	// useful information here.
	_ = handle.Close()
	return nil
}

// readUintFromFile reads the beginning of the file at propPath and parses it
// as an unsigned base-10 integer.
//
// At most 22 bytes are read with a single Read call, which is enough for the
// 20 decimal digits of the largest uint64 plus a line terminator. Leading and
// trailing whitespace (including an optional trailing newline) is removed
// before parsing, so files with and without a final newline yield the same
// value.
//
// It returns an error if the file cannot be opened or read, if it is empty,
// or if the content is not a valid unsigned integer.
func readUintFromFile(propPath string) (uint64, error) {
	f, err := os.Open(propPath)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	buffer := make([]byte, 22)

	n, err := f.Read(buffer)
	if err != nil && err != io.EOF {
		return 0, fmt.Errorf("error on reading file, err: %w", err)
	}
	if n == 0 {
		return 0, fmt.Errorf("error on reading file, file is empty")
	}

	// Trim whitespace instead of blindly dropping the last byte: the value is
	// not guaranteed to be followed by a newline, and cutting one byte
	// unconditionally would silently lose the final digit in that case.
	text := strings.TrimSpace(string(buffer[:n]))
	return strconv.ParseUint(text, 10, 64)
}
3 changes: 3 additions & 0 deletions plugins/inputs/linux_cpu/linux_cpu_nonlinux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//go:build !linux

package linux_cpu
Loading

0 comments on commit 7f3395f

Please sign in to comment.