diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 2a1d4e4ca74..9b9074f84de 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -9870,7 +9870,7 @@ System status metrics, like CPU and memory usage, that are collected from the op [float] == core Fields -`system-core` contains local CPU core stats. +`system-core` contains CPU metrics for a single core of a multi-core system. @@ -9889,7 +9889,7 @@ type: scaled_float format: percent -The percentage of CPU time spent in user space. On multi-core systems, you can have percentages that are greater than 100%. For example, if 3 cores are at 60% use, then the `cpu.user_p` will be 180%. +The percentage of CPU time spent in user space. [float] @@ -10038,7 +10038,7 @@ The amount of CPU time spent in involuntary wait by the virtual CPU while the hy type: long -The number of CPU cores. The CPU percentages can range from `[0, 100% * cores]`. +The number of CPU cores present on the host. The non-normalized percentages will have a maximum value of `100% * cores`. The normalized percentages already take this value into account and have a maximum value of 100%. [float] @@ -10121,6 +10121,86 @@ format: percent The percentage of CPU time spent in involuntary wait by the virtual CPU while the hypervisor was servicing another processor. Available only on Unix. +[float] +=== system.cpu.user.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent in user space. + + +[float] +=== system.cpu.system.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent in kernel space. + + +[float] +=== system.cpu.nice.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent on low-priority processes. + + +[float] +=== system.cpu.idle.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent idle. 
+ + +[float] +=== system.cpu.iowait.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent in wait (on disk). + + +[float] +=== system.cpu.irq.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent servicing and handling hardware interrupts. + + +[float] +=== system.cpu.softirq.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent servicing and handling software interrupts. + + +[float] +=== system.cpu.steal.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent in involuntary wait by the virtual CPU while the hypervisor was servicing another processor. Available only on Unix. + + [float] === system.cpu.user.ticks @@ -10419,7 +10499,7 @@ Total space (used plus free). [float] == load Fields -Load averages. +CPU load averages. @@ -10452,7 +10532,7 @@ Load average for the last 15 minutes. type: scaled_float -Load divided by the number of cores for the last minute. +Load for the last minute divided by the number of cores. [float] @@ -10460,7 +10540,7 @@ Load divided by the number of cores for the last minute. type: scaled_float -Load divided by the number of cores for the last 5 minutes. +Load for the last 5 minutes divided by the number of cores. [float] @@ -10468,7 +10548,15 @@ Load divided by the number of cores for the last 5 minutes. type: scaled_float -Load divided by the number of cores for the last 15 minutes. +Load for the last 15 minutes divided by the number of cores. + + +[float] +=== system.load.cores + +type: long + +The number of CPU cores present on the host. [float] @@ -10789,6 +10877,16 @@ format: percent The percentage of CPU time spent by the process since the last update. Its value is similar to the %CPU value of the process displayed by the top command on Unix systems. 
+[float] +=== system.process.cpu.total.norm.pct + +type: scaled_float + +format: percent + +The percentage of CPU time spent by the process since the last event. This value is normalized by the number of CPU cores and it ranges from 0 to 100%. + + [float] === system.process.cpu.system diff --git a/metricbeat/mb/testing/modules.go b/metricbeat/mb/testing/modules.go index b2de09cf528..37013306267 100644 --- a/metricbeat/mb/testing/modules.go +++ b/metricbeat/mb/testing/modules.go @@ -126,6 +126,25 @@ func NewEventsFetcher(t testing.TB, config interface{}) mb.EventsFetcher { return fetcher } +func NewReportingMetricSet(t testing.TB, config interface{}) mb.ReportingMetricSet { + metricSet := newMetricSet(t, config) + + reportingMetricSet, ok := metricSet.(mb.ReportingMetricSet) + if !ok { + t.Fatal("MetricSet does not implement ReportingMetricSet") + } + + return reportingMetricSet +} + +// ReportingFetch runs the given reporting metricset and returns all of the +// events and errors that occur during that period. +func ReportingFetch(metricSet mb.ReportingMetricSet) ([]common.MapStr, []error) { + r := &capturingReporter{} + metricSet.Fetch(r) + return r.events, r.errs +} + // NewPushMetricSet instantiates a new PushMetricSet using the given // configuration. The ModuleFactory and MetricSetFactory are obtained from the // global Registry. diff --git a/metricbeat/metricbeat.full.yml b/metricbeat/metricbeat.full.yml index deacef650cd..d66fc109de2 100644 --- a/metricbeat/metricbeat.full.yml +++ b/metricbeat/metricbeat.full.yml @@ -47,8 +47,9 @@ metricbeat.modules: period: 10s processes: ['.*'] - # if true, exports the CPU usage in ticks, together with the percentage values - #cpu_ticks: false + # Configure the metric types that are included by these metricsets. + cpu.metrics: ["percentages"] # The other available options are normalized_percentages and ticks. + core.metrics: ["percentages"] # The other available option is ticks. 
# These options allow you to filter out all processes that are not # in the top N by CPU or memory, in order to reduce the number of documents created. diff --git a/metricbeat/module/system/_meta/config.full.yml b/metricbeat/module/system/_meta/config.full.yml index 4dde419d2ca..93928f7c185 100644 --- a/metricbeat/module/system/_meta/config.full.yml +++ b/metricbeat/module/system/_meta/config.full.yml @@ -15,8 +15,9 @@ period: 10s processes: ['.*'] - # if true, exports the CPU usage in ticks, together with the percentage values - #cpu_ticks: false + # Configure the metric types that are included by these metricsets. + cpu.metrics: ["percentages"] # The other available options are normalized_percentages and ticks. + core.metrics: ["percentages"] # The other available option is ticks. # These options allow you to filter out all processes that are not # in the top N by CPU or memory, in order to reduce the number of documents created. diff --git a/metricbeat/module/system/core/_meta/data.json b/metricbeat/module/system/core/_meta/data.json index 396ed88f17b..3af61d32854 100644 --- a/metricbeat/module/system/core/_meta/data.json +++ b/metricbeat/module/system/core/_meta/data.json @@ -11,14 +11,14 @@ }, "system": { "core": { - "id": 0, + "id": 1, "idle": { - "pct": 0.9063, - "ticks": 22204290 + "pct": 0.98, + "ticks": 243110733 }, "iowait": { "pct": 0, - "ticks": 79386 + "ticks": 0 }, "irq": { "pct": 0, @@ -30,21 +30,20 @@ }, "softirq": { "pct": 0, - "ticks": 7944 + "ticks": 0 }, "steal": { "pct": 0, "ticks": 0 }, "system": { - "pct": 0.0208, - "ticks": 160489 + "pct": 0, + "ticks": 840906 }, "user": { - "pct": 0.0729, - "ticks": 417331 + "pct": 0.02, + "ticks": 1266791 } } - }, - "type": "metricsets" + } } \ No newline at end of file diff --git a/metricbeat/module/system/core/_meta/fields.yml b/metricbeat/module/system/core/_meta/fields.yml index 00b90419156..eef7e14d69a 100644 --- a/metricbeat/module/system/core/_meta/fields.yml +++ 
b/metricbeat/module/system/core/_meta/fields.yml @@ -1,19 +1,19 @@ - name: core type: group description: > - `system-core` contains local CPU core stats. + `system-core` contains CPU metrics for a single core of a multi-core system. fields: - name: id type: long description: > CPU Core number. + # Percentages - name: user.pct type: scaled_float format: percent description: > - The percentage of CPU time spent in user space. On multi-core systems, you can have percentages that are greater than 100%. - For example, if 3 cores are at 60% use, then the `cpu.user_p` will be 180%. + The percentage of CPU time spent in user space. - name: user.ticks type: long @@ -100,4 +100,3 @@ The amount of CPU time spent in involuntary wait by the virtual CPU while the hypervisor was servicing another processor. Available only on Unix. - diff --git a/metricbeat/module/system/core/config.go b/metricbeat/module/system/core/config.go new file mode 100644 index 00000000000..4fa5c11929d --- /dev/null +++ b/metricbeat/module/system/core/config.go @@ -0,0 +1,46 @@ +package core + +import ( + "strings" + + "github.com/elastic/beats/libbeat/logp" + "github.com/pkg/errors" +) + +// Core metric types. +const ( + percentages = "percentages" + ticks = "ticks" +) + +// Config for the system core metricset. +type Config struct { + Metrics []string `config:"core.metrics"` + CPUTicks *bool `config:"cpu_ticks"` // Deprecated. +} + +// Validate validates the core config. +func (c Config) Validate() error { + if c.CPUTicks != nil { + logp.Deprecate("6.1", "cpu_ticks is deprecated. 
Add 'ticks' to the core.metrics list.") + } + + if len(c.Metrics) == 0 { + return errors.New("core.metrics cannot be empty") + } + + for _, metric := range c.Metrics { + switch strings.ToLower(metric) { + case percentages, ticks: + default: + return errors.Errorf("invalid core.metrics value '%v' (valid "+ + "options are %v and %v)", metric, percentages, ticks) + } + } + + return nil +} + +var defaultConfig = Config{ + Metrics: []string{percentages}, +} diff --git a/metricbeat/module/system/core/core.go b/metricbeat/module/system/core/core.go index 545a22d6e2b..b80e00a11de 100644 --- a/metricbeat/module/system/core/core.go +++ b/metricbeat/module/system/core/core.go @@ -3,12 +3,14 @@ package core import ( + "strings" + + "github.com/pkg/errors" + "github.com/elastic/beats/libbeat/common" "github.com/elastic/beats/metricbeat/mb" "github.com/elastic/beats/metricbeat/mb/parse" - "github.com/elastic/beats/metricbeat/module/system/cpu" - - "github.com/pkg/errors" + "github.com/elastic/beats/metricbeat/module/system" ) func init() { @@ -20,83 +22,65 @@ func init() { // MetricSet for fetching system core metrics. type MetricSet struct { mb.BaseMetricSet - cpu *cpu.CPU + config Config + cores *system.CPUCoresMonitor } -// New is a mb.MetricSetFactory that returns a cores.MetricSet. +// New returns a new core MetricSet. func New(base mb.BaseMetricSet) (mb.MetricSet, error) { - config := struct { - CpuTicks bool `config:"cpu_ticks"` // export CPU usage in ticks - }{ - CpuTicks: false, - } - + config := defaultConfig if err := base.Module().UnpackConfig(&config); err != nil { return nil, err } + if config.CPUTicks != nil && *config.CPUTicks { + config.Metrics = append(config.Metrics, "ticks") + } + return &MetricSet{ BaseMetricSet: base, - cpu: &cpu.CPU{ - CpuPerCore: true, - CpuTicks: config.CpuTicks, - }, + config: config, + cores: new(system.CPUCoresMonitor), }, nil } // Fetch fetches CPU core metrics from the OS. 
-func (m *MetricSet) Fetch() ([]common.MapStr, error) { - cpuCoreStat, err := cpu.GetCpuTimesList() +func (m *MetricSet) Fetch(report mb.Reporter) { + samples, err := m.cores.Sample() if err != nil { - return nil, errors.Wrap(err, "cpu core times") + report.Error(errors.Wrap(err, "failed to sample CPU core times")) + return } - m.cpu.AddCpuPercentageList(cpuCoreStat) - - cores := make([]common.MapStr, 0, len(cpuCoreStat)) - for core, stat := range cpuCoreStat { + for id, sample := range samples { + event := common.MapStr{"id": id} - coreStat := common.MapStr{ - "user": common.MapStr{ - "pct": stat.UserPercent, - }, - "system": common.MapStr{ - "pct": stat.SystemPercent, - }, - "idle": common.MapStr{ - "pct": stat.IdlePercent, - }, - "iowait": common.MapStr{ - "pct": stat.IOwaitPercent, - }, - "irq": common.MapStr{ - "pct": stat.IrqPercent, - }, - "nice": common.MapStr{ - "pct": stat.NicePercent, - }, - "softirq": common.MapStr{ - "pct": stat.SoftIrqPercent, - }, - "steal": common.MapStr{ - "pct": stat.StealPercent, - }, + for _, metric := range m.config.Metrics { + switch strings.ToLower(metric) { + case percentages: + // Percentages() is called here (not NormalizedPercentages()) because per core metrics already range on [0, 100%]. 
+ pct := sample.Percentages() + event.Put("user.pct", pct.User) + event.Put("system.pct", pct.System) + event.Put("idle.pct", pct.Idle) + event.Put("iowait.pct", pct.IOWait) + event.Put("irq.pct", pct.IRQ) + event.Put("nice.pct", pct.Nice) + event.Put("softirq.pct", pct.SoftIRQ) + event.Put("steal.pct", pct.Steal) + case ticks: + ticks := sample.Ticks() + event.Put("user.ticks", ticks.User) + event.Put("system.ticks", ticks.System) + event.Put("idle.ticks", ticks.Idle) + event.Put("iowait.ticks", ticks.IOWait) + event.Put("irq.ticks", ticks.IRQ) + event.Put("nice.ticks", ticks.Nice) + event.Put("softirq.ticks", ticks.SoftIRQ) + event.Put("steal.ticks", ticks.Steal) + } } - if m.cpu.CpuTicks { - coreStat["user"].(common.MapStr)["ticks"] = stat.User - coreStat["system"].(common.MapStr)["ticks"] = stat.Sys - coreStat["nice"].(common.MapStr)["ticks"] = stat.Nice - coreStat["idle"].(common.MapStr)["ticks"] = stat.Idle - coreStat["iowait"].(common.MapStr)["ticks"] = stat.Wait - coreStat["irq"].(common.MapStr)["ticks"] = stat.Irq - coreStat["softirq"].(common.MapStr)["ticks"] = stat.SoftIrq - coreStat["steal"].(common.MapStr)["ticks"] = stat.Stolen - } - - coreStat["id"] = core - cores = append(cores, coreStat) + report.Event(event) } - - return cores, nil } diff --git a/metricbeat/module/system/core/core_test.go b/metricbeat/module/system/core/core_test.go index 107a496a5b5..8d4b6f7d69b 100644 --- a/metricbeat/module/system/core/core_test.go +++ b/metricbeat/module/system/core/core_test.go @@ -4,29 +4,34 @@ package core import ( "testing" "time" mbtest "github.com/elastic/beats/metricbeat/mb/testing" ) func TestData(t *testing.T) { - f := mbtest.NewEventsFetcher(t, getConfig()) + f := mbtest.NewReportingMetricSet(t, getConfig()) - // Fetch once in advance to have percentage values - f.Fetch() - time.Sleep(1 * time.Second) + mbtest.ReportingFetch(f) + time.Sleep(500 * time.Millisecond) - err := mbtest.WriteEvents(f, t) - if err != nil { - t.Fatal("write", err) + events, 
errs := mbtest.ReportingFetch(f) + if len(errs) > 0 { + t.Fatal(errs) + } + if len(events) == 0 { + t.Fatal("no events returned") } + + event := mbtest.CreateFullEvent(f, events[0]) + mbtest.WriteEventToDataJSON(t, event) } func getConfig() map[string]interface{} { return map[string]interface{}{ - "module": "system", - "metricsets": []string{"core"}, - "cpu_ticks": true, + "module": "system", + "metricsets": []string{"core"}, + "core.metrics": []string{"percentages", "ticks"}, } } diff --git a/metricbeat/module/system/core/doc.go b/metricbeat/module/system/core/doc.go index faecb5cdd76..0ddd0d71199 100644 --- a/metricbeat/module/system/core/doc.go +++ b/metricbeat/module/system/core/doc.go @@ -1,4 +1,2 @@ -/* -Package core collects cpu core metrics from the host OS. -*/ +// Package core collects cpu core metrics from the host OS. package core diff --git a/metricbeat/module/system/cpu/_meta/data.json b/metricbeat/module/system/cpu/_meta/data.json index 17d7e58cde3..7d37e8c13cb 100644 --- a/metricbeat/module/system/cpu/_meta/data.json +++ b/metricbeat/module/system/cpu/_meta/data.json @@ -13,38 +13,61 @@ "cpu": { "cores": 8, "idle": { - "pct": 7.0854, - "ticks": 1617015818 + "norm": { + "pct": 0.8851 + }, + "pct": 7.0811, + "ticks": 1680102981 }, "iowait": { + "norm": { + "pct": 0 + }, "pct": 0, "ticks": 0 }, "irq": { + "norm": { + "pct": 0 + }, "pct": 0, "ticks": 0 }, "nice": { + "norm": { + "pct": 0 + }, "pct": 0, "ticks": 0 }, "softirq": { + "norm": { + "pct": 0 + }, "pct": 0, "ticks": 0 }, "steal": { + "norm": { + "pct": 0 + }, "pct": 0, "ticks": 0 }, "system": { - "pct": 0.3317, - "ticks": 40488863 + "norm": { + "pct": 0.0412 + }, + "pct": 0.3296, + "ticks": 42755361 }, "user": { - "pct": 0.5829, - "ticks": 48194733 + "norm": { + "pct": 0.0737 + }, + "pct": 0.5893, + "ticks": 50330417 } } - }, - "type": "metricsets" + } } \ No newline at end of file diff --git a/metricbeat/module/system/cpu/_meta/fields.yml b/metricbeat/module/system/cpu/_meta/fields.yml index 
3d874af492f..8747e6f5b41 100644 --- a/metricbeat/module/system/cpu/_meta/fields.yml +++ b/metricbeat/module/system/cpu/_meta/fields.yml @@ -6,14 +6,19 @@ - name: cores type: long description: > - The number of CPU cores. The CPU percentages can range from `[0, 100% * cores]`. + The number of CPU cores present on the host. The non-normalized + percentages will have a maximum value of `100% * cores`. The + normalized percentages already take this value into account and have + a maximum value of 100%. + # Percentages - name: user.pct type: scaled_float format: percent description: > - The percentage of CPU time spent in user space. On multi-core systems, you can have percentages that are greater than 100%. - For example, if 3 cores are at 60% use, then the `system.cpu.user.pct` will be 180%. + The percentage of CPU time spent in user space. On multi-core systems, + you can have percentages that are greater than 100%. For example, if 3 + cores are at 60% use, then the `system.cpu.user.pct` will be 180%. - name: system.pct type: scaled_float @@ -59,6 +64,58 @@ was servicing another processor. Available only on Unix. + # Normalized Percentages + - name: user.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent in user space. + + - name: system.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent in kernel space. + + - name: nice.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent on low-priority processes. + + - name: idle.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent idle. + + - name: iowait.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent in wait (on disk). + + - name: irq.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent servicing and handling hardware interrupts. 
+ + - name: softirq.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent servicing and handling software interrupts. + + - name: steal.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent in involuntary wait by the virtual CPU while the hypervisor + was servicing another processor. + Available only on Unix. + + # Ticks - name: user.ticks type: long description: > diff --git a/metricbeat/module/system/cpu/config.go b/metricbeat/module/system/cpu/config.go new file mode 100644 index 00000000000..9c1ec5e31fa --- /dev/null +++ b/metricbeat/module/system/cpu/config.go @@ -0,0 +1,48 @@ +package cpu + +import ( + "strings" + + "github.com/elastic/beats/libbeat/logp" + "github.com/pkg/errors" +) + +// CPU metric types. +const ( + percentages = "percentages" + normalizedPercentages = "normalized_percentages" + ticks = "ticks" +) + +// Config for the system cpu metricset. +type Config struct { + Metrics []string `config:"cpu.metrics"` + CPUTicks *bool `config:"cpu_ticks"` // Deprecated. +} + +// Validate validates the cpu config. +func (c Config) Validate() error { + if c.CPUTicks != nil { + logp.Deprecate("6.1", "cpu_ticks is deprecated. 
Add 'ticks' to the cpu.metrics list.") + } + + if len(c.Metrics) == 0 { + return errors.New("cpu.metrics cannot be empty") + } + + for _, metric := range c.Metrics { + switch strings.ToLower(metric) { + case percentages, normalizedPercentages, ticks: + default: + return errors.Errorf("invalid cpu.metrics value '%v' (valid "+ + "options are %v, %v, and %v)", metric, percentages, + normalizedPercentages, ticks) + } + } + + return nil +} + +var defaultConfig = Config{ + Metrics: []string{percentages}, +} diff --git a/metricbeat/module/system/cpu/cpu.go b/metricbeat/module/system/cpu/cpu.go index a07a261a70c..d9be2ca0b15 100644 --- a/metricbeat/module/system/cpu/cpu.go +++ b/metricbeat/module/system/cpu/cpu.go @@ -3,11 +3,14 @@ package cpu import ( + "strings" + + "github.com/pkg/errors" + "github.com/elastic/beats/libbeat/common" "github.com/elastic/beats/metricbeat/mb" "github.com/elastic/beats/metricbeat/mb/parse" - - "github.com/pkg/errors" + "github.com/elastic/beats/metricbeat/module/system" ) func init() { @@ -19,79 +22,71 @@ func init() { // MetricSet for fetching system CPU metrics. type MetricSet struct { mb.BaseMetricSet - cpu *CPU + config Config + cpu *system.CPUMonitor } // New is a mb.MetricSetFactory that returns a cpu.MetricSet. func New(base mb.BaseMetricSet) (mb.MetricSet, error) { - - config := struct { - CpuTicks bool `config:"cpu_ticks"` // export CPU usage in ticks - }{ - CpuTicks: false, - } - + config := defaultConfig if err := base.Module().UnpackConfig(&config); err != nil { return nil, err } + if config.CPUTicks != nil && *config.CPUTicks { + config.Metrics = append(config.Metrics, "ticks") + } + return &MetricSet{ BaseMetricSet: base, - cpu: &CPU{ - CpuTicks: config.CpuTicks, - }, + config: config, + cpu: new(system.CPUMonitor), }, nil } // Fetch fetches CPU metrics from the OS. 
func (m *MetricSet) Fetch() (common.MapStr, error) { - - stat, err := GetCpuTimes() + sample, err := m.cpu.Sample() if err != nil { - return nil, errors.Wrap(err, "cpu times") + return nil, errors.Wrap(err, "failed to fetch CPU times") } - m.cpu.AddCpuPercentage(stat) - - cpuCores := GetCores() - cpuStat := common.MapStr{ - "cores": cpuCores, - "user": common.MapStr{ - "pct": stat.UserPercent, - }, - "system": common.MapStr{ - "pct": stat.SystemPercent, - }, - "idle": common.MapStr{ - "pct": stat.IdlePercent, - }, - "iowait": common.MapStr{ - "pct": stat.IOwaitPercent, - }, - "irq": common.MapStr{ - "pct": stat.IrqPercent, - }, - "nice": common.MapStr{ - "pct": stat.NicePercent, - }, - "softirq": common.MapStr{ - "pct": stat.SoftIrqPercent, - }, - "steal": common.MapStr{ - "pct": stat.StealPercent, - }, - } + event := common.MapStr{"cores": system.NumCPU} - if m.cpu.CpuTicks { - cpuStat["user"].(common.MapStr)["ticks"] = stat.User - cpuStat["system"].(common.MapStr)["ticks"] = stat.Sys - cpuStat["nice"].(common.MapStr)["ticks"] = stat.Nice - cpuStat["idle"].(common.MapStr)["ticks"] = stat.Idle - cpuStat["iowait"].(common.MapStr)["ticks"] = stat.Wait - cpuStat["irq"].(common.MapStr)["ticks"] = stat.Irq - cpuStat["softirq"].(common.MapStr)["ticks"] = stat.SoftIrq - cpuStat["steal"].(common.MapStr)["ticks"] = stat.Stolen + for _, metric := range m.config.Metrics { + switch strings.ToLower(metric) { + case percentages: + pct := sample.Percentages() + event.Put("user.pct", pct.User) + event.Put("system.pct", pct.System) + event.Put("idle.pct", pct.Idle) + event.Put("iowait.pct", pct.IOWait) + event.Put("irq.pct", pct.IRQ) + event.Put("nice.pct", pct.Nice) + event.Put("softirq.pct", pct.SoftIRQ) + event.Put("steal.pct", pct.Steal) + case normalizedPercentages: + normalizedPct := sample.NormalizedPercentages() + event.Put("user.norm.pct", normalizedPct.User) + event.Put("system.norm.pct", normalizedPct.System) + event.Put("idle.norm.pct", normalizedPct.Idle) + 
event.Put("iowait.norm.pct", normalizedPct.IOWait) + event.Put("irq.norm.pct", normalizedPct.IRQ) + event.Put("nice.norm.pct", normalizedPct.Nice) + event.Put("softirq.norm.pct", normalizedPct.SoftIRQ) + event.Put("steal.norm.pct", normalizedPct.Steal) + case ticks: + ticks := sample.Ticks() + event.Put("user.ticks", ticks.User) + event.Put("system.ticks", ticks.System) + event.Put("idle.ticks", ticks.Idle) + event.Put("iowait.ticks", ticks.IOWait) + event.Put("irq.ticks", ticks.IRQ) + event.Put("nice.ticks", ticks.Nice) + event.Put("softirq.ticks", ticks.SoftIRQ) + event.Put("steal.ticks", ticks.Steal) + } } - return cpuStat, nil + return event, nil } diff --git a/metricbeat/module/system/cpu/cpu_test.go b/metricbeat/module/system/cpu/cpu_test.go index 13f87472cee..f1331c4a46a 100644 --- a/metricbeat/module/system/cpu/cpu_test.go +++ b/metricbeat/module/system/cpu/cpu_test.go @@ -11,21 +11,25 @@ import ( func TestData(t *testing.T) { f := mbtest.NewEventFetcher(t, getConfig()) - - // Do a first fetch to have percentages - f.Fetch() + _, err := f.Fetch() + if err != nil { + t.Fatal(err) + } time.Sleep(1 * time.Second) - err := mbtest.WriteEvent(f, t) + event, err := f.Fetch() if err != nil { - t.Fatal("write", err) + t.Fatal(err) } + + event = mbtest.CreateFullEvent(f, event) + mbtest.WriteEventToDataJSON(t, event) } func getConfig() map[string]interface{} { return map[string]interface{}{ - "module": "system", - "metricsets": []string{"cpu"}, - "cpu_ticks": true, + "module": "system", + "metricsets": []string{"cpu"}, + "cpu.metrics": []string{"percentages", "normalized_percentages", "ticks"}, } } diff --git a/metricbeat/module/system/cpu/doc.go b/metricbeat/module/system/cpu/doc.go index 4e4ebc1f5b0..9dd9378510a 100644 --- a/metricbeat/module/system/cpu/doc.go +++ b/metricbeat/module/system/cpu/doc.go @@ -1,4 +1,2 @@ -/* -Package cpu collects CPU metrics from the host OS. -*/ +// Package cpu collects CPU metrics from the host OS. 
package cpu diff --git a/metricbeat/module/system/cpu/helper.go b/metricbeat/module/system/cpu/helper.go deleted file mode 100644 index 952346e517f..00000000000 --- a/metricbeat/module/system/cpu/helper.go +++ /dev/null @@ -1,135 +0,0 @@ -// +build darwin freebsd linux openbsd windows - -package cpu - -import ( - "runtime" - - "github.com/elastic/beats/metricbeat/module/system" - sigar "github.com/elastic/gosigar" -) - -// NumCPU is the number of CPU cores the system has. -var NumCPU = runtime.NumCPU() - -type CPU struct { - CpuPerCore bool - LastCpuTimes *CpuTimes - LastCpuTimesList []CpuTimes - CpuTicks bool - Cores int -} - -type CpuTimes struct { - sigar.Cpu - UserPercent float64 `json:"user_p"` - SystemPercent float64 `json:"system_p"` - IdlePercent float64 `json:"idle_p"` - IOwaitPercent float64 `json:"iowait_p"` - IrqPercent float64 `json:"irq_p"` - NicePercent float64 `json:"nice_p"` - SoftIrqPercent float64 `json:"softirq_p"` - StealPercent float64 `json:"steal_p"` -} - -func GetCpuTimes() (*CpuTimes, error) { - - cpu := sigar.Cpu{} - err := cpu.Get() - if err != nil { - return nil, err - } - - return &CpuTimes{Cpu: cpu}, nil -} - -func GetCpuTimesList() ([]CpuTimes, error) { - - cpuList := sigar.CpuList{} - err := cpuList.Get() - if err != nil { - return nil, err - } - - cpuTimes := make([]CpuTimes, len(cpuList.List)) - - for i, cpu := range cpuList.List { - cpuTimes[i] = CpuTimes{Cpu: cpu} - } - - return cpuTimes, nil -} - -func GetCpuPercentage(last *CpuTimes, current *CpuTimes) *CpuTimes { - - if last != nil && current != nil { - allDelta := current.Cpu.Total() - last.Cpu.Total() - - if allDelta == 0 { - // first inquiry - return current - } - - calculate := func(field2 uint64, field1 uint64) float64 { - - perc := 0.0 - delta := int64(field2 - field1) - perc = float64(delta) / float64(allDelta) - return system.Round(perc*float64(NumCPU), .5, 4) - } - - current.UserPercent = calculate(current.Cpu.User, last.Cpu.User) - current.SystemPercent = 
calculate(current.Cpu.Sys, last.Cpu.Sys) - current.IdlePercent = calculate(current.Cpu.Idle, last.Cpu.Idle) - current.IOwaitPercent = calculate(current.Cpu.Wait, last.Cpu.Wait) - current.IrqPercent = calculate(current.Cpu.Irq, last.Cpu.Irq) - current.NicePercent = calculate(current.Cpu.Nice, last.Cpu.Nice) - current.SoftIrqPercent = calculate(current.Cpu.SoftIrq, last.Cpu.SoftIrq) - current.StealPercent = calculate(current.Cpu.Stolen, last.Cpu.Stolen) - } - - return current -} - -func GetCpuPercentageList(last, current []CpuTimes) []CpuTimes { - - if last != nil && current != nil && len(last) == len(current) { - - calculate := func(field2 uint64, field1 uint64, all_delta uint64) float64 { - - perc := 0.0 - delta := int64(field2 - field1) - perc = float64(delta) / float64(all_delta) - return system.Round(perc, .5, 4) - } - - for i := 0; i < len(last); i++ { - allDelta := current[i].Cpu.Total() - last[i].Cpu.Total() - current[i].UserPercent = calculate(current[i].Cpu.User, last[i].Cpu.User, allDelta) - current[i].SystemPercent = calculate(current[i].Cpu.Sys, last[i].Cpu.Sys, allDelta) - current[i].IdlePercent = calculate(current[i].Cpu.Idle, last[i].Cpu.Idle, allDelta) - current[i].IOwaitPercent = calculate(current[i].Cpu.Wait, last[i].Cpu.Wait, allDelta) - current[i].IrqPercent = calculate(current[i].Cpu.Irq, last[i].Cpu.Irq, allDelta) - current[i].NicePercent = calculate(current[i].Cpu.Nice, last[i].Cpu.Nice, allDelta) - current[i].SoftIrqPercent = calculate(current[i].Cpu.SoftIrq, last[i].Cpu.SoftIrq, allDelta) - current[i].StealPercent = calculate(current[i].Cpu.Stolen, last[i].Cpu.Stolen, allDelta) - - } - - } - - return current -} - -func GetCores() int { - cores := runtime.NumCPU() - return cores -} - -func (cpu *CPU) AddCpuPercentage(t2 *CpuTimes) { - cpu.LastCpuTimes = GetCpuPercentage(cpu.LastCpuTimes, t2) -} - -func (cpu *CPU) AddCpuPercentageList(t2 []CpuTimes) { - cpu.LastCpuTimesList = GetCpuPercentageList(cpu.LastCpuTimesList, t2) -} diff --git 
a/metricbeat/module/system/cpu/helper_test.go b/metricbeat/module/system/cpu/helper_test.go deleted file mode 100644 index a25bbe560f4..00000000000 --- a/metricbeat/module/system/cpu/helper_test.go +++ /dev/null @@ -1,65 +0,0 @@ -// +build !integration -// +build darwin freebsd linux openbsd windows - -package cpu - -import ( - "runtime" - "testing" - - "github.com/elastic/gosigar" - "github.com/stretchr/testify/assert" -) - -func TestGetCpuTimes(t *testing.T) { - stat, err := GetCpuTimes() - - assert.NotNil(t, stat) - assert.Nil(t, err) - - assert.True(t, (stat.User > 0)) - assert.True(t, (stat.Sys > 0)) -} - -func TestCpuPercentage(t *testing.T) { - NumCPU = 1 - defer func() { NumCPU = runtime.NumCPU() }() - - cpu := CPU{} - - cpu1 := CpuTimes{ - Cpu: gosigar.Cpu{ - User: 10855311, - Nice: 0, - Sys: 2021040, - Idle: 17657874, - Wait: 0, - Irq: 0, - SoftIrq: 0, - Stolen: 0, - }, - } - - cpu.AddCpuPercentage(&cpu1) - - assert.Equal(t, cpu1.UserPercent, 0.0) - assert.Equal(t, cpu1.SystemPercent, 0.0) - - cpu2 := CpuTimes{ - Cpu: gosigar.Cpu{ - User: 10855693, - Nice: 0, - Sys: 2021058, - Idle: 17657876, - Wait: 0, - Irq: 0, - SoftIrq: 0, - Stolen: 0, - }, - } - - cpu.AddCpuPercentage(&cpu2) - - assert.Equal(t, cpu2.UserPercent, 0.9502) - assert.Equal(t, cpu2.SystemPercent, 0.0448) -} diff --git a/metricbeat/module/system/filesystem/helper.go b/metricbeat/module/system/filesystem/helper.go index ca1ddaf4142..c7f6159a27b 100644 --- a/metricbeat/module/system/filesystem/helper.go +++ b/metricbeat/module/system/filesystem/helper.go @@ -61,7 +61,7 @@ func AddFileSystemUsedPercentage(f *FileSystemStat) { } perc := float64(f.Used) / float64(f.Total) - f.UsedPercent = system.Round(perc, .5, 4) + f.UsedPercent = system.Round(perc) } func GetFilesystemEvent(fsStat *FileSystemStat) common.MapStr { diff --git a/metricbeat/module/system/load/_meta/data.json b/metricbeat/module/system/load/_meta/data.json index 405382baf86..553f3d3bd20 100644 --- 
a/metricbeat/module/system/load/_meta/data.json +++ b/metricbeat/module/system/load/_meta/data.json @@ -5,22 +5,21 @@ "name": "host.example.com" }, "metricset": { - "host": "localhost", "module": "system", "name": "load", "rtt": 115 }, "system": { "load": { - "1": 1.09, - "15": 0.49, - "5": 0.65, + "1": 1.7773, + "15": 1.645, + "5": 1.8062, + "cores": 8, "norm": { - "1": 0.545, - "15": 0.245, - "5": 0.325 + "1": 0.2222, + "15": 0.2056, + "5": 0.2258 } } - }, - "type": "metricsets" + } } \ No newline at end of file diff --git a/metricbeat/module/system/load/_meta/fields.yml b/metricbeat/module/system/load/_meta/fields.yml index 9808ac9cd5a..81b95ca0cdd 100644 --- a/metricbeat/module/system/load/_meta/fields.yml +++ b/metricbeat/module/system/load/_meta/fields.yml @@ -1,7 +1,7 @@ - name: load type: group description: > - Load averages. + CPU load averages. fields: - name: "1" type: scaled_float @@ -23,16 +23,21 @@ type: scaled_float scaling_factor: 100 description: > - Load divided by the number of cores for the last minute. + Load for the last minute divided by the number of cores. - name: "norm.5" type: scaled_float scaling_factor: 100 description: > - Load divided by the number of cores for the last 5 minutes. + Load for the last 5 minutes divided by the number of cores. - name: "norm.15" type: scaled_float scaling_factor: 100 description: > - Load divided by the number of cores for the last 15 minutes. + Load for the last 15 minutes divided by the number of cores. + + - name: "cores" + type: long + description: > + The number of CPU cores present on the host. diff --git a/metricbeat/module/system/load/doc.go b/metricbeat/module/system/load/doc.go index 67b1843dc35..35ecb168a9c 100644 --- a/metricbeat/module/system/load/doc.go +++ b/metricbeat/module/system/load/doc.go @@ -1,4 +1,2 @@ -/* -Package load collects system load metrics from the host OS. -*/ +// Package load collects system CPU load metrics from the host OS. 
package load diff --git a/metricbeat/module/system/load/helper.go b/metricbeat/module/system/load/helper.go deleted file mode 100644 index 23cc9fc6145..00000000000 --- a/metricbeat/module/system/load/helper.go +++ /dev/null @@ -1,39 +0,0 @@ -// +build darwin freebsd linux openbsd - -package load - -import sigar "github.com/elastic/gosigar" - -type SystemLoad struct { - Load1 float64 `json:"load1"` - Load5 float64 `json:"load5"` - Load15 float64 `json:"load15"` - - /* normalized values load / cores */ - LoadNorm1 float64 `json:"load1_norm"` - LoadNorm5 float64 `json:"load5_norm"` - LoadNorm15 float64 `json:"load15_norm"` -} - -func GetSystemLoad() (*SystemLoad, error) { - - concreteSigar := sigar.ConcreteSigar{} - avg, err := concreteSigar.GetLoadAverage() - if err != nil { - return nil, err - } - - cpuList := sigar.CpuList{} - cpuList.Get() - numCore := len(cpuList.List) - - return &SystemLoad{ - Load1: avg.One, - Load5: avg.Five, - Load15: avg.Fifteen, - - LoadNorm1: avg.One / float64(numCore), - LoadNorm5: avg.Five / float64(numCore), - LoadNorm15: avg.Fifteen / float64(numCore), - }, nil -} diff --git a/metricbeat/module/system/load/helper_test.go b/metricbeat/module/system/load/helper_test.go deleted file mode 100644 index 8b532517ea7..00000000000 --- a/metricbeat/module/system/load/helper_test.go +++ /dev/null @@ -1,22 +0,0 @@ -// +build !integration -// +build darwin freebsd linux openbsd - -package load - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestGetSystemLoad(t *testing.T) { - - load, err := GetSystemLoad() - - assert.NotNil(t, load) - assert.Nil(t, err) - - assert.True(t, (load.Load1 > 0)) - assert.True(t, (load.Load5 > 0)) - assert.True(t, (load.Load15 > 0)) -} diff --git a/metricbeat/module/system/load/load.go b/metricbeat/module/system/load/load.go index 860bc5f5bda..36ae46f0923 100644 --- a/metricbeat/module/system/load/load.go +++ b/metricbeat/module/system/load/load.go @@ -3,66 +3,51 @@ package load import ( + 
"github.com/pkg/errors" + "github.com/elastic/beats/libbeat/common" "github.com/elastic/beats/metricbeat/mb" "github.com/elastic/beats/metricbeat/mb/parse" "github.com/elastic/beats/metricbeat/module/system" - - "github.com/pkg/errors" ) -// init registers the MetricSet with the central registry. -// The New method will be called after the setup of the module and before starting to fetch data func init() { if err := mb.Registry.AddMetricSet("system", "load", New, parse.EmptyHostParser); err != nil { panic(err) } } -// MetricSet type defines all fields of the MetricSet -// As a minimum it must inherit the mb.BaseMetricSet fields, but can be extended with -// additional entries. These variables can be used to persist data or configuration between -// multiple fetch calls. +// MetricSet for fetching system CPU load metrics. type MetricSet struct { mb.BaseMetricSet - counter int } -// New create a new instance of the MetricSet -// Part of new is also setting up the configuration by processing additional -// configuration entries if needed. +// New returns a new load MetricSet. func New(base mb.BaseMetricSet) (mb.MetricSet, error) { - - config := struct{}{} - - if err := base.Module().UnpackConfig(&config); err != nil { - return nil, err - } - return &MetricSet{ BaseMetricSet: base, - counter: 1, }, nil } -// Fetch methods implements the data gathering and data conversion to the right format -// It returns the event which is then forward to the output. In case of an error, a -// descriptive error must be returned. +// Fetch fetches system load metrics. 
func (m *MetricSet) Fetch() (common.MapStr, error) { - - loadStat, err := GetSystemLoad() + load, err := system.Load() if err != nil { - return nil, errors.Wrap(err, "load statistics") + return nil, errors.Wrap(err, "failed to get CPU load values") } + avgs := load.Averages() + normAvgs := load.NormalizedAverages() + event := common.MapStr{ - "1": system.Round(loadStat.Load1, .5, 4), - "5": system.Round(loadStat.Load5, .5, 4), - "15": system.Round(loadStat.Load15, .5, 4), + "cores": system.NumCPU, + "1": avgs.OneMinute, + "5": avgs.FiveMinute, + "15": avgs.FifteenMinute, "norm": common.MapStr{ - "1": system.Round(loadStat.LoadNorm1, .5, 4), - "5": system.Round(loadStat.LoadNorm5, .5, 4), - "15": system.Round(loadStat.LoadNorm15, .5, 4), + "1": normAvgs.OneMinute, + "5": normAvgs.FiveMinute, + "15": normAvgs.FifteenMinute, }, } diff --git a/metricbeat/module/system/load/load_test.go b/metricbeat/module/system/load/load_test.go index a808b47b0f2..6aec866e75b 100644 --- a/metricbeat/module/system/load/load_test.go +++ b/metricbeat/module/system/load/load_test.go @@ -6,22 +6,19 @@ package load import ( "testing" - "time" - mbtest "github.com/elastic/beats/metricbeat/mb/testing" ) func TestData(t *testing.T) { f := mbtest.NewEventFetcher(t, getConfig()) - // Do a first fetch to have percentages - f.Fetch() - time.Sleep(1 * time.Second) - - err := mbtest.WriteEvent(f, t) + load, err := f.Fetch() if err != nil { - t.Fatal("write", err) + t.Fatal(err) } + + event := mbtest.CreateFullEvent(f, load) + mbtest.WriteEventToDataJSON(t, event) } func getConfig() map[string]interface{} { diff --git a/metricbeat/module/system/memory/helper.go b/metricbeat/module/system/memory/helper.go index 4682b69a6d9..320068db39d 100644 --- a/metricbeat/module/system/memory/helper.go +++ b/metricbeat/module/system/memory/helper.go @@ -32,10 +32,10 @@ func AddMemPercentage(m *MemStat) { } perc := float64(m.Mem.Used) / float64(m.Mem.Total) - m.UsedPercent = system.Round(perc, .5, 4) + 
m.UsedPercent = system.Round(perc) actualPerc := float64(m.Mem.ActualUsed) / float64(m.Mem.Total) - m.ActualUsedPercent = system.Round(actualPerc, .5, 4) + m.ActualUsedPercent = system.Round(actualPerc) } type SwapStat struct { @@ -82,5 +82,5 @@ func AddSwapPercentage(s *SwapStat) { } perc := float64(s.Swap.Used) / float64(s.Swap.Total) - s.UsedPercent = system.Round(perc, .5, 4) + s.UsedPercent = system.Round(perc) } diff --git a/metricbeat/module/system/process/_meta/data.json b/metricbeat/module/system/process/_meta/data.json index ccd187d49f7..06b6b7f3715 100644 --- a/metricbeat/module/system/process/_meta/data.json +++ b/metricbeat/module/system/process/_meta/data.json @@ -5,42 +5,36 @@ "name": "host.example.com" }, "metricset": { - "host": "localhost", "module": "system", "name": "process", "rtt": 115 }, "system": { "process": { - "cmdline": "go test -tags=integration github.com/elastic/beats/metricbeat/module/... -data", + "cmdline": "/var/folders/8x/rnyk6yxn6w97lddn3bs02gf00000gn/T/go-build935695167/github.com/elastic/beats/metricbeat/module/system/process/_test/process.test -test.v=true -data", "cpu": { - "start_time": "2016-09-06T07:33:04.000Z", + "start_time": "2017-06-23T19:57:04.691Z", "total": { - "pct": 0 + "norm": { + "pct": 0.0117 + }, + "pct": 0.0938 } }, - "fd": { - "limit": { - "hard": 1048576, - "soft": 1048576 - }, - "open": 4 - }, "memory": { "rss": { - "bytes": 13770752, - "pct": 0.0016 + "bytes": 13565952, + "pct": 0.0008 }, - "share": 6594560, - "size": 232972288 + "share": 0, + "size": 570052358144 }, - "name": "go", - "pgid": 1, - "pid": 1, - "ppid": 0, - "state": "sleeping", - "username": "root" + "name": "process.test", + "pgid": 61828, + "pid": 61938, + "ppid": 61828, + "state": "running", + "username": "akroh" } - }, - "type": "metricsets" + } } \ No newline at end of file diff --git a/metricbeat/module/system/process/_meta/fields.yml b/metricbeat/module/system/process/_meta/fields.yml index 288f687daf6..5aea476b52f 100644 --- 
a/metricbeat/module/system/process/_meta/fields.yml +++ b/metricbeat/module/system/process/_meta/fields.yml @@ -61,6 +61,13 @@ description: > The percentage of CPU time spent by the process since the last update. Its value is similar to the %CPU value of the process displayed by the top command on Unix systems. + - name: total.norm.pct + type: scaled_float + format: percent + description: > + The percentage of CPU time spent by the process since the last event. + This value is normalized by the number of CPU cores and it ranges + from 0 to 100%. - name: system type: long description: > diff --git a/metricbeat/module/system/process/config.go b/metricbeat/module/system/process/config.go new file mode 100644 index 00000000000..df4a488c4f4 --- /dev/null +++ b/metricbeat/module/system/process/config.go @@ -0,0 +1,38 @@ +package process + +import "github.com/elastic/beats/libbeat/logp" + +// includeTopConfig is the configuration for the "top N processes +// filtering" feature +type includeTopConfig struct { + Enabled bool `config:"enabled"` + ByCPU int `config:"by_cpu"` + ByMemory int `config:"by_memory"` +} + +type Config struct { + Procs []string `config:"processes"` + Cgroups *bool `config:"process.cgroups.enabled"` + EnvWhitelist []string `config:"process.env.whitelist"` + CacheCmdLine bool `config:"process.cmdline.cache.enabled"` + IncludeTop includeTopConfig `config:"process.include_top_n"` + IncludeCPUTicks bool `config:"process.include_cpu_ticks"` + CPUTicks *bool `config:"cpu_ticks"` // Deprecated +} + +func (c Config) Validate() error { + if c.CPUTicks != nil { + logp.Deprecate("6.1", "cpu_ticks is deprecated. 
Use process.include_cpu_ticks instead") + } + return nil +} + +var defaultConfig = Config{ + Procs: []string{".*"}, // collect all processes by default + CacheCmdLine: true, + IncludeTop: includeTopConfig{ + Enabled: true, + ByCPU: 0, + ByMemory: 0, + }, +} diff --git a/metricbeat/module/system/process/helper.go b/metricbeat/module/system/process/helper.go index 46cf26256a2..be530371219 100644 --- a/metricbeat/module/system/process/helper.go +++ b/metricbeat/module/system/process/helper.go @@ -19,23 +19,26 @@ import ( "github.com/pkg/errors" ) +var NumCPU = runtime.NumCPU() + type ProcsMap map[int]*Process type Process struct { - Pid int `json:"pid"` - Ppid int `json:"ppid"` - Pgid int `json:"pgid"` - Name string `json:"name"` - Username string `json:"username"` - State string `json:"state"` - CmdLine string `json:"cmdline"` - Cwd string `json:"cwd"` - Mem sigar.ProcMem - Cpu sigar.ProcTime - Ctime time.Time - FD sigar.ProcFDUsage - Env common.MapStr - cpuTotalPct float64 + Pid int `json:"pid"` + Ppid int `json:"ppid"` + Pgid int `json:"pgid"` + Name string `json:"name"` + Username string `json:"username"` + State string `json:"state"` + CmdLine string `json:"cmdline"` + Cwd string `json:"cwd"` + Mem sigar.ProcMem + Cpu sigar.ProcTime + SampleTime time.Time + FD sigar.ProcFDUsage + Env common.MapStr + cpuTotalPct float64 + cpuTotalPctNorm float64 } type ProcStats struct { @@ -73,7 +76,6 @@ func newProcess(pid int, cmdline string, env common.MapStr) (*Process, error) { State: getProcState(byte(state.State)), CmdLine: cmdline, Cwd: exe.Cwd, - Ctime: time.Now(), Env: env, } @@ -86,6 +88,8 @@ func newProcess(pid int, cmdline string, env common.MapStr) (*Process, error) { // variable should be saved with the process. If the argument is nil then all // environment variables are stored. 
func (proc *Process) getDetails(envPredicate func(string) bool) error { + proc.SampleTime = time.Now() + proc.Mem = sigar.ProcMem{} if err := proc.Mem.Get(proc.Pid); err != nil { return fmt.Errorf("error getting process mem for pid=%d: %v", proc.Pid, err) @@ -190,7 +194,7 @@ func GetProcMemPercentage(proc *Process, totalPhyMem uint64) float64 { perc := (float64(proc.Mem.Resident) / float64(totalPhyMem)) - return system.Round(perc, .5, 4) + return system.Round(perc) } func Pids() ([]int, error) { @@ -253,13 +257,16 @@ func (procStats *ProcStats) getProcessEvent(process *Process) common.MapStr { proc["cpu"] = common.MapStr{ "total": common.MapStr{ "pct": process.cpuTotalPct, + "norm": common.MapStr{ + "pct": process.cpuTotalPctNorm, + }, }, "start_time": unixTimeMsToTime(process.Cpu.StartTime), } if procStats.CpuTicks { - proc.Put("cpu.user", process.Cpu.User) - proc.Put("cpu.system", process.Cpu.Sys) + proc.Put("cpu.user.ticks", process.Cpu.User) + proc.Put("cpu.system.ticks", process.Cpu.Sys) proc.Put("cpu.total.ticks", process.Cpu.Total) } @@ -276,17 +283,29 @@ func (procStats *ProcStats) getProcessEvent(process *Process) common.MapStr { return proc } -func GetProcCpuPercentage(last *Process, current *Process) float64 { - - if last != nil && current != nil { - - dCPU := int64(current.Cpu.Total - last.Cpu.Total) - dt := float64(current.Ctime.Sub(last.Ctime).Nanoseconds()) / float64(1e6) // in milliseconds - perc := float64(dCPU) / dt - - return system.Round(perc, .5, 4) - } - return 0 +// GetProcCpuPercentage returns the percentage of total CPU time consumed by +// the process during the period between the given samples. Two percentages are +// returned (these must be multiplied by 100). The first is normalized based +// on the number of cores such that the value ranges on [0, 1]. The second is +// not normalized and the value ranges on [0, number_of_cores]. 
+// +// Implementation note: The total system CPU time (including idle) is not +// provided so this method will resort to using the difference in wall-clock +// time multiplied by the number of cores as the total amount of CPU time +// available between samples. This could result in incorrect percentages if the +// wall-clock is adjusted (prior to Go 1.9) or the machine is suspended. +func GetProcCpuPercentage(s0, s1 *Process) (normalizedPct, pct float64) { + if s0 != nil && s1 != nil { + timeDelta := s1.SampleTime.Sub(s0.SampleTime) + timeDeltaMillis := timeDelta / time.Millisecond + totalCPUDeltaMillis := int64(s1.Cpu.Total - s0.Cpu.Total) + + pct := float64(totalCPUDeltaMillis) / float64(timeDeltaMillis) + normalizedPct := pct / float64(NumCPU) + + return system.Round(normalizedPct), system.Round(pct) + } + return 0, 0 } func (procStats *ProcStats) MatchProcess(name string) bool { @@ -367,7 +386,7 @@ func (procStats *ProcStats) GetProcStats() ([]common.MapStr, error) { newProcs[process.Pid] = process last := procStats.ProcsMap[process.Pid] - process.cpuTotalPct = GetProcCpuPercentage(last, process) + process.cpuTotalPctNorm, process.cpuTotalPct = GetProcCpuPercentage(last, process) processes = append(processes, *process) } } diff --git a/metricbeat/module/system/process/helper_test.go b/metricbeat/module/system/process/helper_test.go index 27781a5fa82..ef571c7559a 100644 --- a/metricbeat/module/system/process/helper_test.go +++ b/metricbeat/module/system/process/helper_test.go @@ -52,7 +52,7 @@ func TestGetProcess(t *testing.T) { assert.True(t, (process.Cpu.User >= 0)) assert.True(t, (process.Cpu.Sys >= 0)) - assert.True(t, (process.Ctime.Unix() <= time.Now().Unix())) + assert.True(t, (process.SampleTime.Unix() <= time.Now().Unix())) switch runtime.GOOS { case "darwin", "linux", "freebsd": @@ -114,35 +114,30 @@ func TestProcMemPercentage(t *testing.T) { } func TestProcCpuPercentage(t *testing.T) { - procStats := ProcStats{} - - ctime := time.Now() - - p2 := 
Process{ - Pid: 3545, + p1 := &Process{ Cpu: gosigar.ProcTime{ - User: 14794, - Sys: 47, - Total: 14841, + User: 11345, + Sys: 37, + Total: 11382, }, - Ctime: ctime, + SampleTime: time.Now(), } - p1 := Process{ - Pid: 3545, + p2 := &Process{ Cpu: gosigar.ProcTime{ - User: 11345, - Sys: 37, - Total: 11382, + User: 14794, + Sys: 47, + Total: 14841, }, - Ctime: ctime.Add(-1 * time.Second), + SampleTime: p1.SampleTime.Add(time.Second), } - procStats.ProcsMap = make(ProcsMap) - procStats.ProcsMap[p1.Pid] = &p1 + NumCPU = 48 + defer func() { NumCPU = runtime.NumCPU() }() - totalPercent := GetProcCpuPercentage(&p1, &p2) - assert.Equal(t, totalPercent, 3.459) + totalPercentNormalized, totalPercent := GetProcCpuPercentage(p1, p2) + assert.EqualValues(t, 0.0721, totalPercentNormalized) + assert.EqualValues(t, 3.459, totalPercent) } // BenchmarkGetProcess runs a benchmark of the GetProcess method with caching diff --git a/metricbeat/module/system/process/process.go b/metricbeat/module/system/process/process.go index 33334efb4d1..4c190608351 100644 --- a/metricbeat/module/system/process/process.go +++ b/metricbeat/module/system/process/process.go @@ -16,7 +16,7 @@ import ( "github.com/pkg/errors" ) -var debugf = logp.MakeDebug("system-process") +var debugf = logp.MakeDebug("system.process") func init() { if err := mb.Registry.AddMetricSet("system", "process", New, parse.EmptyHostParser); err != nil { @@ -32,32 +32,9 @@ type MetricSet struct { cacheCmdLine bool } -// includeTopConfig is the configuration for the "top N processes -// filtering" feature -type includeTopConfig struct { - Enabled bool `config:"enabled"` - ByCPU int `config:"by_cpu"` - ByMemory int `config:"by_memory"` -} - // New creates and returns a new MetricSet. 
func New(base mb.BaseMetricSet) (mb.MetricSet, error) { - config := struct { - Procs []string `config:"processes"` - Cgroups *bool `config:"process.cgroups.enabled"` - EnvWhitelist []string `config:"process.env.whitelist"` - CPUTicks bool `config:"cpu_ticks"` - CacheCmdLine bool `config:"process.cmdline.cache.enabled"` - IncludeTop includeTopConfig `config:"process.include_top_n"` - }{ - Procs: []string{".*"}, // collect all processes by default - CacheCmdLine: true, - IncludeTop: includeTopConfig{ - Enabled: true, - ByCPU: 0, - ByMemory: 0, - }, - } + config := defaultConfig if err := base.Module().UnpackConfig(&config); err != nil { return nil, err } @@ -67,7 +44,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { stats: &ProcStats{ Procs: config.Procs, EnvWhitelist: config.EnvWhitelist, - CpuTicks: config.CPUTicks, + CpuTicks: config.IncludeCPUTicks || (config.CPUTicks != nil && *config.CPUTicks), CacheCmdLine: config.CacheCmdLine, IncludeTop: config.IncludeTop, }, diff --git a/metricbeat/module/system/system.go b/metricbeat/module/system/system.go index 1e8c9401120..5ad1244b29d 100644 --- a/metricbeat/module/system/system.go +++ b/metricbeat/module/system/system.go @@ -2,7 +2,6 @@ package system import ( "flag" - "math" "sync" "github.com/elastic/beats/metricbeat/mb" @@ -34,17 +33,3 @@ func NewModule(base mb.BaseModule) (mb.Module, error) { return &Module{BaseModule: base, HostFS: *HostFS}, nil } - -func Round(val float64, roundOn float64, places int) (newVal float64) { - var round float64 - pow := math.Pow(10, float64(places)) - digit := pow * val - _, div := math.Modf(digit) - if div >= roundOn { - round = math.Ceil(digit) - } else { - round = math.Floor(digit) - } - newVal = round / pow - return -} diff --git a/metricbeat/module/system/system_test.go b/metricbeat/module/system/system_test.go deleted file mode 100644 index 7df24fc1569..00000000000 --- a/metricbeat/module/system/system_test.go +++ /dev/null @@ -1,3 +0,0 @@ -// +build !integration - 
-package system diff --git a/metricbeat/module/system/util.go b/metricbeat/module/system/util.go new file mode 100644 index 00000000000..7e6eb895e11 --- /dev/null +++ b/metricbeat/module/system/util.go @@ -0,0 +1,226 @@ +// +build darwin freebsd linux openbsd windows + +package system + +import ( + "math" + "runtime" + + sigar "github.com/elastic/gosigar" +) + +// maxDecimalPlaces is the maximum number of decimal places that the Round +// function returns. +const maxDecimalPlaces = 4 + +var ( + // NumCPU is the number of CPU cores in the system. Changes to operating + // system CPU allocation after process startup are not reflected. + NumCPU = runtime.NumCPU() +) + +// CPU Monitor + +// CPUMonitor is used to monitor the overall CPU usage of the system. +type CPUMonitor struct { + lastSample *sigar.Cpu +} + +// Sample collects a new sample of the CPU usage metrics. +func (m *CPUMonitor) Sample() (*CPUMetrics, error) { + cpuSample := &sigar.Cpu{} + if err := cpuSample.Get(); err != nil { + return nil, err + } + + oldLastSample := m.lastSample + m.lastSample = cpuSample + return &CPUMetrics{oldLastSample, cpuSample}, nil +} + +type CPUPercentages struct { + User float64 + System float64 + Idle float64 + IOWait float64 + IRQ float64 + Nice float64 + SoftIRQ float64 + Steal float64 +} + +type CPUTicks struct { + User uint64 + System uint64 + Idle uint64 + IOWait uint64 + IRQ uint64 + Nice uint64 + SoftIRQ uint64 + Steal uint64 +} + +type CPUMetrics struct { + previousSample *sigar.Cpu + currentSample *sigar.Cpu +} + +// NormalizedPercentages returns CPU percentage usage information that is +// normalized by the number of CPU cores (NumCPU). The values will range from +// 0 to 100%. +func (m *CPUMetrics) NormalizedPercentages() CPUPercentages { + return cpuPercentages(m.previousSample, m.currentSample, 1) +} + +// Percentages returns CPU percentage usage information. The values range from +// 0 to 100% * NumCPU. 
+func (m *CPUMetrics) Percentages() CPUPercentages { + return cpuPercentages(m.previousSample, m.currentSample, NumCPU) +} + +// cpuPercentages calculates the amount of CPU time used between the two given +// samples. The CPU percentages are multiplied by the given numCPU value and +// rounded using Round. +func cpuPercentages(s0, s1 *sigar.Cpu, numCPU int) CPUPercentages { + if s0 == nil || s1 == nil { + return CPUPercentages{} + } + + // timeDelta is the total amount of CPU time available across all CPU cores. + timeDelta := s1.Total() - s0.Total() + if timeDelta <= 0 { + return CPUPercentages{} + } + + calculatePct := func(v0, v1 uint64) float64 { + cpuDelta := int64(v1 - v0) + pct := float64(cpuDelta) / float64(timeDelta) + return Round(pct * float64(numCPU)) + } + + return CPUPercentages{ + User: calculatePct(s0.User, s1.User), + System: calculatePct(s0.Sys, s1.Sys), + Idle: calculatePct(s0.Idle, s1.Idle), + IOWait: calculatePct(s0.Wait, s1.Wait), + IRQ: calculatePct(s0.Irq, s1.Irq), + Nice: calculatePct(s0.Nice, s1.Nice), + SoftIRQ: calculatePct(s0.SoftIrq, s1.SoftIrq), + Steal: calculatePct(s0.Stolen, s1.Stolen), + } +} + +func (m *CPUMetrics) Ticks() CPUTicks { + return CPUTicks{ + User: m.currentSample.User, + System: m.currentSample.Sys, + Idle: m.currentSample.Idle, + IOWait: m.currentSample.Wait, + IRQ: m.currentSample.Irq, + Nice: m.currentSample.Nice, + SoftIRQ: m.currentSample.SoftIrq, + Steal: m.currentSample.Stolen, + } +} + +// CPU Core Monitor + +// CPUCoreMetrics contains the usage metrics of an individual CPU core. +type CPUCoreMetrics CPUMetrics + +// Percentages returns CPU percentage usage information for the core. The values +// range from [0, 100%]. +func (m *CPUCoreMetrics) Percentages() CPUPercentages { return (*CPUMetrics)(m).NormalizedPercentages() } + +// Ticks returns the raw number of "ticks". The value is a counter (though it +// may roll over). 
+func (m *CPUCoreMetrics) Ticks() CPUTicks { return (*CPUMetrics)(m).Ticks() } + +// CPUCoresMonitor is used to monitor the usage information of all the CPU +// cores in the system. +type CPUCoresMonitor struct { + lastSample []sigar.Cpu +} + +// Sample collects a new sample of the metrics from all CPU cores. +func (m *CPUCoresMonitor) Sample() ([]CPUCoreMetrics, error) { + var cores sigar.CpuList + if err := cores.Get(); err != nil { + return nil, err + } + + lastSample := m.lastSample + m.lastSample = cores.List + + cpuMetrics := make([]CPUCoreMetrics, len(cores.List)) + for i := 0; i < len(cores.List); i++ { + if len(lastSample) > i { + cpuMetrics[i] = CPUCoreMetrics{&lastSample[i], &cores.List[i]} + } else { + cpuMetrics[i] = CPUCoreMetrics{nil, &cores.List[i]} + } + } + + return cpuMetrics, nil +} + +// CPU Load + +// Load returns CPU load information for the previous 1, 5, and 15 minute +// periods. +func Load() (*LoadMetrics, error) { + load := &sigar.LoadAverage{} + if err := load.Get(); err != nil { + return nil, err + } + + return &LoadMetrics{load}, nil +} + +type LoadMetrics struct { + sample *sigar.LoadAverage +} + +type LoadAverages struct { + OneMinute float64 + FiveMinute float64 + FifteenMinute float64 +} + +// Averages return the CPU load averages. These values should range from +// 0 to NumCPU. +func (m *LoadMetrics) Averages() LoadAverages { + return LoadAverages{ + OneMinute: Round(m.sample.One), + FiveMinute: Round(m.sample.Five), + FifteenMinute: Round(m.sample.Fifteen), + } +} + +// NormalizedAverages return the CPU load averages normalized by the NumCPU. +// These values should range from 0 to 1. 
+func (m *LoadMetrics) NormalizedAverages() LoadAverages { + return LoadAverages{ + OneMinute: Round(m.sample.One / float64(NumCPU)), + FiveMinute: Round(m.sample.Five / float64(NumCPU)), + FifteenMinute: Round(m.sample.Fifteen / float64(NumCPU)), + } +} + +// Helpers + +// Round rounds the given float64 value and ensures that it has a maximum of +// four decimal places. +func Round(val float64) (newVal float64) { + var round float64 + pow := math.Pow(10, float64(maxDecimalPlaces)) + digit := pow * val + _, div := math.Modf(digit) + if div >= 0.5 { + round = math.Ceil(digit) + } else { + round = math.Floor(digit) + } + newVal = round / pow + return +} diff --git a/metricbeat/module/system/util_test.go b/metricbeat/module/system/util_test.go new file mode 100644 index 00000000000..73139546cf1 --- /dev/null +++ b/metricbeat/module/system/util_test.go @@ -0,0 +1,126 @@ +// +build !integration +// +build darwin freebsd linux openbsd windows + +package system + +import ( + "runtime" + "testing" + + "github.com/elastic/gosigar" + "github.com/stretchr/testify/assert" +) + +func TestCPUMonitorSample(t *testing.T) { + cpu := &CPUMonitor{lastSample: &gosigar.Cpu{}} + s, err := cpu.Sample() + if err != nil { + t.Fatal(err) + } + + pct := s.Percentages() + assert.True(t, pct.User > 0) + assert.True(t, pct.System > 0) + + normPct := s.NormalizedPercentages() + assert.True(t, normPct.User > 0) + assert.True(t, normPct.System > 0) + assert.True(t, normPct.User <= 100) + assert.True(t, normPct.System <= 100) + + assert.True(t, pct.User > normPct.User) + assert.True(t, pct.System > normPct.System) + + ticks := s.Ticks() + assert.True(t, ticks.User > 0) + assert.True(t, ticks.System > 0) +} + +func TestCPUCoresMonitorSample(t *testing.T) { + cores := &CPUCoresMonitor{lastSample: make([]gosigar.Cpu, NumCPU)} + sample, err := cores.Sample() + if err != nil { + t.Fatal(err) + } + + for _, s := range sample { + normPct := s.Percentages() + assert.True(t, normPct.User > 0) + 
assert.True(t, normPct.User <= 100) + assert.True(t, normPct.System > 0) + assert.True(t, normPct.System <= 100) + assert.True(t, normPct.Idle > 0) + assert.True(t, normPct.Idle <= 100) + + ticks := s.Ticks() + assert.True(t, ticks.User > 0) + assert.True(t, ticks.System > 0) + } +} + +// TestCPUMetricsRounding tests that the returned percentages are rounded to +// four decimal places. +func TestCPUMetricsRounding(t *testing.T) { + sample := CPUMetrics{ + previousSample: &gosigar.Cpu{ + User: 10855311, + Sys: 2021040, + Idle: 17657874, + }, + currentSample: &gosigar.Cpu{ + User: 10855693, + Sys: 2021058, + Idle: 17657876, + }, + } + + pct := sample.NormalizedPercentages() + assert.Equal(t, pct.User, 0.9502) + assert.Equal(t, pct.System, 0.0448) +} + +// TestCPUMetricsPercentages tests that CPUMetrics returns the correct +// percentages and normalized percentages. +func TestCPUMetricsPercentages(t *testing.T) { + NumCPU = 10 + defer func() { NumCPU = runtime.NumCPU() }() + + // This test simulates 30% user and 70% system (normalized), or 3% and 7% + // respectively when there are 10 CPUs. + const user, system = 30., 70. 
+ + s0 := gosigar.Cpu{ + User: 10000000, + Sys: 10000000, + Idle: 20000000, + Nice: 0, + } + s1 := gosigar.Cpu{ + User: s0.User + uint64(user), + Sys: s0.Sys + uint64(system), + Idle: s0.Idle, + Nice: 0, + } + sample := CPUMetrics{ + previousSample: &s0, + currentSample: &s1, + } + + pct := sample.NormalizedPercentages() + assert.EqualValues(t, .3, pct.User) + assert.EqualValues(t, .7, pct.System) + + pct = sample.Percentages() + assert.EqualValues(t, .3*float64(NumCPU), pct.User) + assert.EqualValues(t, .7*float64(NumCPU), pct.System) +} + +func TestRound(t *testing.T) { + assert.EqualValues(t, 0.5, Round(0.5)) + assert.EqualValues(t, 0.5, Round(0.50004)) + assert.EqualValues(t, 0.5001, Round(0.50005)) + + assert.EqualValues(t, 1234.5, Round(1234.5)) + assert.EqualValues(t, 1234.5, Round(1234.50004)) + assert.EqualValues(t, 1234.5001, Round(1234.50005)) +} diff --git a/metricbeat/tests/system/test_system.py b/metricbeat/tests/system/test_system.py index f40ed91a6f0..a339c19feaa 100644 --- a/metricbeat/tests/system/test_system.py +++ b/metricbeat/tests/system/test_system.py @@ -10,15 +10,19 @@ "softirq.pct", "steal.pct", "system.pct", "user.pct"] SYSTEM_CPU_FIELDS_ALL = ["cores", "idle.pct", "idle.ticks", "iowait.pct", "iowait.ticks", "irq.pct", "irq.ticks", "nice.pct", "nice.ticks", - "softirq.pct", "softirq.ticks", "steal.pct", "steal.ticks", "system.pct", "system.ticks", "user.pct", "user.ticks"] + "softirq.pct", "softirq.ticks", "steal.pct", "steal.ticks", "system.pct", "system.ticks", "user.pct", "user.ticks", + "idle.norm.pct", "iowait.norm.pct", "irq.norm.pct", "nice.norm.pct", "softirq.norm.pct", + "steal.norm.pct", "system.norm.pct", "user.norm.pct"] -SYSTEM_LOAD_FIELDS = ["1", "5", "15", "norm.1", "norm.5", "norm.15"] +SYSTEM_LOAD_FIELDS = ["cores", "1", "5", "15", "norm.1", "norm.5", "norm.15"] SYSTEM_CORE_FIELDS = ["id", "idle.pct", "iowait.pct", "irq.pct", "nice.pct", "softirq.pct", "steal.pct", "system.pct", "user.pct"] SYSTEM_CORE_FIELDS_ALL = 
SYSTEM_CORE_FIELDS + ["idle.ticks", "iowait.ticks", "irq.ticks", "nice.ticks", - "softirq.ticks", "steal.ticks", "system.ticks", "user.ticks"] + "softirq.ticks", "steal.ticks", "system.ticks", "user.ticks", + "idle.norm.pct", "iowait.norm.pct", "irq.norm.pct", "nice.norm.pct", + "softirq.norm.pct", "steal.norm.pct", "system.norm.pct", "user.norm.pct"] SYSTEM_DISKIO_FIELDS = ["name", "read.count", "write.count", "read.bytes", "write.bytes", "read.time", "write.time", "io.time"] @@ -78,7 +82,7 @@ def test_cpu_ticks_option(self): "metricsets": ["cpu"], "period": "5s", "extras": { - "cpu_ticks": True, + "cpu.metrics": ["percentages", "ticks"], }, }]) proc = self.start_beat() @@ -127,7 +131,7 @@ def test_core_with_cpu_ticks(self): "metricsets": ["core"], "period": "5s", "extras": { - "cpu_ticks": True, + "core.metrics": ["percentages", "ticks"], }, }]) proc = self.start_beat()