diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index dad05df46f4..a8bf249f96d 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -152,6 +152,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff] - Adding query APIs for metricsets and modules from metricbeat registry {pull}4102[4102] - Fixing nil pointer on prometheus collector when http response is nil {pull}4119[4119] - Add http module with json metricset. {pull}4092[4092] +- Add the option to the system module to include only the top N processes by CPU and memory. {pull}4127[4127] *Packetbeat* - Add `fields` and `fields_under_root` to packetbeat protocols configurations. {pull}3518[3518] diff --git a/metricbeat/docs/modules/system.asciidoc b/metricbeat/docs/modules/system.asciidoc index afbcb259e54..4393c77dae8 100644 --- a/metricbeat/docs/modules/system.asciidoc +++ b/metricbeat/docs/modules/system.asciidoc @@ -26,6 +26,25 @@ metricbeat.modules: metricsets: ["process"] processes: ['.*'] ---- + +*`process.include_top_n`*:: These options allow you to filter out all processes +that are not in the top N by CPU or memory, in order to reduce the number of +documents created. If both the `by_cpu` and `by_memory` options are used, the +union of the two tops is included. + +*`process.include_top_n.enabled`*:: Set to false to disable the top N feature and +include all processes, regardless of the other options. The default is `true`, +but nothing is filtered unless one of the other options (`by_cpu` or `by_memory`) +is set to a non-zero value. + +*`process.include_top_n.by_cpu`*:: How many processes to include from the top +by CPU. The processes are sorted by the `system.process.cpu.total.pct` field. +The default is 0. + +*`process.include_top_n.by_memory`*:: How many processes to include from the top +by memory. The processes are sorted by the `system.process.memory.rss.bytes` +field. The default is 0. 
+ *`process.cgroups.enabled`*:: When the `process` metricset is enabled, you can use this boolean configuration option to disable cgroup metrics. By default cgroup metrics collection is enabled. @@ -100,6 +119,9 @@ metricbeat.modules: enabled: true period: 10s processes: ['.*'] + process.include_top_n: + by_cpu: 5 # include top 5 processes by CPU + by_memory: 5 # include top 5 processes by memory ---- [float] diff --git a/metricbeat/metricbeat.full.yml b/metricbeat/metricbeat.full.yml index 4191987a8b5..43debb001c7 100644 --- a/metricbeat/metricbeat.full.yml +++ b/metricbeat/metricbeat.full.yml @@ -64,6 +64,23 @@ metricbeat.modules: # if true, exports the CPU usage in ticks, together with the percentage values #cpu_ticks: false + # These options allow you to filter out all processes that are not + # in the top N by CPU or memory, in order to reduce the number of documents created. + # If both the `by_cpu` and `by_memory` options are used, the union of the two tops + # is included. + #process.include_top_n: + # + # Set to false to disable this feature and include all processes + #enabled: true + + # How many processes to include from the top by CPU. The processes are sorted + # by the `system.process.cpu.total.pct` field. + #by_cpu: 0 + + # How many processes to include from the top by memory. The processes are sorted + # by the `system.process.memory.rss.bytes` field. + #by_memory: 0 + # If false, cmdline of a process is not cached. 
#process.cmdline.cache.enabled: true diff --git a/metricbeat/metricbeat.yml b/metricbeat/metricbeat.yml index 657ea61828d..a8b56cb0d95 100644 --- a/metricbeat/metricbeat.yml +++ b/metricbeat/metricbeat.yml @@ -45,6 +45,9 @@ metricbeat.modules: enabled: true period: 10s processes: ['.*'] + process.include_top_n: + by_cpu: 5 # include top 5 processes by CPU + by_memory: 5 # include top 5 processes by memory diff --git a/metricbeat/module/system/_meta/config.full.yml b/metricbeat/module/system/_meta/config.full.yml index a1ed370e9aa..53d69fc970c 100644 --- a/metricbeat/module/system/_meta/config.full.yml +++ b/metricbeat/module/system/_meta/config.full.yml @@ -36,6 +36,23 @@ # if true, exports the CPU usage in ticks, together with the percentage values #cpu_ticks: false + # These options allow you to filter out all processes that are not + # in the top N by CPU or memory, in order to reduce the number of documents created. + # If both the `by_cpu` and `by_memory` options are used, the union of the two tops + # is included. + #process.include_top_n: + # + # Set to false to disable this feature and include all processes + #enabled: true + + # How many processes to include from the top by CPU. The processes are sorted + # by the `system.process.cpu.total.pct` field. + #by_cpu: 0 + + # How many processes to include from the top by memory. The processes are sorted + # by the `system.process.memory.rss.bytes` field. + #by_memory: 0 + # If false, cmdline of a process is not cached. 
#process.cmdline.cache.enabled: true diff --git a/metricbeat/module/system/_meta/config.yml b/metricbeat/module/system/_meta/config.yml index 474776c1862..a4a14b8e433 100644 --- a/metricbeat/module/system/_meta/config.yml +++ b/metricbeat/module/system/_meta/config.yml @@ -32,3 +32,6 @@ enabled: true period: 10s processes: ['.*'] + process.include_top_n: + by_cpu: 5 # include top 5 processes by CPU + by_memory: 5 # include top 5 processes by memory diff --git a/metricbeat/module/system/_meta/docs.asciidoc b/metricbeat/module/system/_meta/docs.asciidoc index a861aa9e5d5..87e37f1e9c7 100644 --- a/metricbeat/module/system/_meta/docs.asciidoc +++ b/metricbeat/module/system/_meta/docs.asciidoc @@ -21,6 +21,25 @@ metricbeat.modules: metricsets: ["process"] processes: ['.*'] ---- + +*`process.include_top_n`*:: These options allow you to filter out all processes +that are not in the top N by CPU or memory, in order to reduce the number of +documents created. If both the `by_cpu` and `by_memory` options are used, the +union of the two tops is included. + +*`process.include_top_n.enabled`*:: Set to false to disable the top N feature and +include all processes, regardless of the other options. The default is `true`, +but nothing is filtered unless one of the other options (`by_cpu` or `by_memory`) +is set to a non-zero value. + +*`process.include_top_n.by_cpu`*:: How many processes to include from the top +by CPU. The processes are sorted by the `system.process.cpu.total.pct` field. +The default is 0. + +*`process.include_top_n.by_memory`*:: How many processes to include from the top +by memory. The processes are sorted by the `system.process.memory.rss.bytes` +field. The default is 0. + *`process.cgroups.enabled`*:: When the `process` metricset is enabled, you can use this boolean configuration option to disable cgroup metrics. By default cgroup metrics collection is enabled. 
diff --git a/metricbeat/module/system/process/helper.go b/metricbeat/module/system/process/helper.go index ea9188b64f1..b3bf242bfdd 100644 --- a/metricbeat/module/system/process/helper.go +++ b/metricbeat/module/system/process/helper.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "runtime" + "sort" "strings" "time" @@ -20,19 +21,20 @@ import ( type ProcsMap map[int]*Process type Process struct { - Pid int `json:"pid"` - Ppid int `json:"ppid"` - Pgid int `json:"pgid"` - Name string `json:"name"` - Username string `json:"username"` - State string `json:"state"` - CmdLine string `json:"cmdline"` - Cwd string `json:"cwd"` - Mem sigar.ProcMem - Cpu sigar.ProcTime - Ctime time.Time - FD sigar.ProcFDUsage - Env common.MapStr + Pid int `json:"pid"` + Ppid int `json:"ppid"` + Pgid int `json:"pgid"` + Name string `json:"name"` + Username string `json:"username"` + State string `json:"state"` + CmdLine string `json:"cmdline"` + Cwd string `json:"cwd"` + Mem sigar.ProcMem + Cpu sigar.ProcTime + Ctime time.Time + FD sigar.ProcFDUsage + Env common.MapStr + cpuTotalPct float64 } type ProcStats struct { @@ -41,6 +43,7 @@ type ProcStats struct { CpuTicks bool EnvWhitelist []string CacheCmdLine bool + IncludeTop includeTopConfig procRegexps []match.Matcher // List of regular expressions used to whitelist processes. envRegexps []match.Matcher // List of regular expressions used to whitelist env vars. 
@@ -216,7 +219,7 @@ func getProcState(b byte) string { return "unknown" } -func (procStats *ProcStats) GetProcessEvent(process *Process, last *Process) common.MapStr { +func (procStats *ProcStats) getProcessEvent(process *Process) common.MapStr { proc := common.MapStr{ "pid": process.Pid, "ppid": process.Ppid, @@ -248,7 +251,7 @@ func (procStats *ProcStats) GetProcessEvent(process *Process, last *Process) com proc["cpu"] = common.MapStr{ "total": common.MapStr{ - "pct": GetProcCpuPercentage(last, process), + "pct": process.cpuTotalPct, }, "start_time": unixTimeMsToTime(process.Cpu.StartTime), } @@ -336,7 +339,7 @@ func (procStats *ProcStats) GetProcStats() ([]common.MapStr, error) { return nil, err } - processes := []common.MapStr{} + processes := []Process{} newProcs := make(ProcsMap, len(pids)) for _, pid := range pids { @@ -363,16 +366,79 @@ func (procStats *ProcStats) GetProcStats() ([]common.MapStr, error) { } newProcs[process.Pid] = process - last := procStats.ProcsMap[process.Pid] - proc := procStats.GetProcessEvent(process, last) + process.cpuTotalPct = GetProcCpuPercentage(last, process) + processes = append(processes, *process) + } + } + procStats.ProcsMap = newProcs + + processes = procStats.includeTopProcesses(processes) + logp.Debug("processes", "Filtered top processes down to %d processes", len(processes)) + + procs := []common.MapStr{} + for _, process := range processes { + proc := procStats.getProcessEvent(&process) + procs = append(procs, proc) + } + + return procs, nil +} - processes = append(processes, proc) +// includeTopProcesses returns only the processes that are in the top N by +// CPU and/or memory (the union of both tops). The input slice is sorted in +// place. If the feature is disabled or neither N is set, the input is +// returned unchanged. +func (procStats *ProcStats) includeTopProcesses(processes []Process) []Process { + + if !procStats.IncludeTop.Enabled || + (procStats.IncludeTop.ByCPU == 0 && procStats.IncludeTop.ByMemory == 0) { + + return processes + } + + result := []Process{} + if procStats.IncludeTop.ByCPU > 0 { + sort.Slice(processes, func(i, j int) bool { + return processes[i].cpuTotalPct > processes[j].cpuTotalPct + }) + // Clamp N to the number of processes so the slice expression below + // cannot go out of range when by_cpu exceeds the process count. + numProcs := procStats.IncludeTop.ByCPU + if len(processes) < numProcs { + numProcs = len(processes) + } + result = append(result, 
processes[:numProcs]...) + } + + if procStats.IncludeTop.ByMemory > 0 { + sort.Slice(processes, func(i, j int) bool { + return processes[i].Mem.Resident > processes[j].Mem.Resident + }) + // Clamp N here as well; by_memory may exceed the process count. + numProcs := procStats.IncludeTop.ByMemory + if len(processes) < numProcs { + numProcs = len(processes) + } + for _, proc := range processes[:numProcs] { + if !isProcessInSlice(result, &proc) { + result = append(result, proc) + } } } - procStats.ProcsMap = newProcs - return processes, nil + return result +} + +// isProcessInSlice reports whether processes contains an entry with the same +// PID as proc. +func isProcessInSlice(processes []Process, proc *Process) bool { + for _, p := range processes { + if p.Pid == proc.Pid { + return true + } + } + return false } // isWhitelistedEnvVar returns true if the given variable name is a match for diff --git a/metricbeat/module/system/process/helper_test.go b/metricbeat/module/system/process/helper_test.go index b28a435b59a..27781a5fa82 100644 --- a/metricbeat/module/system/process/helper_test.go +++ b/metricbeat/module/system/process/helper_test.go @@ -6,6 +6,7 @@ package process import ( "os" "runtime" + "sort" "testing" "time" @@ -175,3 +176,123 @@ func BenchmarkGetProcess(b *testing.B) { procs[pid] = process } } + +func TestIncludeTopProcesses(t *testing.T) { + processes := []Process{ + { + Pid: 1, + cpuTotalPct: 10, + Mem: gosigar.ProcMem{Resident: 3000}, + }, + { + Pid: 2, + cpuTotalPct: 5, + Mem: gosigar.ProcMem{Resident: 4000}, + }, + { + Pid: 3, + cpuTotalPct: 7, + Mem: gosigar.ProcMem{Resident: 2000}, + }, + { + Pid: 4, + cpuTotalPct: 5, + Mem: gosigar.ProcMem{Resident: 8000}, + }, + { + Pid: 5, + cpuTotalPct: 12, + Mem: gosigar.ProcMem{Resident: 9000}, + }, + { + Pid: 6, + cpuTotalPct: 5, + Mem: gosigar.ProcMem{Resident: 7000}, + }, + { + Pid: 7, + cpuTotalPct: 80, + Mem: gosigar.ProcMem{Resident: 11000}, + }, + { + Pid: 8, + cpuTotalPct: 50, + Mem: gosigar.ProcMem{Resident: 13000}, + }, + { + Pid: 9, + cpuTotalPct: 15, + Mem: gosigar.ProcMem{Resident: 1000}, + }, + { + Pid: 10, + cpuTotalPct: 60, + Mem: 
gosigar.ProcMem{Resident: 500}, + }, + } + + tests := []struct { + Name string + Cfg includeTopConfig + ExpectedPids []int + }{ + { + Name: "top 2 processes by CPU", + Cfg: includeTopConfig{Enabled: true, ByCPU: 2}, + ExpectedPids: []int{7, 10}, + }, + { + Name: "top 4 processes by CPU", + Cfg: includeTopConfig{Enabled: true, ByCPU: 4}, + ExpectedPids: []int{7, 10, 8, 9}, + }, + { + Name: "top 2 processes by memory", + Cfg: includeTopConfig{Enabled: true, ByMemory: 2}, + ExpectedPids: []int{8, 7}, + }, + { + Name: "top 4 processes by memory", + Cfg: includeTopConfig{Enabled: true, ByMemory: 4}, + ExpectedPids: []int{8, 7, 5, 4}, + }, + { + Name: "top 2 processes by CPU + top 2 by memory", + Cfg: includeTopConfig{Enabled: true, ByCPU: 2, ByMemory: 2}, + ExpectedPids: []int{7, 10, 8}, + }, + { + Name: "top 4 processes by CPU + top 4 by memory", + Cfg: includeTopConfig{Enabled: true, ByCPU: 4, ByMemory: 4}, + ExpectedPids: []int{7, 10, 8, 9, 5, 4}, + }, + { + Name: "top N larger than the number of processes", + Cfg: includeTopConfig{Enabled: true, ByCPU: 20, ByMemory: 20}, + ExpectedPids: []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }, + { + Name: "enabled false", + Cfg: includeTopConfig{Enabled: false, ByCPU: 4, ByMemory: 4}, + ExpectedPids: []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }, + { + Name: "enabled true but cpu & mem not configured", + Cfg: includeTopConfig{Enabled: true}, + ExpectedPids: []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }, + } + + for _, test := range tests { + procStats := ProcStats{IncludeTop: test.Cfg} + res := procStats.includeTopProcesses(processes) + + resPids := []int{} + for _, p := range res { + resPids = append(resPids, p.Pid) + } + sort.Ints(test.ExpectedPids) + sort.Ints(resPids) + assert.Equal(t, test.ExpectedPids, resPids, test.Name) + } +} diff --git a/metricbeat/module/system/process/process.go b/metricbeat/module/system/process/process.go index f7289223817..33334efb4d1 100644 --- a/metricbeat/module/system/process/process.go +++ b/metricbeat/module/system/process/process.go @@ -32,17 +32,31 @@ type MetricSet struct { cacheCmdLine bool } +// includeTopConfig is the configuration for the "top N processes +// filtering" 
feature +type includeTopConfig struct { + Enabled bool `config:"enabled"` + ByCPU int `config:"by_cpu"` + ByMemory int `config:"by_memory"` +} + // New creates and returns a new MetricSet. func New(base mb.BaseMetricSet) (mb.MetricSet, error) { config := struct { - Procs []string `config:"processes"` - Cgroups *bool `config:"process.cgroups.enabled"` - EnvWhitelist []string `config:"process.env.whitelist"` - CPUTicks bool `config:"cpu_ticks"` - CacheCmdLine bool `config:"process.cmdline.cache.enabled"` + Procs []string `config:"processes"` + Cgroups *bool `config:"process.cgroups.enabled"` + EnvWhitelist []string `config:"process.env.whitelist"` + CPUTicks bool `config:"cpu_ticks"` + CacheCmdLine bool `config:"process.cmdline.cache.enabled"` + IncludeTop includeTopConfig `config:"process.include_top_n"` }{ Procs: []string{".*"}, // collect all processes by default CacheCmdLine: true, + IncludeTop: includeTopConfig{ + Enabled: true, + ByCPU: 0, + ByMemory: 0, + }, } if err := base.Module().UnpackConfig(&config); err != nil { return nil, err @@ -55,6 +69,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { EnvWhitelist: config.EnvWhitelist, CpuTicks: config.CPUTicks, CacheCmdLine: config.CacheCmdLine, + IncludeTop: config.IncludeTop, }, } err := m.stats.InitProcStats()