From ff89f9545f7f223019dbef6537529bb539b21bfd Mon Sep 17 00:00:00 2001 From: Foxlik Date: Sat, 3 Dec 2016 21:12:28 +0100 Subject: [PATCH] Fix improper total of CPU times On Linux, the user and nice CPU time counters already include the respective guest and guest_nice counters. Adding guest and guest_nice to the total again counts that time twice, which results in improper calculation of percentages. Please see: https://github.com/torvalds/linux/blob/447976e/kernel/sched/cputime.c#L169 https://lists.linuxfoundation.org/pipermail/virtualization/2009-August/013459.html https://github.com/giampaolo/psutil/pull/940 --- CHANGELOG.md | 1 + plugins/inputs/system/cpu.go | 6 ++-- plugins/inputs/system/cpu_test.go | 52 +++++++++++++++---------------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ac4856c7b78..a980927eb84ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Features +- [#2123](https://github.com/influxdata/telegraf/pull/2123): Fix improper calculation of CPU percentages - [#1564](https://github.com/influxdata/telegraf/issues/1564): Use RFC3339 timestamps in log output. - [#1997](https://github.com/influxdata/telegraf/issues/1997): Non-default HTTP timeouts for RabbitMQ plugin. - [#2074](https://github.com/influxdata/telegraf/pull/2074): "discard" output plugin added, primarily for testing purposes. 
diff --git a/plugins/inputs/system/cpu.go b/plugins/inputs/system/cpu.go index 295c0d347aae3..3ed2606fac811 100644 --- a/plugins/inputs/system/cpu.go +++ b/plugins/inputs/system/cpu.go @@ -91,10 +91,10 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error { continue } fieldsG := map[string]interface{}{ - "usage_user": 100 * (cts.User - lastCts.User) / totalDelta, + "usage_user": 100 * (cts.User - lastCts.User - (cts.Guest - lastCts.Guest)) / totalDelta, "usage_system": 100 * (cts.System - lastCts.System) / totalDelta, "usage_idle": 100 * (cts.Idle - lastCts.Idle) / totalDelta, - "usage_nice": 100 * (cts.Nice - lastCts.Nice) / totalDelta, + "usage_nice": 100 * (cts.Nice - lastCts.Nice - (cts.GuestNice - lastCts.GuestNice)) / totalDelta, "usage_iowait": 100 * (cts.Iowait - lastCts.Iowait) / totalDelta, "usage_irq": 100 * (cts.Irq - lastCts.Irq) / totalDelta, "usage_softirq": 100 * (cts.Softirq - lastCts.Softirq) / totalDelta, @@ -112,7 +112,7 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error { func totalCpuTime(t cpu.TimesStat) float64 { total := t.User + t.System + t.Nice + t.Iowait + t.Irq + t.Softirq + t.Steal + - t.Guest + t.GuestNice + t.Idle + t.Idle return total } diff --git a/plugins/inputs/system/cpu_test.go b/plugins/inputs/system/cpu_test.go index f1f659e6f8268..e071f22c59585 100644 --- a/plugins/inputs/system/cpu_test.go +++ b/plugins/inputs/system/cpu_test.go @@ -17,29 +17,29 @@ func TestCPUStats(t *testing.T) { cts := cpu.TimesStat{ CPU: "cpu0", - User: 3.1, + User: 8.8, System: 8.2, Idle: 80.1, Nice: 1.3, - Iowait: 0.2, - Irq: 0.1, + Iowait: 0.8389, + Irq: 0.6, Softirq: 0.11, Steal: 0.0511, - Guest: 8.1, + Guest: 3.1, GuestNice: 0.324, } cts2 := cpu.TimesStat{ CPU: "cpu0", - User: 11.4, // increased by 8.3 + User: 24.9, // increased by 16.1 System: 10.9, // increased by 2.7 - Idle: 158.8699, // increased by 78.7699 (for total increase of 100) - Nice: 2.5, // increased by 1.2 - Iowait: 0.7, // increased by 0.5 - Irq: 1.2, // increased 
by 1.1 + Idle: 157.9798, // increased by 77.8798 (for total increase of 100) + Nice: 3.5, // increased by 2.2 + Iowait: 0.929, // increased by 0.0901 + Irq: 1.2, // increased by 0.6 Softirq: 0.31, // increased by 0.2 - Steal: 0.2812, // increased by 0.0001 - Guest: 12.9, // increased by 4.8 + Steal: 0.2812, // increased by 0.2301 + Guest: 11.4, // increased by 8.3 GuestNice: 2.524, // increased by 2.2 } @@ -56,15 +56,15 @@ func TestCPUStats(t *testing.T) { // Computed values are checked with delta > 0 becasue of floating point arithmatic // imprecision - assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 3.1, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 8.8, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_system", 8.2, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 80.1, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_nice", 1.3, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 0.2, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "time_irq", 0.1, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 0.8389, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_irq", 0.6, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_softirq", 0.11, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_steal", 0.0511, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "time_guest", 8.1, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_guest", 3.1, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_guest_nice", 0.324, 0, cputags) mps2 := MockPS{} @@ -75,26 +75,26 @@ func TestCPUStats(t *testing.T) { err = cs.Gather(&acc) require.NoError(t, err) - assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 11.4, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 24.9, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_system", 10.9, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", 
"time_idle", 158.8699, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "time_nice", 2.5, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 0.7, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 157.9798, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_nice", 3.5, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 0.929, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_irq", 1.2, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_softirq", 0.31, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_steal", 0.2812, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "time_guest", 12.9, 0, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "time_guest", 11.4, 0, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "time_guest_nice", 2.524, 0, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "usage_user", 8.3, 0.0005, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "usage_user", 7.8, 0.0005, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "usage_system", 2.7, 0.0005, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "usage_idle", 78.7699, 0.0005, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "usage_nice", 1.2, 0.0005, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "usage_iowait", 0.5, 0.0005, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "usage_irq", 1.1, 0.0005, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "usage_idle", 77.8798, 0.0005, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "usage_nice", 0, 0.0005, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "usage_iowait", 0.0901, 0.0005, cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "usage_irq", 0.6, 0.0005, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "usage_softirq", 0.2, 0.0005, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "usage_steal", 0.2301, 0.0005, cputags) - assertContainsTaggedFloat(t, &acc, "cpu", "usage_guest", 4.8, 0.0005, 
cputags) + assertContainsTaggedFloat(t, &acc, "cpu", "usage_guest", 8.3, 0.0005, cputags) assertContainsTaggedFloat(t, &acc, "cpu", "usage_guest_nice", 2.2, 0.0005, cputags) }