From 0e9dde71b9afc6f6193a9a03f80c10b7710018ba Mon Sep 17 00:00:00 2001 From: Bomin Rahmani <38274348+BominRahmani@users.noreply.github.com> Date: Mon, 1 Jul 2024 11:41:25 -0400 Subject: [PATCH] [receiver/vcenter] Adds vCenter VM CPU readiness metric (#33608) **Description:** This PR adds the `vcenter.vm.cpu.readiness` metric. More information on this metric can be found [here](https://vdc-repo.vmware.com/vmwb-repository/dcr-public/d1902b0e-d479-46bf-8ac9-cee0e31e8ec0/07ce8dbd-db48-4261-9b8f-c6d3ad8ba472/vim.vm.Summary.QuickStats.html). **Link to tracking Issue:** #33607 **Testing:** The metric was scraped from a test vCenter environment, and golden test files were updated to reflect the addition of the metric. **Documentation:** Documentation was updated according to the metadata.yaml --- .../add_vcenter_vm_cpu_readiness_metric.yaml | 27 +++++++++ .chloggen/drop_support_for_vcenter_6.yaml | 27 +++++++++ receiver/vcenterreceiver/README.md | 1 - receiver/vcenterreceiver/documentation.md | 18 ++++++ .../internal/metadata/generated_config.go | 4 ++ .../metadata/generated_config_test.go | 2 + .../internal/metadata/generated_metrics.go | 60 +++++++++++++++++++ .../metadata/generated_metrics_test.go | 19 ++++++ .../internal/metadata/testdata/config.yaml | 4 ++ receiver/vcenterreceiver/metadata.yaml | 9 +++ receiver/vcenterreceiver/metrics.go | 4 ++ receiver/vcenterreceiver/scraper_test.go | 1 + .../metrics/expected-all-enabled.yaml | 24 ++++++++ 13 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 .chloggen/add_vcenter_vm_cpu_readiness_metric.yaml create mode 100644 .chloggen/drop_support_for_vcenter_6.yaml diff --git a/.chloggen/add_vcenter_vm_cpu_readiness_metric.yaml b/.chloggen/add_vcenter_vm_cpu_readiness_metric.yaml new file mode 100644 index 000000000000..0d3bc5bb2c8c --- /dev/null +++ b/.chloggen/add_vcenter_vm_cpu_readiness_metric.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. 
+ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: vcenterreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Adds vCenter CPU readiness metric for VMs." + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [33607] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/.chloggen/drop_support_for_vcenter_6.yaml b/.chloggen/drop_support_for_vcenter_6.yaml new file mode 100644 index 000000000000..33150f6ba536 --- /dev/null +++ b/.chloggen/drop_support_for_vcenter_6.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: vcenterreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Drops support for vCenter 6.7" + +# Mandatory: One or more tracking issues related to the change. 
You can use the PR number here if no issue exists. +issues: [33607] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/receiver/vcenterreceiver/README.md b/receiver/vcenterreceiver/README.md index 3db398122586..c3c9c9b9730b 100644 --- a/receiver/vcenterreceiver/README.md +++ b/receiver/vcenterreceiver/README.md @@ -20,7 +20,6 @@ This receiver has been built to support ESXi and vCenter versions: - 8 - 7.0 -- 6.7 A “Read Only” user assigned to a vSphere with permissions to the vCenter server, cluster and all subsequent resources being monitored must be specified in order for the receiver to retrieve information about them. diff --git a/receiver/vcenterreceiver/documentation.md b/receiver/vcenterreceiver/documentation.md index 877097062058..3e1dcd1dbb8a 100644 --- a/receiver/vcenterreceiver/documentation.md +++ b/receiver/vcenterreceiver/documentation.md @@ -477,6 +477,24 @@ As measured over the most recent 20s interval. | ---- | ----------- | ------ | | object | The object on the virtual machine or host that is being reported on. | Any Str | +## Optional Metrics + +The following metrics are not emitted by default. 
Each of them can be enabled by applying the following configuration: + +```yaml +metrics: + <metric_name>: + enabled: true +``` + +### vcenter.vm.cpu.readiness + +Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU. + +| Unit | Metric Type | Value Type | +| ---- | ----------- | ---------- | +| % | Gauge | Int | + ## Resource Attributes | Name | Description | Values | Enabled | diff --git a/receiver/vcenterreceiver/internal/metadata/generated_config.go b/receiver/vcenterreceiver/internal/metadata/generated_config.go index 4d9dd23e8007..139463dd436c 100644 --- a/receiver/vcenterreceiver/internal/metadata/generated_config.go +++ b/receiver/vcenterreceiver/internal/metadata/generated_config.go @@ -52,6 +52,7 @@ type MetricsConfig struct { VcenterResourcePoolCPUUsage MetricConfig `mapstructure:"vcenter.resource_pool.cpu.usage"` VcenterResourcePoolMemoryShares MetricConfig `mapstructure:"vcenter.resource_pool.memory.shares"` VcenterResourcePoolMemoryUsage MetricConfig `mapstructure:"vcenter.resource_pool.memory.usage"` + VcenterVMCPUReadiness MetricConfig `mapstructure:"vcenter.vm.cpu.readiness"` VcenterVMCPUUsage MetricConfig `mapstructure:"vcenter.vm.cpu.usage"` VcenterVMCPUUtilization MetricConfig `mapstructure:"vcenter.vm.cpu.utilization"` VcenterVMDiskLatencyAvg MetricConfig `mapstructure:"vcenter.vm.disk.latency.avg"` @@ -144,6 +145,9 @@ func DefaultMetricsConfig() MetricsConfig { VcenterResourcePoolMemoryUsage: MetricConfig{ Enabled: true, }, + VcenterVMCPUReadiness: MetricConfig{ + Enabled: false, + }, VcenterVMCPUUsage: MetricConfig{ Enabled: true, }, diff --git a/receiver/vcenterreceiver/internal/metadata/generated_config_test.go b/receiver/vcenterreceiver/internal/metadata/generated_config_test.go index c3f40505e21e..7a73466b296f 100644 --- a/receiver/vcenterreceiver/internal/metadata/generated_config_test.go +++ b/receiver/vcenterreceiver/internal/metadata/generated_config_test.go @@ -49,6 +49,7 @@ func 
TestMetricsBuilderConfig(t *testing.T) { VcenterResourcePoolCPUUsage: MetricConfig{Enabled: true}, VcenterResourcePoolMemoryShares: MetricConfig{Enabled: true}, VcenterResourcePoolMemoryUsage: MetricConfig{Enabled: true}, + VcenterVMCPUReadiness: MetricConfig{Enabled: true}, VcenterVMCPUUsage: MetricConfig{Enabled: true}, VcenterVMCPUUtilization: MetricConfig{Enabled: true}, VcenterVMDiskLatencyAvg: MetricConfig{Enabled: true}, @@ -110,6 +111,7 @@ func TestMetricsBuilderConfig(t *testing.T) { VcenterResourcePoolCPUUsage: MetricConfig{Enabled: false}, VcenterResourcePoolMemoryShares: MetricConfig{Enabled: false}, VcenterResourcePoolMemoryUsage: MetricConfig{Enabled: false}, + VcenterVMCPUReadiness: MetricConfig{Enabled: false}, VcenterVMCPUUsage: MetricConfig{Enabled: false}, VcenterVMCPUUtilization: MetricConfig{Enabled: false}, VcenterVMDiskLatencyAvg: MetricConfig{Enabled: false}, diff --git a/receiver/vcenterreceiver/internal/metadata/generated_metrics.go b/receiver/vcenterreceiver/internal/metadata/generated_metrics.go index f2df36e15406..3637e54f9164 100644 --- a/receiver/vcenterreceiver/internal/metadata/generated_metrics.go +++ b/receiver/vcenterreceiver/internal/metadata/generated_metrics.go @@ -1381,6 +1381,55 @@ func newMetricVcenterResourcePoolMemoryUsage(cfg MetricConfig) metricVcenterReso return m } +type metricVcenterVMCPUReadiness struct { + data pmetric.Metric // data buffer for generated metric. + config MetricConfig // metric config provided by user. + capacity int // max observed number of data points added to the metric. +} + +// init fills vcenter.vm.cpu.readiness metric with initial data. 
+func (m *metricVcenterVMCPUReadiness) init() { + m.data.SetName("vcenter.vm.cpu.readiness") + m.data.SetDescription("Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU.") + m.data.SetUnit("%") + m.data.SetEmptyGauge() +} + +func (m *metricVcenterVMCPUReadiness) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64) { + if !m.config.Enabled { + return + } + dp := m.data.Gauge().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. +func (m *metricVcenterVMCPUReadiness) updateCapacity() { + if m.data.Gauge().DataPoints().Len() > m.capacity { + m.capacity = m.data.Gauge().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricVcenterVMCPUReadiness) emit(metrics pmetric.MetricSlice) { + if m.config.Enabled && m.data.Gauge().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() + } +} + +func newMetricVcenterVMCPUReadiness(cfg MetricConfig) metricVcenterVMCPUReadiness { + m := metricVcenterVMCPUReadiness{config: cfg} + if cfg.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m +} + type metricVcenterVMCPUUsage struct { data pmetric.Metric // data buffer for generated metric. config MetricConfig // metric config provided by user. 
@@ -2237,6 +2286,7 @@ type MetricsBuilder struct { metricVcenterResourcePoolCPUUsage metricVcenterResourcePoolCPUUsage metricVcenterResourcePoolMemoryShares metricVcenterResourcePoolMemoryShares metricVcenterResourcePoolMemoryUsage metricVcenterResourcePoolMemoryUsage + metricVcenterVMCPUReadiness metricVcenterVMCPUReadiness metricVcenterVMCPUUsage metricVcenterVMCPUUsage metricVcenterVMCPUUtilization metricVcenterVMCPUUtilization metricVcenterVMDiskLatencyAvg metricVcenterVMDiskLatencyAvg @@ -2266,6 +2316,9 @@ func WithStartTime(startTime pcommon.Timestamp) metricBuilderOption { } func NewMetricsBuilder(mbc MetricsBuilderConfig, settings receiver.Settings, options ...metricBuilderOption) *MetricsBuilder { + if !mbc.Metrics.VcenterVMCPUReadiness.enabledSetByUser { + settings.Logger.Warn("[WARNING] Please set `enabled` field explicitly for `vcenter.vm.cpu.readiness`: this metric will be enabled by default starting in release v0.105.0") + } mb := &MetricsBuilder{ config: mbc, startTime: pcommon.NewTimestampFromTime(time.Now()), @@ -2295,6 +2348,7 @@ func NewMetricsBuilder(mbc MetricsBuilderConfig, settings receiver.Settings, opt metricVcenterResourcePoolCPUUsage: newMetricVcenterResourcePoolCPUUsage(mbc.Metrics.VcenterResourcePoolCPUUsage), metricVcenterResourcePoolMemoryShares: newMetricVcenterResourcePoolMemoryShares(mbc.Metrics.VcenterResourcePoolMemoryShares), metricVcenterResourcePoolMemoryUsage: newMetricVcenterResourcePoolMemoryUsage(mbc.Metrics.VcenterResourcePoolMemoryUsage), + metricVcenterVMCPUReadiness: newMetricVcenterVMCPUReadiness(mbc.Metrics.VcenterVMCPUReadiness), metricVcenterVMCPUUsage: newMetricVcenterVMCPUUsage(mbc.Metrics.VcenterVMCPUUsage), metricVcenterVMCPUUtilization: newMetricVcenterVMCPUUtilization(mbc.Metrics.VcenterVMCPUUtilization), metricVcenterVMDiskLatencyAvg: newMetricVcenterVMDiskLatencyAvg(mbc.Metrics.VcenterVMDiskLatencyAvg), @@ -2471,6 +2525,7 @@ func (mb *MetricsBuilder) EmitForResource(rmo ...ResourceMetricsOption) { 
mb.metricVcenterResourcePoolCPUUsage.emit(ils.Metrics()) mb.metricVcenterResourcePoolMemoryShares.emit(ils.Metrics()) mb.metricVcenterResourcePoolMemoryUsage.emit(ils.Metrics()) + mb.metricVcenterVMCPUReadiness.emit(ils.Metrics()) mb.metricVcenterVMCPUUsage.emit(ils.Metrics()) mb.metricVcenterVMCPUUtilization.emit(ils.Metrics()) mb.metricVcenterVMDiskLatencyAvg.emit(ils.Metrics()) @@ -2638,6 +2693,11 @@ func (mb *MetricsBuilder) RecordVcenterResourcePoolMemoryUsageDataPoint(ts pcomm mb.metricVcenterResourcePoolMemoryUsage.recordDataPoint(mb.startTime, ts, val) } +// RecordVcenterVMCPUReadinessDataPoint adds a data point to vcenter.vm.cpu.readiness metric. +func (mb *MetricsBuilder) RecordVcenterVMCPUReadinessDataPoint(ts pcommon.Timestamp, val int64) { + mb.metricVcenterVMCPUReadiness.recordDataPoint(mb.startTime, ts, val) +} + // RecordVcenterVMCPUUsageDataPoint adds a data point to vcenter.vm.cpu.usage metric. func (mb *MetricsBuilder) RecordVcenterVMCPUUsageDataPoint(ts pcommon.Timestamp, val int64) { mb.metricVcenterVMCPUUsage.recordDataPoint(mb.startTime, ts, val) diff --git a/receiver/vcenterreceiver/internal/metadata/generated_metrics_test.go b/receiver/vcenterreceiver/internal/metadata/generated_metrics_test.go index 3c253c8721f3..717272d76770 100644 --- a/receiver/vcenterreceiver/internal/metadata/generated_metrics_test.go +++ b/receiver/vcenterreceiver/internal/metadata/generated_metrics_test.go @@ -62,6 +62,10 @@ func TestMetricsBuilder(t *testing.T) { mb := NewMetricsBuilder(loadMetricsBuilderConfig(t, test.name), settings, WithStartTime(start)) expectedWarnings := 0 + if test.metricsSet == testDataSetDefault { + assert.Equal(t, "[WARNING] Please set `enabled` field explicitly for `vcenter.vm.cpu.readiness`: this metric will be enabled by default starting in release v0.105.0", observedLogs.All()[expectedWarnings].Message) + expectedWarnings++ + } assert.Equal(t, expectedWarnings, observedLogs.Len()) @@ -164,6 +168,9 @@ func TestMetricsBuilder(t 
*testing.T) { allMetricsCount++ mb.RecordVcenterResourcePoolMemoryUsageDataPoint(ts, 1) + allMetricsCount++ + mb.RecordVcenterVMCPUReadinessDataPoint(ts, 1) + defaultMetricsCount++ allMetricsCount++ mb.RecordVcenterVMCPUUsageDataPoint(ts, 1) @@ -630,6 +637,18 @@ func TestMetricsBuilder(t *testing.T) { assert.Equal(t, ts, dp.Timestamp()) assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) assert.Equal(t, int64(1), dp.IntValue()) + case "vcenter.vm.cpu.readiness": + assert.False(t, validatedMetrics["vcenter.vm.cpu.readiness"], "Found a duplicate in the metrics slice: vcenter.vm.cpu.readiness") + validatedMetrics["vcenter.vm.cpu.readiness"] = true + assert.Equal(t, pmetric.MetricTypeGauge, ms.At(i).Type()) + assert.Equal(t, 1, ms.At(i).Gauge().DataPoints().Len()) + assert.Equal(t, "Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU.", ms.At(i).Description()) + assert.Equal(t, "%", ms.At(i).Unit()) + dp := ms.At(i).Gauge().DataPoints().At(0) + assert.Equal(t, start, dp.StartTimestamp()) + assert.Equal(t, ts, dp.Timestamp()) + assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) + assert.Equal(t, int64(1), dp.IntValue()) case "vcenter.vm.cpu.usage": assert.False(t, validatedMetrics["vcenter.vm.cpu.usage"], "Found a duplicate in the metrics slice: vcenter.vm.cpu.usage") validatedMetrics["vcenter.vm.cpu.usage"] = true diff --git a/receiver/vcenterreceiver/internal/metadata/testdata/config.yaml b/receiver/vcenterreceiver/internal/metadata/testdata/config.yaml index c39cfd4de7f4..c825fb72278a 100644 --- a/receiver/vcenterreceiver/internal/metadata/testdata/config.yaml +++ b/receiver/vcenterreceiver/internal/metadata/testdata/config.yaml @@ -49,6 +49,8 @@ all_set: enabled: true vcenter.resource_pool.memory.usage: enabled: true + vcenter.vm.cpu.readiness: + enabled: true vcenter.vm.cpu.usage: enabled: true vcenter.vm.cpu.utilization: @@ -156,6 +158,8 @@ none_set: enabled: false 
vcenter.resource_pool.memory.usage: enabled: false + vcenter.vm.cpu.readiness: + enabled: false vcenter.vm.cpu.usage: enabled: false vcenter.vm.cpu.utilization: diff --git a/receiver/vcenterreceiver/metadata.yaml b/receiver/vcenterreceiver/metadata.yaml index c0d92d53e359..fb9209bb9dd9 100644 --- a/receiver/vcenterreceiver/metadata.yaml +++ b/receiver/vcenterreceiver/metadata.yaml @@ -441,6 +441,15 @@ metrics: value_type: int aggregation_temporality: cumulative attributes: [] + vcenter.vm.cpu.readiness: + enabled: false + description: Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU. + unit: "%" + gauge: + value_type: int + attributes: [] + warnings: + if_enabled_not_set: "this metric will be enabled by default starting in release v0.105.0" vcenter.vm.memory.utilization: enabled: true description: The memory utilization of the VM. diff --git a/receiver/vcenterreceiver/metrics.go b/receiver/vcenterreceiver/metrics.go index e83083c23dac..12bd7dbf6d73 100644 --- a/receiver/vcenterreceiver/metrics.go +++ b/receiver/vcenterreceiver/metrics.go @@ -140,6 +140,10 @@ func (v *vcenterMetricScraper) recordVMStats( return } v.mb.RecordVcenterVMCPUUtilizationDataPoint(ts, 100*float64(cpuUsage)/float64(cpuLimit)) + + cpuReadiness := vm.Summary.QuickStats.OverallCpuReadiness + v.mb.RecordVcenterVMCPUReadinessDataPoint(ts, int64(cpuReadiness)) + } var hostPerfMetricList = []string{ diff --git a/receiver/vcenterreceiver/scraper_test.go b/receiver/vcenterreceiver/scraper_test.go index 084d91551a46..a4e161a3aaab 100644 --- a/receiver/vcenterreceiver/scraper_test.go +++ b/receiver/vcenterreceiver/scraper_test.go @@ -39,6 +39,7 @@ func TestScrapeConfigsEnabled(t *testing.T) { defer mockServer.Close() optConfigs := metadata.DefaultMetricsBuilderConfig() + optConfigs.Metrics.VcenterVMCPUReadiness.Enabled = true cfg := &Config{ MetricsBuilderConfig: optConfigs, diff --git 
a/receiver/vcenterreceiver/testdata/metrics/expected-all-enabled.yaml b/receiver/vcenterreceiver/testdata/metrics/expected-all-enabled.yaml index aded1a42f8ae..29ee9750e59c 100644 --- a/receiver/vcenterreceiver/testdata/metrics/expected-all-enabled.yaml +++ b/receiver/vcenterreceiver/testdata/metrics/expected-all-enabled.yaml @@ -5212,6 +5212,14 @@ resourceMetrics: stringValue: CentOS 9 scopeMetrics: - metrics: + - description: Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: vcenter.vm.cpu.readiness + unit: '%' - description: The amount of CPU used by the VM. name: vcenter.vm.cpu.usage sum: @@ -5841,6 +5849,14 @@ resourceMetrics: stringValue: CentOS 7 scopeMetrics: - metrics: + - description: Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: vcenter.vm.cpu.readiness + unit: '%' - description: The amount of CPU used by the VM. name: vcenter.vm.cpu.usage sum: @@ -6470,6 +6486,14 @@ resourceMetrics: stringValue: CentOS 8 scopeMetrics: - metrics: + - description: Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: vcenter.vm.cpu.readiness + unit: '%' - description: The amount of CPU used by the VM. name: vcenter.vm.cpu.usage sum: