diff --git a/.chloggen/dockerstats-onlinecpus-failcnt.yaml b/.chloggen/dockerstats-onlinecpus-failcnt.yaml new file mode 100644 index 000000000000..b8b9c15839b9 --- /dev/null +++ b/.chloggen/dockerstats-onlinecpus-failcnt.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: dockerstatsreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add `container.cpu.logical.count` and `container.memory.fails` metrics for the online CPU count and the number of times the memory limit was hit + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [31366] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [user] diff --git a/receiver/dockerstatsreceiver/documentation.md b/receiver/dockerstatsreceiver/documentation.md index abfa0fd6bc10..3840f9f6a072 100644 --- a/receiver/dockerstatsreceiver/documentation.md +++ b/receiver/dockerstatsreceiver/documentation.md @@ -306,6 +306,14 @@ This metric is only reported if the container has limits set with -cpus, -cpuset | ---- | ----------- | ---------- | | {cpus} | Gauge | Double | +### container.cpu.logical.count + +Number of cores available to the container. + +| Unit | Metric Type | Value Type | +| ---- | ----------- | ---------- | +| {cpus} | Gauge | Int | + ### container.cpu.shares CPU shares set for the container. @@ -406,6 +414,14 @@ Bytes that are waiting to get written back to the disk, from this cgroup (Only a | ---- | ----------- | ---------- | ----------------------- | --------- | | By | Sum | Int | Cumulative | false | +### container.memory.fails + +Number of times the memory limit was hit. + +| Unit | Metric Type | Value Type | Aggregation Temporality | Monotonic | +| ---- | ----------- | ---------- | ----------------------- | --------- | +| {fails} | Sum | Int | Cumulative | true | + ### container.memory.hierarchical_memory_limit The maximum amount of physical memory that can be used by the processes of this control group (Only available with cgroups v1). 
diff --git a/receiver/dockerstatsreceiver/internal/metadata/generated_config.go b/receiver/dockerstatsreceiver/internal/metadata/generated_config.go index 50e8e315bb3d..94ce4b4db8bc 100644 --- a/receiver/dockerstatsreceiver/internal/metadata/generated_config.go +++ b/receiver/dockerstatsreceiver/internal/metadata/generated_config.go @@ -34,6 +34,7 @@ type MetricsConfig struct { ContainerBlockioIoWaitTimeRecursive MetricConfig `mapstructure:"container.blockio.io_wait_time_recursive"` ContainerBlockioSectorsRecursive MetricConfig `mapstructure:"container.blockio.sectors_recursive"` ContainerCPULimit MetricConfig `mapstructure:"container.cpu.limit"` + ContainerCPULogicalCount MetricConfig `mapstructure:"container.cpu.logical.count"` ContainerCPUShares MetricConfig `mapstructure:"container.cpu.shares"` ContainerCPUThrottlingDataPeriods MetricConfig `mapstructure:"container.cpu.throttling_data.periods"` ContainerCPUThrottlingDataThrottledPeriods MetricConfig `mapstructure:"container.cpu.throttling_data.throttled_periods"` @@ -49,6 +50,7 @@ type MetricsConfig struct { ContainerMemoryAnon MetricConfig `mapstructure:"container.memory.anon"` ContainerMemoryCache MetricConfig `mapstructure:"container.memory.cache"` ContainerMemoryDirty MetricConfig `mapstructure:"container.memory.dirty"` + ContainerMemoryFails MetricConfig `mapstructure:"container.memory.fails"` ContainerMemoryFile MetricConfig `mapstructure:"container.memory.file"` ContainerMemoryHierarchicalMemoryLimit MetricConfig `mapstructure:"container.memory.hierarchical_memory_limit"` ContainerMemoryHierarchicalMemswLimit MetricConfig `mapstructure:"container.memory.hierarchical_memsw_limit"` @@ -125,6 +127,9 @@ func DefaultMetricsConfig() MetricsConfig { ContainerCPULimit: MetricConfig{ Enabled: false, }, + ContainerCPULogicalCount: MetricConfig{ + Enabled: false, + }, ContainerCPUShares: MetricConfig{ Enabled: false, }, @@ -170,6 +175,9 @@ func DefaultMetricsConfig() MetricsConfig { ContainerMemoryDirty: 
MetricConfig{ Enabled: false, }, + ContainerMemoryFails: MetricConfig{ + Enabled: false, + }, ContainerMemoryFile: MetricConfig{ Enabled: true, }, diff --git a/receiver/dockerstatsreceiver/internal/metadata/generated_config_test.go b/receiver/dockerstatsreceiver/internal/metadata/generated_config_test.go index c44f0a576287..645dd16efd1a 100644 --- a/receiver/dockerstatsreceiver/internal/metadata/generated_config_test.go +++ b/receiver/dockerstatsreceiver/internal/metadata/generated_config_test.go @@ -35,6 +35,7 @@ func TestMetricsBuilderConfig(t *testing.T) { ContainerBlockioIoWaitTimeRecursive: MetricConfig{Enabled: true}, ContainerBlockioSectorsRecursive: MetricConfig{Enabled: true}, ContainerCPULimit: MetricConfig{Enabled: true}, + ContainerCPULogicalCount: MetricConfig{Enabled: true}, ContainerCPUShares: MetricConfig{Enabled: true}, ContainerCPUThrottlingDataPeriods: MetricConfig{Enabled: true}, ContainerCPUThrottlingDataThrottledPeriods: MetricConfig{Enabled: true}, @@ -50,6 +51,7 @@ func TestMetricsBuilderConfig(t *testing.T) { ContainerMemoryAnon: MetricConfig{Enabled: true}, ContainerMemoryCache: MetricConfig{Enabled: true}, ContainerMemoryDirty: MetricConfig{Enabled: true}, + ContainerMemoryFails: MetricConfig{Enabled: true}, ContainerMemoryFile: MetricConfig{Enabled: true}, ContainerMemoryHierarchicalMemoryLimit: MetricConfig{Enabled: true}, ContainerMemoryHierarchicalMemswLimit: MetricConfig{Enabled: true}, @@ -120,6 +122,7 @@ func TestMetricsBuilderConfig(t *testing.T) { ContainerBlockioIoWaitTimeRecursive: MetricConfig{Enabled: false}, ContainerBlockioSectorsRecursive: MetricConfig{Enabled: false}, ContainerCPULimit: MetricConfig{Enabled: false}, + ContainerCPULogicalCount: MetricConfig{Enabled: false}, ContainerCPUShares: MetricConfig{Enabled: false}, ContainerCPUThrottlingDataPeriods: MetricConfig{Enabled: false}, ContainerCPUThrottlingDataThrottledPeriods: MetricConfig{Enabled: false}, @@ -135,6 +138,7 @@ func TestMetricsBuilderConfig(t *testing.T) 
{ ContainerMemoryAnon: MetricConfig{Enabled: false}, ContainerMemoryCache: MetricConfig{Enabled: false}, ContainerMemoryDirty: MetricConfig{Enabled: false}, + ContainerMemoryFails: MetricConfig{Enabled: false}, ContainerMemoryFile: MetricConfig{Enabled: false}, ContainerMemoryHierarchicalMemoryLimit: MetricConfig{Enabled: false}, ContainerMemoryHierarchicalMemswLimit: MetricConfig{Enabled: false}, diff --git a/receiver/dockerstatsreceiver/internal/metadata/generated_metrics.go b/receiver/dockerstatsreceiver/internal/metadata/generated_metrics.go index c2c39c061aee..547d5410715a 100644 --- a/receiver/dockerstatsreceiver/internal/metadata/generated_metrics.go +++ b/receiver/dockerstatsreceiver/internal/metadata/generated_metrics.go @@ -501,6 +501,55 @@ func newMetricContainerCPULimit(cfg MetricConfig) metricContainerCPULimit { return m } +type metricContainerCPULogicalCount struct { + data pmetric.Metric // data buffer for generated metric. + config MetricConfig // metric config provided by user. + capacity int // max observed number of data points added to the metric. +} + +// init fills container.cpu.logical.count metric with initial data. +func (m *metricContainerCPULogicalCount) init() { + m.data.SetName("container.cpu.logical.count") + m.data.SetDescription("Number of cores available to the container.") + m.data.SetUnit("{cpus}") + m.data.SetEmptyGauge() +} + +func (m *metricContainerCPULogicalCount) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64) { + if !m.config.Enabled { + return + } + dp := m.data.Gauge().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. 
+func (m *metricContainerCPULogicalCount) updateCapacity() { + if m.data.Gauge().DataPoints().Len() > m.capacity { + m.capacity = m.data.Gauge().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricContainerCPULogicalCount) emit(metrics pmetric.MetricSlice) { + if m.config.Enabled && m.data.Gauge().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() + } +} + +func newMetricContainerCPULogicalCount(cfg MetricConfig) metricContainerCPULogicalCount { + m := metricContainerCPULogicalCount{config: cfg} + if cfg.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m +} + type metricContainerCPUShares struct { data pmetric.Metric // data buffer for generated metric. config MetricConfig // metric config provided by user. @@ -1264,6 +1313,57 @@ func newMetricContainerMemoryDirty(cfg MetricConfig) metricContainerMemoryDirty return m } +type metricContainerMemoryFails struct { + data pmetric.Metric // data buffer for generated metric. + config MetricConfig // metric config provided by user. + capacity int // max observed number of data points added to the metric. +} + +// init fills container.memory.fails metric with initial data. 
+func (m *metricContainerMemoryFails) init() { + m.data.SetName("container.memory.fails") + m.data.SetDescription("Number of times the memory limit was hit.") + m.data.SetUnit("{fails}") + m.data.SetEmptySum() + m.data.Sum().SetIsMonotonic(true) + m.data.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) +} + +func (m *metricContainerMemoryFails) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64) { + if !m.config.Enabled { + return + } + dp := m.data.Sum().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. +func (m *metricContainerMemoryFails) updateCapacity() { + if m.data.Sum().DataPoints().Len() > m.capacity { + m.capacity = m.data.Sum().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricContainerMemoryFails) emit(metrics pmetric.MetricSlice) { + if m.config.Enabled && m.data.Sum().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() + } +} + +func newMetricContainerMemoryFails(cfg MetricConfig) metricContainerMemoryFails { + m := metricContainerMemoryFails{config: cfg} + if cfg.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m +} + type metricContainerMemoryFile struct { data pmetric.Metric // data buffer for generated metric. config MetricConfig // metric config provided by user. 
@@ -3588,6 +3688,7 @@ type MetricsBuilder struct { metricContainerBlockioIoWaitTimeRecursive metricContainerBlockioIoWaitTimeRecursive metricContainerBlockioSectorsRecursive metricContainerBlockioSectorsRecursive metricContainerCPULimit metricContainerCPULimit + metricContainerCPULogicalCount metricContainerCPULogicalCount metricContainerCPUShares metricContainerCPUShares metricContainerCPUThrottlingDataPeriods metricContainerCPUThrottlingDataPeriods metricContainerCPUThrottlingDataThrottledPeriods metricContainerCPUThrottlingDataThrottledPeriods @@ -3603,6 +3704,7 @@ type MetricsBuilder struct { metricContainerMemoryAnon metricContainerMemoryAnon metricContainerMemoryCache metricContainerMemoryCache metricContainerMemoryDirty metricContainerMemoryDirty + metricContainerMemoryFails metricContainerMemoryFails metricContainerMemoryFile metricContainerMemoryFile metricContainerMemoryHierarchicalMemoryLimit metricContainerMemoryHierarchicalMemoryLimit metricContainerMemoryHierarchicalMemswLimit metricContainerMemoryHierarchicalMemswLimit @@ -3675,6 +3777,7 @@ func NewMetricsBuilder(mbc MetricsBuilderConfig, settings receiver.CreateSetting metricContainerBlockioIoWaitTimeRecursive: newMetricContainerBlockioIoWaitTimeRecursive(mbc.Metrics.ContainerBlockioIoWaitTimeRecursive), metricContainerBlockioSectorsRecursive: newMetricContainerBlockioSectorsRecursive(mbc.Metrics.ContainerBlockioSectorsRecursive), metricContainerCPULimit: newMetricContainerCPULimit(mbc.Metrics.ContainerCPULimit), + metricContainerCPULogicalCount: newMetricContainerCPULogicalCount(mbc.Metrics.ContainerCPULogicalCount), metricContainerCPUShares: newMetricContainerCPUShares(mbc.Metrics.ContainerCPUShares), metricContainerCPUThrottlingDataPeriods: newMetricContainerCPUThrottlingDataPeriods(mbc.Metrics.ContainerCPUThrottlingDataPeriods), metricContainerCPUThrottlingDataThrottledPeriods: newMetricContainerCPUThrottlingDataThrottledPeriods(mbc.Metrics.ContainerCPUThrottlingDataThrottledPeriods), @@ -3690,6 
+3793,7 @@ func NewMetricsBuilder(mbc MetricsBuilderConfig, settings receiver.CreateSetting metricContainerMemoryAnon: newMetricContainerMemoryAnon(mbc.Metrics.ContainerMemoryAnon), metricContainerMemoryCache: newMetricContainerMemoryCache(mbc.Metrics.ContainerMemoryCache), metricContainerMemoryDirty: newMetricContainerMemoryDirty(mbc.Metrics.ContainerMemoryDirty), + metricContainerMemoryFails: newMetricContainerMemoryFails(mbc.Metrics.ContainerMemoryFails), metricContainerMemoryFile: newMetricContainerMemoryFile(mbc.Metrics.ContainerMemoryFile), metricContainerMemoryHierarchicalMemoryLimit: newMetricContainerMemoryHierarchicalMemoryLimit(mbc.Metrics.ContainerMemoryHierarchicalMemoryLimit), metricContainerMemoryHierarchicalMemswLimit: newMetricContainerMemoryHierarchicalMemswLimit(mbc.Metrics.ContainerMemoryHierarchicalMemswLimit), @@ -3806,6 +3910,7 @@ func (mb *MetricsBuilder) EmitForResource(rmo ...ResourceMetricsOption) { mb.metricContainerBlockioIoWaitTimeRecursive.emit(ils.Metrics()) mb.metricContainerBlockioSectorsRecursive.emit(ils.Metrics()) mb.metricContainerCPULimit.emit(ils.Metrics()) + mb.metricContainerCPULogicalCount.emit(ils.Metrics()) mb.metricContainerCPUShares.emit(ils.Metrics()) mb.metricContainerCPUThrottlingDataPeriods.emit(ils.Metrics()) mb.metricContainerCPUThrottlingDataThrottledPeriods.emit(ils.Metrics()) @@ -3821,6 +3926,7 @@ func (mb *MetricsBuilder) EmitForResource(rmo ...ResourceMetricsOption) { mb.metricContainerMemoryAnon.emit(ils.Metrics()) mb.metricContainerMemoryCache.emit(ils.Metrics()) mb.metricContainerMemoryDirty.emit(ils.Metrics()) + mb.metricContainerMemoryFails.emit(ils.Metrics()) mb.metricContainerMemoryFile.emit(ils.Metrics()) mb.metricContainerMemoryHierarchicalMemoryLimit.emit(ils.Metrics()) mb.metricContainerMemoryHierarchicalMemswLimit.emit(ils.Metrics()) @@ -3931,6 +4037,11 @@ func (mb *MetricsBuilder) RecordContainerCPULimitDataPoint(ts pcommon.Timestamp, mb.metricContainerCPULimit.recordDataPoint(mb.startTime, ts, 
val) } +// RecordContainerCPULogicalCountDataPoint adds a data point to container.cpu.logical.count metric. +func (mb *MetricsBuilder) RecordContainerCPULogicalCountDataPoint(ts pcommon.Timestamp, val int64) { + mb.metricContainerCPULogicalCount.recordDataPoint(mb.startTime, ts, val) +} + // RecordContainerCPUSharesDataPoint adds a data point to container.cpu.shares metric. func (mb *MetricsBuilder) RecordContainerCPUSharesDataPoint(ts pcommon.Timestamp, val int64) { mb.metricContainerCPUShares.recordDataPoint(mb.startTime, ts, val) @@ -4006,6 +4117,11 @@ func (mb *MetricsBuilder) RecordContainerMemoryDirtyDataPoint(ts pcommon.Timesta mb.metricContainerMemoryDirty.recordDataPoint(mb.startTime, ts, val) } +// RecordContainerMemoryFailsDataPoint adds a data point to container.memory.fails metric. +func (mb *MetricsBuilder) RecordContainerMemoryFailsDataPoint(ts pcommon.Timestamp, val int64) { + mb.metricContainerMemoryFails.recordDataPoint(mb.startTime, ts, val) +} + // RecordContainerMemoryFileDataPoint adds a data point to container.memory.file metric. 
func (mb *MetricsBuilder) RecordContainerMemoryFileDataPoint(ts pcommon.Timestamp, val int64) { mb.metricContainerMemoryFile.recordDataPoint(mb.startTime, ts, val) diff --git a/receiver/dockerstatsreceiver/internal/metadata/generated_metrics_test.go b/receiver/dockerstatsreceiver/internal/metadata/generated_metrics_test.go index e52c3f062d2f..addd9e9bdb11 100644 --- a/receiver/dockerstatsreceiver/internal/metadata/generated_metrics_test.go +++ b/receiver/dockerstatsreceiver/internal/metadata/generated_metrics_test.go @@ -83,6 +83,9 @@ func TestMetricsBuilder(t *testing.T) { allMetricsCount++ mb.RecordContainerCPULimitDataPoint(ts, 1) + allMetricsCount++ + mb.RecordContainerCPULogicalCountDataPoint(ts, 1) + allMetricsCount++ mb.RecordContainerCPUSharesDataPoint(ts, 1) @@ -132,6 +135,9 @@ func TestMetricsBuilder(t *testing.T) { allMetricsCount++ mb.RecordContainerMemoryDirtyDataPoint(ts, 1) + allMetricsCount++ + mb.RecordContainerMemoryFailsDataPoint(ts, 1) + defaultMetricsCount++ allMetricsCount++ mb.RecordContainerMemoryFileDataPoint(ts, 1) @@ -502,6 +508,18 @@ func TestMetricsBuilder(t *testing.T) { assert.Equal(t, ts, dp.Timestamp()) assert.Equal(t, pmetric.NumberDataPointValueTypeDouble, dp.ValueType()) assert.Equal(t, float64(1), dp.DoubleValue()) + case "container.cpu.logical.count": + assert.False(t, validatedMetrics["container.cpu.logical.count"], "Found a duplicate in the metrics slice: container.cpu.logical.count") + validatedMetrics["container.cpu.logical.count"] = true + assert.Equal(t, pmetric.MetricTypeGauge, ms.At(i).Type()) + assert.Equal(t, 1, ms.At(i).Gauge().DataPoints().Len()) + assert.Equal(t, "Number of cores available to the container.", ms.At(i).Description()) + assert.Equal(t, "{cpus}", ms.At(i).Unit()) + dp := ms.At(i).Gauge().DataPoints().At(0) + assert.Equal(t, start, dp.StartTimestamp()) + assert.Equal(t, ts, dp.Timestamp()) + assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) + assert.Equal(t, int64(1), dp.IntValue()) 
case "container.cpu.shares": assert.False(t, validatedMetrics["container.cpu.shares"], "Found a duplicate in the metrics slice: container.cpu.shares") validatedMetrics["container.cpu.shares"] = true @@ -711,6 +729,20 @@ func TestMetricsBuilder(t *testing.T) { assert.Equal(t, ts, dp.Timestamp()) assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) assert.Equal(t, int64(1), dp.IntValue()) + case "container.memory.fails": + assert.False(t, validatedMetrics["container.memory.fails"], "Found a duplicate in the metrics slice: container.memory.fails") + validatedMetrics["container.memory.fails"] = true + assert.Equal(t, pmetric.MetricTypeSum, ms.At(i).Type()) + assert.Equal(t, 1, ms.At(i).Sum().DataPoints().Len()) + assert.Equal(t, "Number of times the memory limit was hit.", ms.At(i).Description()) + assert.Equal(t, "{fails}", ms.At(i).Unit()) + assert.Equal(t, true, ms.At(i).Sum().IsMonotonic()) + assert.Equal(t, pmetric.AggregationTemporalityCumulative, ms.At(i).Sum().AggregationTemporality()) + dp := ms.At(i).Sum().DataPoints().At(0) + assert.Equal(t, start, dp.StartTimestamp()) + assert.Equal(t, ts, dp.Timestamp()) + assert.Equal(t, pmetric.NumberDataPointValueTypeInt, dp.ValueType()) + assert.Equal(t, int64(1), dp.IntValue()) case "container.memory.file": assert.False(t, validatedMetrics["container.memory.file"], "Found a duplicate in the metrics slice: container.memory.file") validatedMetrics["container.memory.file"] = true diff --git a/receiver/dockerstatsreceiver/internal/metadata/testdata/config.yaml b/receiver/dockerstatsreceiver/internal/metadata/testdata/config.yaml index 91acbeaa86aa..1c5dc2133db9 100644 --- a/receiver/dockerstatsreceiver/internal/metadata/testdata/config.yaml +++ b/receiver/dockerstatsreceiver/internal/metadata/testdata/config.yaml @@ -19,6 +19,8 @@ all_set: enabled: true container.cpu.limit: enabled: true + container.cpu.logical.count: + enabled: true container.cpu.shares: enabled: true 
container.cpu.throttling_data.periods: @@ -49,6 +51,8 @@ all_set: enabled: true container.memory.dirty: enabled: true + container.memory.fails: + enabled: true container.memory.file: enabled: true container.memory.hierarchical_memory_limit: @@ -174,6 +178,8 @@ none_set: enabled: false container.cpu.limit: enabled: false + container.cpu.logical.count: + enabled: false container.cpu.shares: enabled: false container.cpu.throttling_data.periods: @@ -204,6 +210,8 @@ none_set: enabled: false container.memory.dirty: enabled: false + container.memory.fails: + enabled: false container.memory.file: enabled: false container.memory.hierarchical_memory_limit: diff --git a/receiver/dockerstatsreceiver/metadata.yaml b/receiver/dockerstatsreceiver/metadata.yaml index 961ea27d8219..20d833ef0ceb 100644 --- a/receiver/dockerstatsreceiver/metadata.yaml +++ b/receiver/dockerstatsreceiver/metadata.yaml @@ -152,6 +152,12 @@ metrics: unit: "1" gauge: value_type: int + container.cpu.logical.count: + enabled: false + description: "Number of cores available to the container." + unit: "{cpus}" + gauge: + value_type: int # Memory @@ -465,6 +471,14 @@ metrics: value_type: int aggregation_temporality: cumulative monotonic: false + container.memory.fails: + enabled: false + description: "Number of times the memory limit was hit." 
+ unit: "{fails}" + sum: + value_type: int + aggregation_temporality: cumulative + monotonic: true # BlockIO (cgroup v1) and IO (cgroup v2) controllers diff --git a/receiver/dockerstatsreceiver/receiver.go b/receiver/dockerstatsreceiver/receiver.go index 071d737d6329..34fe519de302 100644 --- a/receiver/dockerstatsreceiver/receiver.go +++ b/receiver/dockerstatsreceiver/receiver.go @@ -173,6 +173,8 @@ func (r *metricsReceiver) recordMemoryMetrics(now pcommon.Timestamp, memoryStats r.mb.RecordContainerMemoryUsageMaxDataPoint(now, int64(memoryStats.MaxUsage)) + r.mb.RecordContainerMemoryFailsDataPoint(now, int64(memoryStats.Failcnt)) + recorders := map[string]func(pcommon.Timestamp, int64){ "cache": r.mb.RecordContainerMemoryCacheDataPoint, "total_cache": r.mb.RecordContainerMemoryTotalCacheDataPoint, @@ -267,6 +269,7 @@ func (r *metricsReceiver) recordCPUMetrics(now pcommon.Timestamp, cpuStats *dtyp r.mb.RecordContainerCPUThrottlingDataPeriodsDataPoint(now, int64(cpuStats.ThrottlingData.Periods)) r.mb.RecordContainerCPUThrottlingDataThrottledTimeDataPoint(now, int64(cpuStats.ThrottlingData.ThrottledTime)) r.mb.RecordContainerCPUUtilizationDataPoint(now, calculateCPUPercent(prevStats, cpuStats)) + r.mb.RecordContainerCPULogicalCountDataPoint(now, int64(cpuStats.OnlineCPUs)) for coreNum, v := range cpuStats.CPUUsage.PercpuUsage { r.mb.RecordContainerCPUUsagePercpuDataPoint(now, int64(v), fmt.Sprintf("cpu%s", strconv.Itoa(coreNum))) diff --git a/receiver/dockerstatsreceiver/receiver_test.go b/receiver/dockerstatsreceiver/receiver_test.go index c5b2844a407a..70a793413e36 100644 --- a/receiver/dockerstatsreceiver/receiver_test.go +++ b/receiver/dockerstatsreceiver/receiver_test.go @@ -53,6 +53,7 @@ var ( ContainerCPUUsageSystem: metricEnabled, ContainerCPUUsageTotal: metricEnabled, ContainerCPUUsageUsermode: metricEnabled, + ContainerCPULogicalCount: metricEnabled, ContainerMemoryActiveAnon: metricEnabled, ContainerMemoryActiveFile: metricEnabled, ContainerMemoryCache: 
metricEnabled, @@ -89,6 +90,7 @@ var ( ContainerMemoryUsageMax: metricEnabled, ContainerMemoryUsageTotal: metricEnabled, ContainerMemoryWriteback: metricEnabled, + ContainerMemoryFails: metricEnabled, ContainerNetworkIoUsageRxBytes: metricEnabled, ContainerNetworkIoUsageRxDropped: metricEnabled, ContainerNetworkIoUsageRxErrors: metricEnabled, diff --git a/receiver/dockerstatsreceiver/testdata/mock/cgroups_v2/expected_metrics.yaml b/receiver/dockerstatsreceiver/testdata/mock/cgroups_v2/expected_metrics.yaml index 756751d84ee6..0692645fe3bd 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/cgroups_v2/expected_metrics.yaml +++ b/receiver/dockerstatsreceiver/testdata/mock/cgroups_v2/expected_metrics.yaml @@ -52,6 +52,14 @@ resourceMetrics: timeUnixNano: "2000000" isMonotonic: true unit: By + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 2 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. gauge: dataPoints: @@ -165,6 +173,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' + name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: Amount of memory used to cache filesystem data, including tmpfs and shared memory (Only available with cgroups v2). 
name: container.memory.file sum: diff --git a/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/expected_metrics.yaml b/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/expected_metrics.yaml index 08d6c88004dd..b546a2e2df85 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/expected_metrics.yaml +++ b/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/expected_metrics.yaml @@ -112,6 +112,14 @@ resourceMetrics: timeUnixNano: "2000000" name: container.cpu.limit unit: '{cpus}' + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 2 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. gauge: dataPoints: @@ -225,6 +233,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' + name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "4" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: Amount of memory used to cache filesystem data, including tmpfs and shared memory (Only available with cgroups v2). 
name: container.memory.file sum: diff --git a/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/stats.json b/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/stats.json index 68509e8f1058..dc51d90f0cfb 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/stats.json +++ b/receiver/dockerstatsreceiver/testdata/mock/cpu_limit/stats.json @@ -83,6 +83,7 @@ } }, "memory_stats": { + "failcnt": 4, "usage": 2240512, "stats": { "active_anon": 4096, diff --git a/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/expected_metrics.yaml b/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/expected_metrics.yaml index 838e67b56f3b..77ae12c06cca 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/expected_metrics.yaml +++ b/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/expected_metrics.yaml @@ -195,6 +195,14 @@ resourceMetrics: timeUnixNano: "2000000" isMonotonic: true unit: '{operations}' + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 8 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. gauge: dataPoints: @@ -380,6 +388,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' + name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "4" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: The maximum amount of physical memory that can be used by the processes of this control group (Only available with cgroups v1). 
name: container.memory.hierarchical_memory_limit sum: diff --git a/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/stats.json b/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/stats.json index 4ec5a2c0df5d..269c0238f6e0 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/stats.json +++ b/receiver/dockerstatsreceiver/testdata/mock/no_pids_stats/stats.json @@ -109,6 +109,7 @@ }, "id": "10b703fb312b25e8368ab5a3bce3a1610d1cee5d71a94920f1a7adbc5b0cb326", "memory_stats": { + "failcnt": 4, "limit": 10449559552, "max_usage": 3932160, "stats": { diff --git a/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/expected_metrics.yaml b/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/expected_metrics.yaml index 59f5e428e8d1..efbaf5ff0508 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/expected_metrics.yaml +++ b/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/expected_metrics.yaml @@ -104,6 +104,14 @@ resourceMetrics: timeUnixNano: "2000000" isMonotonic: true unit: By + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 2 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. gauge: dataPoints: @@ -217,6 +225,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' + name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "4" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: Amount of memory used to cache filesystem data, including tmpfs and shared memory (Only available with cgroups v2). 
name: container.memory.file sum: diff --git a/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/stats.json b/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/stats.json index 1fa9e89505d0..90daf395f0aa 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/stats.json +++ b/receiver/dockerstatsreceiver/testdata/mock/pids_stats_max/stats.json @@ -83,6 +83,7 @@ } }, "memory_stats": { + "failcnt": 4, "usage": 2240512, "stats": { "active_anon": 4096, diff --git a/receiver/dockerstatsreceiver/testdata/mock/single_container/expected_metrics.yaml b/receiver/dockerstatsreceiver/testdata/mock/single_container/expected_metrics.yaml index bcbb45b86230..f4b4fe5ecf36 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/single_container/expected_metrics.yaml +++ b/receiver/dockerstatsreceiver/testdata/mock/single_container/expected_metrics.yaml @@ -201,6 +201,14 @@ resourceMetrics: timeUnixNano: "2000000" isMonotonic: true unit: '{operations}' + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 8 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. gauge: dataPoints: @@ -385,6 +393,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' + name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "4" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: The maximum amount of physical memory that can be used by the processes of this control group (Only available with cgroups v1). 
name: container.memory.hierarchical_memory_limit sum: diff --git a/receiver/dockerstatsreceiver/testdata/mock/single_container/stats.json b/receiver/dockerstatsreceiver/testdata/mock/single_container/stats.json index f0b5d6110ab1..33d8145d0cf8 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/single_container/stats.json +++ b/receiver/dockerstatsreceiver/testdata/mock/single_container/stats.json @@ -109,6 +109,7 @@ }, "id": "10b703fb312b25e8368ab5a3bce3a1610d1cee5d71a94920f1a7adbc5b0cb326", "memory_stats": { + "failcnt": 4, "limit": 10449559552, "max_usage": 3932160, "stats": { diff --git a/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/expected_metrics.yaml b/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/expected_metrics.yaml index 4784753987e7..634773c41d6a 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/expected_metrics.yaml +++ b/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/expected_metrics.yaml @@ -201,6 +201,14 @@ resourceMetrics: timeUnixNano: "2000000" isMonotonic: true unit: '{operations}' + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 8 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. gauge: dataPoints: @@ -386,6 +394,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' 
+ name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "4" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: The maximum amount of physical memory that can be used by the processes of this control group (Only available with cgroups v1). name: container.memory.hierarchical_memory_limit sum: diff --git a/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/stats.json b/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/stats.json index 61a10904c65f..3cd6f3b32ac8 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/stats.json +++ b/receiver/dockerstatsreceiver/testdata/mock/single_container_with_optional_resource_attributes/stats.json @@ -109,6 +109,7 @@ }, "id": "73364842ef014441cac89fed05df19463b1230db25a31252cdf82e754f1ec581", "memory_stats": { + "failcnt": 4, "limit": 10449559552, "max_usage": 3932160, "stats": { diff --git a/receiver/dockerstatsreceiver/testdata/mock/two_containers/expected_metrics.yaml b/receiver/dockerstatsreceiver/testdata/mock/two_containers/expected_metrics.yaml index c3a0788872fc..759dda74e978 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/two_containers/expected_metrics.yaml +++ b/receiver/dockerstatsreceiver/testdata/mock/two_containers/expected_metrics.yaml @@ -195,6 +195,14 @@ resourceMetrics: timeUnixNano: "2000000" isMonotonic: true unit: '{operations}' + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 1 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. 
gauge: dataPoints: @@ -330,6 +338,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' + name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "4" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: The maximum amount of physical memory that can be used by the processes of this control group (Only available with cgroups v1). name: container.memory.hierarchical_memory_limit sum: @@ -962,6 +980,14 @@ resourceMetrics: timeUnixNano: "2000000" isMonotonic: true unit: '{operations}' + - description: 'Number of cores available to the container.' + gauge: + dataPoints: + - asInt: 1 + startTimeUnixNano: "1687762436124732000" + timeUnixNano: "1687762436137493000" + name: container.cpu.logical.count + unit: "{cpus}" - description: CPU shares set for the container. gauge: dataPoints: @@ -1097,6 +1123,16 @@ resourceMetrics: startTimeUnixNano: "1000000" timeUnixNano: "2000000" unit: By + - description: 'Number of times the memory limit was hit.' + name: container.memory.fails + sum: + isMonotonic: true + aggregationTemporality: 2 + dataPoints: + - asInt: "4" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: "{fails}" - description: The maximum amount of physical memory that can be used by the processes of this control group (Only available with cgroups v1). 
name: container.memory.hierarchical_memory_limit sum: diff --git a/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats1.json b/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats1.json index 9ca148e05517..6cd042489777 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats1.json +++ b/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats1.json @@ -102,6 +102,7 @@ }, "id": "89d28931fd8b95c8806343a532e9e76bf0a0b76ee8f19452b8f75dee1ebcebb7", "memory_stats": { + "failcnt": 4, "limit": 2074079232, "max_usage": 6201344, "stats": { diff --git a/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats2.json b/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats2.json index 2df1f9dc7a60..f0565da48162 100644 --- a/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats2.json +++ b/receiver/dockerstatsreceiver/testdata/mock/two_containers/stats2.json @@ -102,6 +102,7 @@ }, "id": "a359c0fc87c546b42d2ad32db7c978627f1d89b49cb3827a7b19ba97a1febcce", "memory_stats": { + "failcnt": 4, "limit": 2074079232, "max_usage": 6172672, "stats": {