Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core metrics collection - collect more memory related metrics #172146

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ const mockedResponse: StatusResponse = {
total_in_bytes: 0,
},
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),
Expand All @@ -96,6 +98,8 @@ const mockedResponse: StatusResponse = {
total_in_bytes: 0,
},
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,7 @@ describe('OsCgroupMetricsCollector', () => {
await collector.collect();

expect(gatherV2CgroupMetrics).toHaveBeenCalledTimes(1);
expect(gatherV2CgroupMetrics).toHaveBeenCalledWith({
cpuAcctPath: '/groupname',
cpuPath: '/groupname',
});
expect(gatherV2CgroupMetrics).toHaveBeenCalledWith('/groupname');
expect(gatherV1CgroupMetrics).toHaveBeenCalledTimes(0);
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,13 @@ export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetric
return {};
}

const args = { cpuAcctPath: this.cpuAcctPath!, cpuPath: this.cpuPath! };
// "await" to handle any errors here.
return await (this.isCgroup2 ? gatherV2CgroupMetrics(args) : gatherV1CgroupMetrics(args));
return await (this.isCgroup2
? gatherV2CgroupMetrics(this.cpuAcctPath!)
: gatherV1CgroupMetrics({
cpuAcctPath: this.cpuAcctPath!,
cpuPath: this.cpuPath!,
}));
Comment on lines +46 to +51
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactored the signatures about v1/v2 detection a bit to get closer to reality (v2 -> a single group path)

} catch (err) {
this.noCgroupPresent = true;

Expand All @@ -67,10 +71,15 @@ export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetric
private async initializePaths(): Promise<void> {
if (this.hasPaths()) return;

const { data: cgroups, v2 } = await gatherInfo();
this.isCgroup2 = v2;
this.cpuPath = this.options.cpuPath || cgroups[GROUP_CPU];
this.cpuAcctPath = this.options.cpuAcctPath || cgroups[GROUP_CPUACCT];
const result = await gatherInfo();
this.isCgroup2 = result.v2;
if (result.v2) {
this.cpuPath = result.path;
this.cpuAcctPath = result.path;
} else {
this.cpuPath = this.options.cpuPath || result.data[GROUP_CPU];
this.cpuAcctPath = this.options.cpuAcctPath || result.data[GROUP_CPUACCT];
}

// prevents undefined cgroup paths
this.noCgroupPresent = Boolean(!this.cpuPath || !this.cpuAcctPath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ describe('gatherInfo', () => {
'/proc/self/cgroup': `0:controller:/path
1:controller2,controller3:/otherpath`,
});
const { data } = await gatherInfo();
expect(data).toEqual({
controller: '/path',
controller2: '/otherpath',
controller3: '/otherpath',
const result = await gatherInfo();
expect(result).toEqual({
v2: false,
data: {
controller: '/path',
controller2: '/otherpath',
controller3: '/otherpath',
},
});
});

Expand All @@ -30,15 +33,15 @@ describe('gatherInfo', () => {
'/proc/self/cgroup': `0:controller:/path
1:controller2,controller3:/otherpath`,
});
await expect(gatherInfo()).resolves.toMatchObject({ v2: false });
expect(await gatherInfo()).toMatchObject({ v2: false });
mockFs({
'/proc/self/cgroup': `

0::/path

`,
});
await expect(gatherInfo()).resolves.toMatchObject({ v2: true });
expect(await gatherInfo()).toMatchObject({ v2: true });
});

test('missing cgroup file', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
*/
import fs from 'fs/promises';

import { GROUP_CPU, GROUP_CPUACCT } from './constants';

const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
const CONTROLLER_SEPARATOR_RE = ',';
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
Expand All @@ -27,10 +25,15 @@ async function readProcSelf(): Promise<string[]> {
return data.split(/\n/).filter((line) => line.trim().length > 0);
}

interface Result {
data: Record<string, string>;
v2: boolean;
}
type Result =
| {
v2: true;
path: string;
}
| {
v2: false;
data: Record<string, string>;
};

export async function gatherInfo(): Promise<Result> {
const lines = await readProcSelf();
Expand All @@ -39,11 +42,8 @@ export async function gatherInfo(): Promise<Result> {
// eslint-disable-next-line prettier/prettier
const [/* '0' */, /* '' */, path] = lines[0].trim().split(':');
return {
data: {
[GROUP_CPU]: path,
[GROUP_CPUACCT]: path,
},
v2: true,
path,
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@

import type { OpsOsMetrics } from '@kbn/core-metrics-server';

export type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct'>;
export type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct' | 'cgroup_memory'>;
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@ system_usec 125968
nr_periods 123
nr_throttled 1
throttled_usec 123123`,
'/sys/fs/cgroup/memory.current': '9000',
'/sys/fs/cgroup/memory.swap.current': '42',
});

expect(await gatherV2CgroupMetrics({ cpuAcctPath: '/', cpuPath: '/' })).toMatchInlineSnapshot(`
const metrics = await gatherV2CgroupMetrics('/');
expect(metrics).toMatchInlineSnapshot(`
Object {
"cgroup_memory": Object {
"current_in_bytes": 9000,
"swap_current_in_bytes": 42,
},
"cpu": Object {
"cfs_period_micros": 100000,
"cfs_quota_micros": -1,
Expand Down Expand Up @@ -54,11 +61,17 @@ system_usec 125968
nr_periods 123
nr_throttled 1
throttled_usec 123123`,
'/sys/fs/cgroup/mypath/memory.current': '9876',
'/sys/fs/cgroup/mypath/memory.swap.current': '132645',
});

expect(await gatherV2CgroupMetrics({ cpuAcctPath: '/mypath', cpuPath: '/mypath' }))
.toMatchInlineSnapshot(`
const metrics = await gatherV2CgroupMetrics('/mypath');
expect(metrics).toMatchInlineSnapshot(`
Object {
"cgroup_memory": Object {
"current_in_bytes": 9876,
"swap_current_in_bytes": 132645,
},
"cpu": Object {
"cfs_period_micros": 100000,
"cfs_quota_micros": 111,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,60 @@ import type { OsCgroupMetrics } from './types';
const PROC_CGROUP2_DIR = '/sys/fs/cgroup';
const CPU_STATS_FILE = 'cpu.stat';
const CPU_MAX_FILE = 'cpu.max';
const MEMORY_CURRENT_FILE = 'memory.current';
const MEMORY_SWAP_CURRENT_FILE = 'memory.swap.current';

interface Arg {
cpuPath: string;
cpuAcctPath: string;
}
const getCGroupFilePath = (group: string, fileName: string): string => {
return joinPath(PROC_CGROUP2_DIR, group, fileName);
};

export async function gatherV2CgroupMetrics(arg: Arg): Promise<OsCgroupMetrics> {
const [{ usage_nanos: usageNanos, ...stat }, cpuMax] = await Promise.all([
readCPUStat(arg.cpuPath),
readCPUMax(arg.cpuPath),
]);
export async function gatherV2CgroupMetrics(group: string): Promise<OsCgroupMetrics> {
const [{ usage_nanos: usageNanos, ...stat }, cpuMax, memoryCurrent, swapCurrent] =
await Promise.all([
readCPUStat(group),
readCPUMax(group),
readMemoryCurrent(group),
readSwapCurrent(group),
]);

return {
cpu: {
...cpuMax,
control_group: arg.cpuPath,
control_group: group,
stat,
},
cpuacct: {
control_group: arg.cpuPath,
control_group: group,
usage_nanos: usageNanos,
},
cgroup_memory: {
current_in_bytes: memoryCurrent,
swap_current_in_bytes: swapCurrent,
},
};
}

interface CPUMax {
cfs_period_micros: number;
cfs_quota_micros: number;
}

async function readMemoryCurrent(group: string): Promise<number> {
const rawMemoryCurrent = (await fs.readFile(getCGroupFilePath(group, MEMORY_CURRENT_FILE)))
.toString()
.trim();
return parseInt(rawMemoryCurrent, 10);
}

async function readSwapCurrent(group: string): Promise<number> {
const rawMemoryCurrent = (await fs.readFile(getCGroupFilePath(group, MEMORY_SWAP_CURRENT_FILE)))
.toString()
.trim();
return parseInt(rawMemoryCurrent, 10);
}
Comment on lines +54 to +66
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are "single constant" numeric files


async function readCPUMax(group: string): Promise<CPUMax> {
const [quota, period] = (await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_MAX_FILE)))
const [quota, period] = (await fs.readFile(getCGroupFilePath(group, CPU_MAX_FILE)))
.toString()
.trim()
.split(/\s+/);
Expand All @@ -62,7 +85,7 @@ async function readCPUStat(group: string): Promise<CPUStat> {
time_throttled_nanos: -1,
usage_nanos: -1,
};
return (await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_STATS_FILE)))
return (await fs.readFile(getCGroupFilePath(group, CPU_STATS_FILE)))
.toString()
.split(/\n/)
.reduce((acc, line) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ function createMockOpsProcessMetrics(): OpsProcessMetrics {
memory: {
heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 },
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,15 @@ describe('ProcessMetricsCollector', () => {
const heapUsed = 4688;
const heapSizeLimit = 5788;
const rss = 5865;
const external = 9001;
const arrayBuffers = 42;

jest.spyOn(process, 'memoryUsage').mockImplementation(() => ({
rss,
heapTotal,
heapUsed,
external: 0,
arrayBuffers: 0,
external,
arrayBuffers,
}));

jest.spyOn(v8, 'getHeapStatistics').mockImplementation(
Expand All @@ -83,6 +86,8 @@ describe('ProcessMetricsCollector', () => {
expect(metrics[0].memory.heap.used_in_bytes).toEqual(heapUsed);
expect(metrics[0].memory.heap.size_limit).toEqual(heapSizeLimit);
expect(metrics[0].memory.resident_set_size_in_bytes).toEqual(rss);
expect(metrics[0].memory.external_in_bytes).toEqual(external);
expect(metrics[0].memory.array_buffers_in_bytes).toEqual(arrayBuffers);
});
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetri
size_limit: heapStats.heap_size_limit,
},
resident_set_size_in_bytes: memoryUsage.rss,
array_buffers_in_bytes: memoryUsage.arrayBuffers,
external_in_bytes: memoryUsage.external,
},
pid: process.pid,
event_loop_delay: eventLoopDelayHistogram.mean,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ export function createMockOpsProcessMetrics(): OpsProcessMetrics {
memory: {
heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 },
resident_set_size_in_bytes: 1,
external_in_bytes: 1,
array_buffers_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,15 @@ function createMockOpsMetrics(testMetrics: Partial<OpsMetrics>): OpsMetrics {
...testMetrics,
};
}

const testMetrics = {
process: {
memory: { heap: { used_in_bytes: 100 } },
memory: {
heap: { used_in_bytes: 100, total_in_bytes: 200, size_limit: 300 },
resident_set_size_in_bytes: 400,
external_in_bytes: 500,
array_buffers_in_bytes: 600,
},
uptime_in_millis: 1500,
event_loop_delay: 50,
event_loop_delay_histogram: { percentiles: { '50': 50, '75': 75, '95': 95, '99': 99 } },
Expand Down Expand Up @@ -127,9 +133,14 @@ describe('getEcsOpsMetricsLog', () => {
"utilization": 0.6365329598160299,
},
"memory": Object {
"arrayBuffersInBytes": 600,
"externalInBytes": 500,
"heap": Object {
"sizeLimit": 300,
"totalInBytes": 200,
"usedInBytes": 100,
},
"residentSetSizeInBytes": 400,
},
"uptime": 1,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,12 @@ export function getEcsOpsMetricsLog(metrics: OpsMetrics) {
memory: {
heap: {
usedInBytes: processMemoryUsedInBytes,
totalInBytes: process?.memory?.heap.total_in_bytes,
sizeLimit: process?.memory?.heap.size_limit,
},
residentSetSizeInBytes: process?.memory?.resident_set_size_in_bytes,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want these to log to stdout in pattern format, we'll need to need to fiddle with processMemoryUsedInBytesMsg on line 21.

const processMemoryUsedInBytesMsg = processMemoryUsedInBytes
    ? `memory: ${numeral(processMemoryUsedInBytes).format('0.0b')} `
    : '';

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logs are fine in `json` format:
{
"event": {
  "kind": "metric",
  "category": [
    "process",
    "host"
  ],
  "type": [
    "info"
  ]
},
"process": {
  "uptime": 72.65337025,
  "memory": {
    "heap": {
      "usedInBytes": 602026648,
      "totalInBytes": 695664640,
      "sizeLimit": 4345298944
    },
    "residentSetSizeInBytes": 1079312384,
    "externalInBytes": 7233984, <--- this PR
    "arrayBuffersInBytes": <--- this PR
  },
  "eventLoopDelay": 10.336272995850623,
  "eventLoopDelayHistogram": {
    "50": 10.346495,
    "95": 11.010047,
    "99": 12.140543
  },
  "eventLoopUtilization": {
    "active": 279.28487307039177,
    "idle": 4719.119918999997,
    "utilization": 0.05587480099920223
  },
  "pid": 63971
},
"host": {
  "os": {
    "load": {
      "1m": 6.076171875,
      "5m": 6.0078125,
      "15m": 6.32568359375
    }
  }
},
"service": {
  "node": {
    "roles": [
      "background_tasks",
      "ui"
    ]
  }
},
"ecs": {
  "version": "8.6.1"
},
"@timestamp": "2023-11-29T16:15:47.457-07:00",
"message": "memory: 574.1MB uptime: 0:01:12 load: [6.08,6.01,6.33] mean delay: 10.336 delay histogram: { 50: 10.346; 95: 11.010; 99: 12.141 } utilization: 0.05587",
"log": {
  "level": "DEBUG",
  "logger": "metrics.ops"
},
"trace": {
  "id": "095027dd7070903121c4316455216d32"
},
"transaction": {
  "id": "56fde480923f64a4"
}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think it's fine. It's mostly for debug scenarios anyway, and we're using the json layout where it matters. But thanks for pointing that out!

externalInBytes: process?.memory?.external_in_bytes,
arrayBuffersInBytes: process?.memory?.array_buffers_in_bytes,
},
eventLoopDelay: eventLoopDelayVal,
eventLoopDelayHistogram: eventLoopDelayHistVals,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,14 @@ describe('MetricsService', () => {
"eventLoopDelayHistogram": undefined,
"eventLoopUtilization": undefined,
"memory": Object {
"arrayBuffersInBytes": undefined,
"externalInBytes": undefined,
"heap": Object {
"sizeLimit": undefined,
"totalInBytes": undefined,
"usedInBytes": undefined,
},
"residentSetSizeInBytes": undefined,
},
"uptime": undefined,
},
Expand Down
Loading
Loading