Skip to content

Commit

Permalink
windows: Use instance label for hostname in hostname panel (#1353)
Browse files Browse the repository at this point in the history
* windows: Use instance label for hostname in hostname panel

* Fix NTP delay units (thanks to linter)

* Add units to panels

* Add none units to cpu count to pass linter

* Update win mixin
  • Loading branch information
v-zhuravlev authored Nov 6, 2024
1 parent ab84b9f commit b4993cb
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 78 deletions.
3 changes: 2 additions & 1 deletion common-lib/common/panels/cpu/stat/count.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ base {

stylize(allLayers=true):
(if allLayers then super.stylize() else {})
+ generic.info.stylize(allLayers=false),
+ generic.info.stylize(allLayers=false)
+ g.panel.stat.standardOptions.withUnit('none'),

}
9 changes: 3 additions & 6 deletions windows-active-directory-mixin/mixin.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,13 @@ local alerts = import './alerts/alerts.libsonnet';
local g = import './g.libsonnet';
local var = g.dashboard.variable;
local activedirectorymixin =
windowsobservlib.new(
filteringSelector='job=~"integrations/windows_exporter"',
uid='active-directory',
groupLabels=['job'],
instanceLabels=['instance'],
)
windowsobservlib.new()

{
config+: {
enableADDashboard: true,
groupLabels: ['job'],
uid: 'active-directory',
},
}

Expand Down
6 changes: 1 addition & 5 deletions windows-mixin/mixin.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ local winlib = import 'windows-observ-lib/main.libsonnet';
local config = (import 'config.libsonnet')._config;
{
local windows =
winlib.new(
dashboardNamePrefix=config.dashboardNamePrefix,
uid=config.uid,
filteringSelector=config.filteringSelector,
)
winlib.new()
+
{
config+: config,
Expand Down
42 changes: 42 additions & 0 deletions windows-observ-lib/config.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
// Default configuration for the Windows observability library.
// main.libsonnet imports this object and exposes it as the `config` field of `new()`;
// callers override individual keys with a `config+: {...}` mixin or `withConfigMixin(...)`.
//
// Any modular library should accept the following inputs:
// 'dashboardNamePrefix' - prefix for all dashboards and (optionally) rule groups.
// 'filteringSelector' - static selector applied to ALL query-type dashboard variables, panel queries, alerts and recording rules.
// 'groupLabels' - one or more labels that identify a 'group' of instances. In simple cases, 'job' or 'cluster'.
// 'instanceLabels' - one or more labels that identify a single instance. In simple cases, 'instance' or 'pod'.
// 'uid' - UID used to prefix all original dashboard uids.
groupLabels: ['job'],
instanceLabels: ['instance'],
filteringSelector: 'job=~".*windows.*"',
dashboardTags: ['windows'],
uid: 'windows',
dashboardNamePrefix: '',

// Optional settings.
// NOTE(review): the three thresholds below are strings, while the AD thresholds further down are numbers;
// presumably they are percentages - confirm against alerts.libsonnet.
// NOTE(review): 'ignoreVolumes' looks like a regex of volume names to exclude - confirm against its consumers.
ignoreVolumes: 'HarddiskVolume.*',
alertsCPUThresholdWarning: '90',
alertMemoryUsageThresholdCritical: '90',
alertDiskUsageThresholdCritical: '90',
// Presumably the default dashboard time range, timezone and refresh interval - confirm in dashboards.libsonnet.
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',

// Optional Windows Active Directory settings.
// 'enableADDashboard' is off by default; windows-active-directory-mixin sets it to true.
alertsHighPendingReplicationOperations: 50, // count
alertsHighReplicationSyncRequestFailures: 0, // count
alertsHighPasswordChanges: 25, // count
alertsMetricsDownJobName: 'integrations/windows_exporter',
enableADDashboard: false,

// Settings for the logs library.
// NOTE(review): 'logsExtraFilters' reads as extra LogQL pipeline stages for the log queries - confirm where it is consumed.
enableLokiLogs: true,
extraLogLabels: ['channel', 'source', 'keywords', 'level'],
logsVolumeGroupBy: 'level',
showLogsVolume: true,
logsExtraFilters:
|||
| label_format timestamp="{{__timestamp__}}"
| drop channel_extracted,source_extracted,computer_extracted,level_extracted,keywords_extracted
| line_format `{{ if eq "[[instance]]" ".*" }}{{ alignLeft 25 .instance}}|{{end}}{{alignLeft 12 .channel }}| {{ alignLeft 25 .source}}| {{ .message }}`
|||,
}
58 changes: 7 additions & 51 deletions windows-observ-lib/main.libsonnet
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
local alerts = import './alerts.libsonnet';
local config = import './config.libsonnet';
local dashboards = import './dashboards.libsonnet';
local datasources = import './datasources.libsonnet';
local g = import './g.libsonnet';
Expand All @@ -7,58 +8,10 @@ local targets = import './targets.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';

{
new(
filteringSelector,
groupLabels=['job'],
instanceLabels=['instance'],
dashboardNamePrefix='',
dashboardTags=[uid],
uid,
): {
new(): {

local this = self,
config: {
// any modular library should include as inputs:
// 'dashboardNamePrefix' - Use as prefix for all Dashboards and (optional) rule groups
// 'filteringSelector' - Static selector to apply to ALL dashboard variables of type query, panel queries, alerts and recording rules.
// 'groupLabels' - one or more labels that can be used to identify 'group' of instances. In simple cases, can be 'job' or 'cluster'.
// 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'.
// 'uid' - UID to prefix all dashboards original uids
groupLabels: groupLabels,
instanceLabels: instanceLabels,
filteringSelector: filteringSelector,
dashboardTags: dashboardTags,
uid: uid,
dashboardNamePrefix: dashboardNamePrefix,

// optional
ignoreVolumes: 'HarddiskVolume.*',
alertsCPUThresholdWarning: '90',
alertMemoryUsageThresholdCritical: '90',
alertDiskUsageThresholdCritical: '90',
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',

// optional Windows AD
alertsHighPendingReplicationOperations: 50, // count
alertsHighReplicationSyncRequestFailures: 0, // count
alertsHighPasswordChanges: 25, // count
alertsMetricsDownJobName: 'integrations/windows_exporter',
enableADDashboard: false,

// logs lib related
enableLokiLogs: true,
extraLogLabels: ['channel', 'source', 'keywords', 'level'],
logsVolumeGroupBy: 'level',
showLogsVolume: true,
logsExtraFilters:
|||
| label_format timestamp="{{__timestamp__}}"
| drop channel_extracted,source_extracted,computer_extracted,level_extracted,keywords_extracted
| line_format `{{ if eq "[[instance]]" ".*" }}{{ alignLeft 25 .instance}}|{{end}}{{alignLeft 12 .channel }}| {{ alignLeft 25 .source}}| {{ .message }}`
|||,
},
config: config,
grafana: {
variables: commonlib.variables.new(
filteringSelector=this.config.filteringSelector,
Expand All @@ -74,7 +27,7 @@ local commonlib = import 'common-lib/common/main.libsonnet';
reboot: commonlib.annotations.reboot.new(
title='Reboot',
target=this.grafana.targets.reboot,
instanceLabels=std.join(',', instanceLabels),
instanceLabels=std.join(',', this.config.instanceLabels),
)
+ commonlib.annotations.base.withTagKeys(std.join(',', this.config.groupLabels + this.config.instanceLabels)),
}
Expand Down Expand Up @@ -124,5 +77,8 @@ local commonlib = import 'common-lib/common/main.libsonnet';
},

},
withConfigMixin(config): {
config+: config,
},

}
10 changes: 10 additions & 0 deletions windows-observ-lib/mixin.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
local windowslib = import './main.libsonnet';
// Mixin entry point for the Windows observability library.
// Builds the library with its defaults, layers `_config` on top through `withConfigMixin`,
// and re-exports the resulting dashboards, alerts and recording rules.
{
// Hidden (`::`) config overrides; empty by default, so the library defaults apply unchanged.
_config:: {},
// Hidden library instance shared by the three exports below.
// NOTE(review): the name looks like a typo for `_windowslib`; left as is because renaming changes a field
// that downstream code may reference - confirm before changing.
_windowsib::
windowslib.new()
+ windowslib.withConfigMixin(self._config),
// `+::` keeps each export hidden and merges it with any same-named field inherited from another mixin.
grafanaDashboards+:: self._windowsib.grafana.dashboards,
prometheusAlerts+:: self._windowsib.prometheus.alerts,
prometheusRules+:: self._windowsib.prometheus.recordingRules,
}
35 changes: 20 additions & 15 deletions windows-observ-lib/panels.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -187,22 +187,25 @@ local utils = commonlib.utils;
A high number of context switches or interrupts can indicate that the system is overloaded or that there are problems with specific devices or processes.
|||
),
)
+ g.panel.timeSeries.standardOptions.withUnit('short'),
systemExceptions:
commonlib.panels.generic.timeSeries.base.new(
'System calls and exceptions',
targets=[
t.windowsSystemExceptions,
t.windowsSystemCalls,
],
),
)
+ g.panel.timeSeries.standardOptions.withUnit('short'),
systemThreads:
commonlib.panels.generic.timeSeries.base.new(
'System threads',
targets=[
t.windowsSystemThreads,
],
),
)
+ g.panel.timeSeries.standardOptions.withUnit('short'),
timeNtpStatus:
commonlib.panels.system.statusHistory.ntp.new(
'NTP status',
Expand All @@ -224,7 +227,7 @@ local utils = commonlib.utils;
Time offset: Absolute time offset between the system clock and the chosen time source, in seconds.
|||
)
+ g.panel.timeSeries.standardOptions.withUnit('seconds')
+ g.panel.timeSeries.standardOptions.withUnit('s')
+ g.panel.timeSeries.standardOptions.withNoValue('No data. Please check that "time" collector is enabled.'),
cpuCount: commonlib.panels.cpu.stat.count.new(targets=[t.cpuCount]),
cpuUsageTs: commonlib.panels.cpu.timeSeries.utilization.new(targets=[t.cpuUsage]),
Expand All @@ -242,17 +245,19 @@ local utils = commonlib.utils;
CPU usage by different modes.
|||
),
cpuQueue: commonlib.panels.generic.timeSeries.base.new(
'CPU average queue size',
targets=[t.cpuQueue],
description=|||
The CPU average queue size in Windows, often referred to as the "Processor Queue Length" or "CPU Queue Length," is a metric that measures the number of threads or tasks waiting to be processed by the central processing unit (CPU) at a given moment.
It is an essential performance indicator that reflects the workload and responsiveness of the CPU.
When the CPU queue length is high, it indicates that there are more tasks in line for processing than the CPU can handle immediately.
cpuQueue:
commonlib.panels.generic.timeSeries.base.new(
'CPU average queue size',
targets=[t.cpuQueue],
description=|||
The CPU average queue size in Windows, often referred to as the "Processor Queue Length" or "CPU Queue Length," is a metric that measures the number of threads or tasks waiting to be processed by the central processing unit (CPU) at a given moment.
It is an essential performance indicator that reflects the workload and responsiveness of the CPU.
When the CPU queue length is high, it indicates that there are more tasks in line for processing than the CPU can handle immediately.
This can lead to system slowdowns, decreased responsiveness, and potential performance issues. High CPU queue lengths are often associated with CPU saturation, where the CPU is struggling to keep up with the demands placed on it.
|||
),
This can lead to system slowdowns, decreased responsiveness, and potential performance issues. High CPU queue lengths are often associated with CPU saturation, where the CPU is struggling to keep up with the demands placed on it.
|||
)
+ g.panel.timeSeries.standardOptions.withUnit('short'),
memoryTotalBytes: commonlib.panels.memory.stat.total.new(targets=[t.memoryTotalBytes]),
memoryPageTotalBytes:
commonlib.panels.memory.stat.total.new(
Expand Down Expand Up @@ -363,7 +368,7 @@ local utils = commonlib.utils;
targets=[t.osInfo],
description="System's hostname."
)
{ options+: { reduceOptions+: { fields: '/^hostname$/' } } },
{ options+: { reduceOptions+: { fields: '/^instance$/' } } },
networkErrorsAndDroppedPerSec:
commonlib.panels.network.timeSeries.errors.new(
'Network errors and dropped packets',
Expand Down

0 comments on commit b4993cb

Please sign in to comment.