From 4912bc02401699f6faf17a521e52f4fe1292f8cf Mon Sep 17 00:00:00 2001
From: Gabriel Filion
Date: Wed, 28 Aug 2024 18:00:20 -0400
Subject: [PATCH 1/3] OpenMetrics: labels for timeseries needrestart_build_info misquoted

(Implements part of the solution for #310)
---
Note: this series was recovered from a whitespace-collapsed copy. Line
structure matches the hunk headers, but indentation inside the hunks was
reconstructed (4 spaces per level) and may differ from the target file.
TODO: confirm against needrestart, or apply with whitespace-insensitive
context matching.

 needrestart | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/needrestart b/needrestart
index 07cd023f..79b03276 100755
--- a/needrestart
+++ b/needrestart
@@ -1428,7 +1428,7 @@ if($opt_p) {
 if ($opt_o) {
     print "# TYPE needrestart_build info\n";
     print "# HELP needrestart_build information about needrestart's runtime build\n";
-    print "needrestart_build_info{version=$NeedRestart::VERSION,perl_version=$^V} 1\n";
+    print "needrestart_build_info{version=\"$NeedRestart::VERSION\",perl_version=\"$^V\"} 1\n";
 
     if ($opt_k) {
         my @ometric_kernel_status = map { $_ == $ometric_kernel_values{kresult} ? 1 : 0 } (NRK_NOUPGRADE, NRK_ABIUPGRADE, NRK_VERUPGRADE);

From d0767f77df92f4e31bf760c58e5ab70a1f4a929a Mon Sep 17 00:00:00 2001
From: Gabriel Filion
Date: Wed, 28 Aug 2024 18:03:25 -0400
Subject: [PATCH 2/3] OpenMetrics: metric types not accepted by prometheus (Closes: #310)

Apparently, prometheus does not implement the full OpenMetrics spec and
only supports a very limited set of metric types. If the output in
OpenMetrics format is going to be useful to people, we need the output
to be ingestible by prometheus since it's one of the most popular
openmetrics-type TSDB software.

* type info is trivially converted into a gauge
* stateset needs to be reworked into a gauge that has a label that
  changes to indicate the status

This patch also moves the logic about how status codes are interpreted
up closer to where they are being used. It keeps the output section
easier to read.
---
Note: indentation inside the hunks is reconstructed (see patch 1/3).
The default for the ucode "expected" label was corrected from "unkown"
to "unknown", so the post-image blob id on the index line below is
stale.

 needrestart | 84 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/needrestart b/needrestart
index 79b03276..dc1316bf 100755
--- a/needrestart
+++ b/needrestart
@@ -472,6 +472,11 @@ my %ometric_kernel_values = (
     krunning => q(unknown),
     kexpected => q(unknown),
 );
+my %ometric_ucode_values = (
+    status => q(unknown),
+    current => q(unknown),
+    expected => q(unknown),
+);
 
 my %restart;
 my %sessions;
@@ -910,7 +915,12 @@ if(defined($opt_k)) {
         }
     }
     elsif ($opt_o) {
-        $ometric_kernel_values{kresult} = $kresult;
+        my %kernel_states = (
+            &NRK_NOUPGRADE => "current",
+            &NRK_ABIUPGRADE => "abi_upgrade",
+            &NRK_VERUPGRADE => "version_upgrade",
+        );
+        $ometric_kernel_values{kresult} = $kernel_states{$kresult};
         $ometric_kernel_values{krunning} = $kvars{KVERSION};
         $ometric_kernel_values{kexpected} = $kvars{EVERSION};
     }
@@ -969,23 +979,35 @@ if($opt_w) {
             }
         }
         else {
-            if($ucode_result == NRM_OBSOLETE) {
-                $nagios{mstr} = "OBSOLETE";
-                $nagios{mret} = $nrconf{q(nagios-status)}->{ucode};
-                $nagios{mperf} = 1;
-            }
-            elsif($ucode_result == NRM_CURRENT) {
-                $nagios{mstr} = "CURRENT";
-                $nagios{mret} = 0;
-                $nagios{mperf} = 0;
-            }
+            if ($opt_p) {
+                if($ucode_result == NRM_OBSOLETE) {
+                    $nagios{mstr} = "OBSOLETE";
+                    $nagios{mret} = $nrconf{q(nagios-status)}->{ucode};
+                    $nagios{mperf} = 1;
+                }
+                elsif($ucode_result == NRM_CURRENT) {
+                    $nagios{mstr} = "CURRENT";
+                    $nagios{mret} = 0;
+                    $nagios{mperf} = 0;
+                }
 
-            if($nagios{mret} == 1) {
-                $nagios{mstr} .= " (!)";
-            }
-            elsif($nagios{mret} == 2) {
-                $nagios{mstr} .= " (!!)";
-            }
+                if($nagios{mret} == 1) {
+                    $nagios{mstr} .= " (!)";
+                }
+                elsif($nagios{mret} == 2) {
+                    $nagios{mstr} .= " (!!)";
+                }
+            }
+            elsif ($opt_o) {
+                my %ucode_states = (
+                    &NRM_CURRENT => "current",
+                    &NRM_OBSOLETE => "obsolete",
+                    &NRM_UNKNOWN => "unknown",
+                );
+                $ometric_ucode_values{status} = $ucode_states{$ucode_result};
+                $ometric_ucode_values{current} = $ucode_result != NRM_UNKNOWN ? $ucode_vars{CURRENT} : "unknown";
+                $ometric_ucode_values{expected} = $ucode_result != NRM_UNKNOWN ? $ucode_vars{AVAIL} : "unknown";
+            }
         }
     }
     else {
@@ -1426,33 +1448,25 @@ if($opt_p) {
     exit $ret;
 }
 if ($opt_o) {
-    print "# TYPE needrestart_build info\n";
-    print "# HELP needrestart_build information about needrestart's runtime build\n";
+    print "# TYPE needrestart_build_info gauge\n";
+    print "# HELP needrestart_build_info information about needrestart's runtime build\n";
     print "needrestart_build_info{version=\"$NeedRestart::VERSION\",perl_version=\"$^V\"} 1\n";
 
     if ($opt_k) {
-        my @ometric_kernel_status = map { $_ == $ometric_kernel_values{kresult} ? 1 : 0 } (NRK_NOUPGRADE, NRK_ABIUPGRADE, NRK_VERUPGRADE);
-        print "# TYPE needrestart_kernel_status stateset\n";
+        print "# TYPE needrestart_kernel_status gauge\n";
         print "# HELP needrestart_kernel_status status of kernel as reported by needrestart\n";
-        print "needrestart_kernel_status{needrestart_kernel_status=\"current\"} $ometric_kernel_status[0]\n";
-        print "needrestart_kernel_status{needrestart_kernel_status=\"abi_upgrade\"} $ometric_kernel_status[1]\n";
-        print "needrestart_kernel_status{needrestart_kernel_status=\"version_upgrade\"} $ometric_kernel_status[2]\n";
-        print "# TYPE needrestart_kernel info\n";
+        print "needrestart_kernel_status{needrestart_kernel_status=\"$ometric_kernel_values{kresult}\"} 1\n";
+        print "# TYPE needrestart_kernel gauge\n";
         print "# HELP needrestart_kernel version information for currenly running and most up to date kernels\n";
-        print "needrestart_kernel_info{running=\"$ometric_kernel_values{krunning}\",expected=\"$ometric_kernel_values{kexpected}\"} 1\n";
+        print "needrestart_kernel{running=\"$ometric_kernel_values{krunning}\",expected=\"$ometric_kernel_values{kexpected}\"} 1\n";
     }
     if ($opt_w) {
-        my $ometric_ucode_current = $ucode_result != NRM_UNKNOWN ? $ucode_vars{CURRENT} : "unknown";
-        my $ometric_ucode_expected = $ucode_result != NRM_UNKNOWN ? $ucode_vars{AVAIL} : "unknown";
-        my @ometric_ucode_status = map { $_ == $ucode_result ? 1 : 0 } (NRM_CURRENT, NRM_OBSOLETE, NRM_UNKNOWN);
-        print "# TYPE needrestart_ucode_status stateset\n";
+        print "# TYPE needrestart_ucode_status gauge\n";
         print "# HELP needrestart_ucode_status status of the host's CPU microcode as reported by needrestart\n";
-        print "needrestart_ucode_status{needrestart_ucode_status=\"current\"} $ometric_ucode_status[0]\n";
-        print "needrestart_ucode_status{needrestart_ucode_status=\"obsolete\"} $ometric_ucode_status[1]\n";
-        print "needrestart_ucode_status{needrestart_ucode_status=\"unknown\"} $ometric_ucode_status[2]\n";
-        print "# TYPE needrestart_ucode info\n";
+        print "needrestart_ucode_status{needrestart_ucode_status=\"$ometric_ucode_values{status}\"} 1\n";
+        print "# TYPE needrestart_ucode gauge\n";
         print "# HELP needrestart_ucode version informaion for currently used and available microcode\n";
-        print "needrestart_ucode_info{running=\"$ometric_ucode_current\",expected=\"$ometric_ucode_expected\"} 1\n";
+        print "needrestart_ucode{running=\"$ometric_ucode_values{current}\",expected=\"$ometric_ucode_values{expected}\"} 1\n";
     }
     if ($opt_l) {
         my $ometric_num_services = scalar %restart;

From 5d24f78871a7efab6f3b0766bc4971baca2ff9ac Mon Sep 17 00:00:00 2001
From: Gabriel Filion
Date: Thu, 29 Aug 2024 15:59:59 -0400
Subject: [PATCH 3/3] Bring back the _info suffix in the kernel and ucode timeseries

The types were changed to gauge but functionally they are still used as
an info-type metric.

The sample lines are renamed together with their TYPE/HELP metadata so
that the metadata and the emitted series refer to the same metric name;
renaming only the metadata would leave needrestart_kernel and
needrestart_ucode without any TYPE/HELP. Two typos in the HELP texts
("currenly", "informaion") are fixed on the way.
---
Note: indentation inside the hunk is reconstructed (see patch 1/3).
Because this patch and 2/3 were edited, the blob ids on the index line
below are stale.

 needrestart | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/needrestart b/needrestart
index dc1316bf..cad58c37 100755
--- a/needrestart
+++ b/needrestart
@@ -1456,16 +1456,16 @@ if ($opt_o) {
         print "# TYPE needrestart_kernel_status gauge\n";
         print "# HELP needrestart_kernel_status status of kernel as reported by needrestart\n";
         print "needrestart_kernel_status{needrestart_kernel_status=\"$ometric_kernel_values{kresult}\"} 1\n";
-        print "# TYPE needrestart_kernel gauge\n";
-        print "# HELP needrestart_kernel version information for currenly running and most up to date kernels\n";
-        print "needrestart_kernel{running=\"$ometric_kernel_values{krunning}\",expected=\"$ometric_kernel_values{kexpected}\"} 1\n";
+        print "# TYPE needrestart_kernel_info gauge\n";
+        print "# HELP needrestart_kernel_info version information for currently running and most up to date kernels\n";
+        print "needrestart_kernel_info{running=\"$ometric_kernel_values{krunning}\",expected=\"$ometric_kernel_values{kexpected}\"} 1\n";
     }
     if ($opt_w) {
         print "# TYPE needrestart_ucode_status gauge\n";
         print "# HELP needrestart_ucode_status status of the host's CPU microcode as reported by needrestart\n";
         print "needrestart_ucode_status{needrestart_ucode_status=\"$ometric_ucode_values{status}\"} 1\n";
-        print "# TYPE needrestart_ucode gauge\n";
-        print "# HELP needrestart_ucode version informaion for currently used and available microcode\n";
-        print "needrestart_ucode{running=\"$ometric_ucode_values{current}\",expected=\"$ometric_ucode_values{expected}\"} 1\n";
+        print "# TYPE needrestart_ucode_info gauge\n";
+        print "# HELP needrestart_ucode_info version information for currently used and available microcode\n";
+        print "needrestart_ucode_info{running=\"$ometric_ucode_values{current}\",expected=\"$ometric_ucode_values{expected}\"} 1\n";
     }
     if ($opt_l) {