Skip to content

Commit

Permalink
Merge pull request #10 from aguadoenzo/nrpe-support
Browse files Browse the repository at this point in the history
Add NRPE checks
  • Loading branch information
morphis authored Sep 9, 2020
2 parents 60195f0 + c7a6c97 commit 0310caa
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 3 deletions.
16 changes: 16 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,19 @@ options:
type: string
default: stable
description: Snap Store channel to install the NATS snap from
nagios_context:
default: "juju"
type: string
description: |
Used by the nrpe subordinate charms.
A string that will be prepended to the instance name to set the host name
in nagios. For instance, the hostname would be something like:
juju-myservice-0
If you're running multiple environments with the same services in them
this allows you to differentiate between them.
nagios_servicegroups:
default: ""
type: string
description: |
A comma-separated list of nagios servicegroups.
If left empty, the nagios_context will be used as the servicegroup.
148 changes: 148 additions & 0 deletions lib/nrpe/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import logging
import os
import subprocess
from typing import List

import yaml

from ops.framework import EventBase, EventSource, EventsBase, StoredState
from ops.framework import Object

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

class NRPEAvailable(EventBase):
    """Event emitted by NRPEClient once the NRPE relation has reported a change."""


class NRPEClientEvents(EventsBase):
    """Container for the events that NRPEClient can emit."""

    # Emitted as an NRPEAvailable event.
    nrpe_available = EventSource(NRPEAvailable)


class NRPEClient(Object):
    """Client side of the NRPE relation (``nrpe-external-master`` by default).

    Check definitions are collected in stored state via :meth:`add_check` /
    :meth:`remove_check`. :meth:`commit` then writes the NRPE command files
    and Nagios service definitions, publishes the monitors on the relation
    and restarts ``nagios-nrpe-server``.

    ``on.nrpe_available`` is emitted the first time a relation-changed event
    is observed on the relation.
    """

    on = NRPEClientEvents()
    # Stored state keys: ``checks`` (name -> check definition), ``dirty``
    # (checks changed since the last successful commit) and ``nrpe_ready``
    # (a relation-changed event has been seen).
    state = StoredState()

    # Directory the per-check NRPE command files are written to.
    nrpe_confdir = '/etc/nagios/nrpe.d'
    # Directory the per-check Nagios service definitions are written to.
    nagios_exportdir = '/var/lib/nagios/export'
    # %-style template of an NRPE command file.
    check_template = """
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
command[%(check_name)s]=%(command)s
"""
    # %-style template of a Nagios service definition. It is split into two
    # adjacent literals only to keep the service_description line short.
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {
use active-service
host_name %(hostname)s
service_description %(hostname)s[%(check_name)s] """
                        """%(description)s
check_command check_nrpe!%(check_name)s
servicegroups %(servicegroup)s
}
""")

    def __init__(self, charm, relation_name='nrpe-external-master'):
        """
        Set up stored-state defaults and observe the relation-changed event.
        :param charm: The charm object this client is attached to
        :param relation_name: Name of the NRPE relation to use
        """
        super().__init__(charm, relation_name)
        self._relation_name = relation_name
        self.state.set_default(checks={}, dirty=False, nrpe_ready=False)

        self.framework.observe(charm.on[relation_name].relation_changed, self.on_relation_changed)

    @property
    def is_joined(self):
        """True when the model currently has a relation with the configured name."""
        return self.framework.model.get_relation(self._relation_name) is not None

    @property
    def is_available(self):
        """True once a relation-changed event has been observed on the relation."""
        return self.state.nrpe_ready

    def add_check(self, command: List[str], name: str, description: str = None, hostname: str = None):
        """
        Register a new check to be executed by NRPE.
        Call NRPEClient.commit() to save changes.
        If a check with the same name already exists, it will be updated.
        :param command: A string array containing the command to be executed
        :param name: Human readable name for the check
        :param description: A short description of the check
        :param hostname: Unit hostname. Defaults to a combination of nagios_context and unit name
        """
        nagios_context = self.model.config['nagios_context']
        # An empty/unset servicegroups option falls back to the context.
        nagios_servicegroups = self.model.config.get('nagios_servicegroups') or nagios_context
        unit_name = self.model.unit.name.replace("/", "_")
        hostname = hostname or f"{nagios_context}-{unit_name}"
        if not description:
            description = f'{name} {unit_name}'

        new_check = {
            'command': command,
            'description': description,
            'hostname': hostname,
            'servicegroup': nagios_servicegroups,
        }

        # Only flag the state as dirty when something actually changed, so
        # that commit() can skip rewriting files and restarting NRPE.
        if name not in self.state.checks or self.state.checks[name] != new_check:
            self.state.dirty = True
            self.state.checks[name] = new_check

    def remove_check(self, name: str):
        """
        Unregister a check. Removing an unknown check is a no-op.
        Call NRPEClient.commit() to save changes.
        NOTE: configuration files already written for the check are not
        deleted here; only the registered state is updated.
        :param name: Name the check was registered with
        """
        if name in self.state.checks:
            self.state.checks.pop(name, None)
            # Without this flag commit() would return early and the removed
            # check would stay published on the relation.
            self.state.dirty = True

    def commit(self):
        """
        Commit checks to NRPE and Nagios.
        Does nothing when no check changed since the last commit, when the
        relation has not been seen yet, or when the relation no longer exists.
        :raises subprocess.CalledProcessError: if restarting nagios-nrpe-server fails
        """
        if not self.state.dirty:
            logger.info('Skipping NRPE commit as nothing changed')
            return

        if not self.state.nrpe_ready:
            logger.info('NRPE relation is not ready')
            return

        # nrpe_ready is never reset, so the relation may be gone by now;
        # bail out before touching any files instead of failing on None.
        if not self.is_joined:
            logger.warning('NRPE relation %s does not exist; skipping commit', self._relation_name)
            return

        self._write_check_files()
        self._publish_to_nagios()
        subprocess.check_call(['systemctl', 'restart', 'nagios-nrpe-server'])
        # Cleared only after a successful restart so a failed commit is retried.
        self.state.dirty = False
        logger.info('Successfully updated NRPE checks: %s', ', '.join(self.state.checks))

    def _write_check_files(self):
        """Register the new checks with NRPE and place their configuration files in the appropriate locations"""
        for check_name, check in self.state.checks.items():
            check_filename = os.path.join(self.nrpe_confdir, f'{check_name}.cfg')
            check_args = {
                'check_name': check_name,
                'command': ' '.join(check['command']),
            }
            with open(check_filename, 'w') as check_config:
                check_config.write(self.check_template % check_args)

            service_filename = os.path.join(
                self.nagios_exportdir,
                'service__{}_{}.cfg'.format(check['hostname'], check_name))
            service_args = {
                'hostname': check['hostname'],
                'description': check['description'],
                'check_name': check_name,
                'servicegroup': check['servicegroup'],
            }
            with open(service_filename, 'w') as service_config:
                service_config.write(self.service_template % service_args)

    def _publish_to_nagios(self):
        """Publish check data on the monitors relation"""
        rel = self.framework.model.get_relation(self._relation_name)
        if rel is None:
            # Defensive: commit() checks this first, but never dereference None.
            logger.warning('NRPE relation %s does not exist; nothing published', self._relation_name)
            return

        rel_data = rel.data[self.model.unit]
        rel_data['version'] = '0.3'

        nrpe_monitors = {check_name: {'command': check_name} for check_name in self.state.checks}
        rel_data['monitors'] = yaml.dump({"monitors": {"remote": {"nrpe": nrpe_monitors}}})

    def on_relation_changed(self, event):
        """Mark NRPE as ready and emit nrpe_available on the first relation-changed event."""
        if not self.state.nrpe_ready:
            self.state.nrpe_ready = True
            self.on.nrpe_available.emit()

3 changes: 3 additions & 0 deletions metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ series:
provides:
client:
interface: nats
nrpe-external-master:
interface: nrpe-external-master
scope: container
requires:
ca-client:
interface: tls-certificates
Expand Down
21 changes: 18 additions & 3 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
ModelError,
BlockedStatus,
)
from nrpe.client import NRPEClient
from interfaces import NatsCluster, NatsClient, CAClient

from jinja2 import Environment, FileSystemLoader
Expand Down Expand Up @@ -74,6 +75,9 @@ def __init__(self, framework, key):
self.framework.observe(self.ca_client.on.tls_config_ready, self)
self.framework.observe(self.ca_client.on.ca_available, self)

self.nrpe_client = NRPEClient(self, 'nrpe-external-master')
self.framework.observe(self.nrpe_client.on.nrpe_available, self)

def on_install(self, event):
try:
core_res = self.model.resources.fetch('core')
Expand Down Expand Up @@ -111,10 +115,10 @@ def handle_tls_config(self):
load_pem_private_key(tls_key, backend=default_backend())
tls_cert = self.model.config['tls-cert']
if tls_cert:
load_pem_x509_certificate(tls_cert, bacend=default_backend())
load_pem_x509_certificate(tls_cert, backend=default_backend())
tls_ca_cert = self.model.config['tls-ca-cert']
if tls_ca_cert:
load_pem_x509_certificate(tls_ca_cert, default_backend())
load_pem_x509_certificate(tls_ca_cert, backend=default_backend())

self.state.use_tls = tls_key and tls_cert
self.state.use_tls_ca = bool(tls_ca_cert)
Expand All @@ -129,6 +133,9 @@ def handle_tls_config(self):
self.TLS_CA_CERT_PATH.write_text(tls_ca_cert)
self.client.set_tls_ca(tls_ca_cert)

def on_nrpe_available(self, event):
    """Handle the NRPE client's nrpe_available event by running reconfigure_nats()."""
    self.reconfigure_nats()

def on_ca_available(self, event):
    """Handle the CA client's ca_available event by running reconfigure_nats()."""
    self.reconfigure_nats()

Expand Down Expand Up @@ -206,14 +213,22 @@ def reconfigure_nats(self):
})
self.client.set_tls_ca(
self.ca_client.ca_certificate.public_bytes(encoding=serialization.Encoding.PEM).decode('utf-8'))

if self.nrpe_client.is_available:
self.nrpe_client.add_check(command=[
'/usr/lib/nagios/plugins/check_tcp',
'-H', str(self.client.listen_address),
'-p', str(self.model.config['client-port'])
], name='check_tcp')
self.nrpe_client.commit()

tenv = Environment(loader=FileSystemLoader('templates'))
template = tenv.get_template('nats.cfg.j2')
rendered_content = template.render(ctxt)
content_hash = self.generate_content_hash(rendered_content)
old_hash = self.state.nats_config_hash
if old_hash != content_hash:
logging.info(f'Config has changed - re-rendering a template to {self.NATS_SERVER_CONFIG_PATH}')
logger.info('')
self.state.nats_config_hash = content_hash
self.NATS_SERVER_CONFIG_PATH.write_text(rendered_content)
if self.state.is_started:
Expand Down

0 comments on commit 0310caa

Please sign in to comment.