Skip to content

Commit

Permalink
A more detailed list of clients (#63)
Browse files Browse the repository at this point in the history
* A more detailed list of clients

* fix for CodeQL cross-site scripting warning

* Hashing keys and refactoring config structure

* Duplicate Clientid validation

* bump version & adjust validate hash

* refactor _validate_hashes

* adding authentication to changelog
  • Loading branch information
IbraAoad committed Nov 30, 2023
1 parent 6a0d572 commit 63c09cf
Show file tree
Hide file tree
Showing 14 changed files with 205 additions and 51 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.6.0] - 2023-11-30

- Added badges to README.md (#62).
- Config now accommodates client ID, key, and name, allowing users to specify individual client details (#63).
- Added client authentication using SHA512-hashed keys for enhanced security (#63).


## [0.5.0] - 2023-10-26

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ receivers:
...
- name: cos-alerter
webhook_configs:
- url: http://<cos-alerter-address>:8080/alive?clientid=<clientid>
- url: http://<cos-alerter-address>:8080/alive?clientid=<clientid>&key=<clientkey>
route:
...
routes:
Expand Down
45 changes: 34 additions & 11 deletions cos_alerter/alerter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import datetime
import logging
import os
import sys
import textwrap
import threading
import time
Expand All @@ -14,7 +15,8 @@

import apprise
import durationpy
import yaml
from ruamel.yaml import YAML
from ruamel.yaml.constructor import DuplicateKeyError

logger = logging.getLogger(__name__)

Expand All @@ -30,14 +32,35 @@ def set_path(self, path: str):
"""Set the config file path."""
self.path = Path(path)

def _validate_hashes(self, clients):
    """Validate that keys in the clients dictionary are valid SHA-512 hashes.

    A key is accepted when it is a string of exactly 128 hexadecimal
    characters (the length of a SHA-512 hex digest). Clients whose entry has
    no key, or an empty one, are skipped rather than rejected.

    Args:
        clients: Mapping of client ID to a mapping of client details, where
            the optional "key" entry holds the hashed key.

    Returns:
        True if every non-empty key looks like a SHA-512 hex digest,
        False as soon as one does not.
    """
    hex_digits = frozenset("0123456789abcdefABCDEF")
    for client_info in clients.values():
        client_key = client_info.get("key", "")
        if client_key is None or client_key == "":
            # No key configured for this client; nothing to validate.
            continue
        if not isinstance(client_key, str):
            # e.g. an unquoted all-digit YAML scalar is loaded as an int.
            return False
        # Length alone is not enough: every character must be a hex digit.
        if len(client_key) != 128 or not hex_digits.issuperset(client_key):
            return False
    return True

def reload(self):
"""Reload config values from the disk."""
yaml = YAML(typ="rt")
with open(
os.path.join(os.path.dirname(os.path.realpath(__file__)), "config-defaults.yaml")
) as f:
self.data = yaml.safe_load(f)
self.data = yaml.load(f)
with open(self.path, "r") as f:
user_data = yaml.safe_load(f)
try:
user_data = yaml.load(f)
except DuplicateKeyError:
logger.critical("Duplicate client IDs found in COS Alerter config. Exiting...")
sys.exit(1)

# Validate that keys are valid SHA-512 hashes
if user_data and user_data.get("watch", {}).get("clients"):
if not self._validate_hashes(user_data["watch"]["clients"]):
logger.critical("Invalid SHA-512 hash(es) in config. Exiting...")
sys.exit(1)

deep_update(self.data, user_data)
self.data["watch"]["down_interval"] = durationpy.from_str(
self.data["watch"]["down_interval"]
Expand All @@ -50,15 +73,15 @@ def reload(self):
def deep_update(base: dict, new: typing.Optional[dict]):
    """Deep dict update.

    Same as dict.update() except it recurses into subdicts.

    Args:
        base: Dictionary that is updated in place.
        new: Values to merge into ``base``. ``None`` leaves ``base`` untouched.
    """
    if new is None:
        return
    for key, new_value in new.items():
        # Recurse only when both sides are dicts; otherwise the incoming value
        # replaces (or adds) the entry, exactly like dict.update().
        if key in base and isinstance(base[key], dict) and isinstance(new_value, dict):
            deep_update(base[key], new_value)
        else:
            base[key] = new_value


config = Config()
Expand Down Expand Up @@ -120,9 +143,9 @@ def initialize():
# ...
# }
state["clients"] = {}
for client in config["watch"]["clients"]:
for client_id in config["watch"]["clients"]:
alert_time = None if config["watch"]["wait_for_first_connection"] else current_time
state["clients"][client] = {
state["clients"][client_id] = {
"lock": threading.Lock(),
"alert_time": alert_time,
"notify_time": None,
Expand Down
16 changes: 11 additions & 5 deletions cos_alerter/config-defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,19 @@ watch:
# This allows you to configure COS Alerter before configuring Alertmanager.
wait_for_first_connection: true

# The list of Alertmanager instances we are monitoring. Alertmanager instances should be
# configured with the clientid=<client> parameter.
# Configuration for monitoring Alertmanager instances.
# - clientid: Unique identifier for the Alertmanager instance.
# - key: SHA-512 hex digest (128 hexadecimal characters) of the client's secret key. Clients send the plain key in the "key" URL parameter; COS Alerter hashes it and compares the result with this value.
# - name: Descriptive name for the instance.
# eg:
# clients:
# - "client0"
# - "client1"
clients: []
# clientid0:
# key: "822295b207a0b73dd4690b60a03c55599346d44aef3da4cf28c3296eadb98b2647ae18863cc3ae8ae5574191b60360858982fd8a8d176c0edf646ce6eee24ef9"
# name: "Instance Name 0"
# clientid1:
# key: "0415b0cad09712bd1ed094bc06ed421231d0603465e9841c959e9f9dcf735c9ce704df7a0c849a4e0db405c916f679a0e6c3f63f9e26191dda8069e1b44a3bc8"
# name: "Instance Name 1"
clients: {}

notify:

Expand Down
32 changes: 23 additions & 9 deletions cos_alerter/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""HTTP server for COS Alerter."""

import hashlib
import logging

import timeago
Expand All @@ -28,9 +29,10 @@ def dashboard():
status = "up" if not state.is_down() else "down"
if last_alert is None:
status = "unknown"
client_name = config["watch"]["clients"][clientid].get("name", "")
clients.append(
{
"clientid": clientid,
"client_name": client_name,
"status": status,
"alert_time": alert_time,
}
Expand All @@ -44,16 +46,28 @@ def alive():
# TODO Decide if we should validate the request.
params = request.args
clientid_list = params.getlist("clientid") # params is a werkzeug.datastructures.MultiDict
if len(clientid_list) < 1:
logger.warning("Request %s has no clientid.", request.url)
return 'Parameter "clientid" required.', 400
if len(clientid_list) > 1:
logger.warning("Request %s specified clientid more than once.", request.url)
return 'Parameter "clientid" provided more than once.', 400
key_list = params.getlist("key")

if len(clientid_list) < 1 or len(key_list) < 1:
logger.warning("Request %s is missing clientid or key.", request.url)
return 'Parameters "clientid" and "key" are required.', 400
if len(clientid_list) > 1 or len(key_list) > 1:
logger.warning("Request %s specified clientid or key more than once.", request.url)
return 'Parameters "clientid" and "key" should be provided exactly once.', 400
clientid = clientid_list[0]
if clientid not in config["watch"]["clients"]:
logger.warning("Request %s specified an unknown clientid.")
key = key_list[0]

# Find the client with the specified clientid
client_info = config["watch"]["clients"].get(clientid)
if not client_info:
logger.warning("Request %s specified an unknown clientid.", request.url)
return 'Clientid {params["clientid"]} not found. ', 404

# Hash the key and compare with the stored hashed key
hashed_key = hashlib.sha512(key.encode()).hexdigest()
if hashed_key != client_info.get("key", ""):
logger.warning("Request %s provided an incorrect key.", request.url)
return "Incorrect key for the specified clientid.", 401
logger.info("Received alert from Alertmanager clientid: %s.", clientid)
with AlerterState(clientid) as state:
state.reset_alert_timeout()
Expand Down
2 changes: 1 addition & 1 deletion cos_alerter/templates/dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ <h2>Clients</h2>
<tbody>
{% for client in clients %}
<tr>
<td>{{ client["clientid"] }}</td>
<td>{{ client["client_name"] }}</td>
{% if client["status"] == "up" %}
<td>✅ Up</td>
{% elif client["status"] == "down" %}
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "cos-alerter"
version = "0.5.0"
version = "0.6.0"
authors = [
{ name="Dylan Stephano-Shachter", email="dylan.stephano-shachter@canonical.com" }
]
Expand All @@ -26,6 +26,7 @@ dependencies = [
"pyyaml~=6.0",
"timeago~=1.0",
"waitress~=2.1",
"ruamel.yaml~=0.18.0"
]

[project.urls]
Expand Down
4 changes: 2 additions & 2 deletions rockcraft.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: cos-alerter
summary: A liveness checker for self-monitoring.
description: Receive regular pings from the cos stack and alert when they stop.
version: "0.5.0" # NOTE: Make sure this matches `cos-alerter` below
version: "0.6.0" # NOTE: Make sure this matches `cos-alerter` below
base: ubuntu:22.04
license: Apache-2.0
platforms:
Expand All @@ -11,7 +11,7 @@ parts:
plugin: python
source: .
python-packages:
- cos-alerter==0.5.0 # NOTE: Make sure this matches `version` above
- cos-alerter==0.6.0 # NOTE: Make sure this matches `version` above
stage-packages:
- python3-venv
services:
Expand Down
2 changes: 1 addition & 1 deletion snap/snapcraft.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cos-alerter
version: '0.5.0'
version: '0.6.0'
summary: A watchdog alerting on alertmanager notification failures.
license: Apache-2.0
contact: simon.aronsson@canonical.com
Expand Down
7 changes: 6 additions & 1 deletion tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
"watch": {
"down_interval": "5m",
"wait_for_first_connection": False,
"clients": ["client0"],
"clients": {
"clientid1": {
"key": "822295b207a0b73dd4690b60a03c55599346d44aef3da4cf28c3296eadb98b2647ae18863cc3ae8ae5574191b60360858982fd8a8d176c0edf646ce6eee24ef9",
"name": "Instance Name 1",
},
},
},
"notify": {
"destinations": DESTINATIONS,
Expand Down
63 changes: 55 additions & 8 deletions tests/test_alerter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,53 @@ def test_config_default_empty_file(fake_fs):
assert config["watch"]["down_interval"] == 300


def test_duplicate_key_error(fake_fs):
    """Reloading a config that lists the same client ID twice exits with code 1."""
    # The same client ID appears twice under watch.clients, which the YAML
    # loader reports as a duplicate key.
    duplicate_config = """
    watch:
      down_interval: "5m"
      wait_for_first_connection: true
      clients:
        clientid1:
          key: "clientkey1"
          name: "Instance Name 1"
        clientid1:
          key: "clientkey1"
          name: "Instance Name 1"
    """
    with open("/etc/cos-alerter.yaml", "w") as f:
        f.write(duplicate_config)

    try:
        config.reload()
    except SystemExit as exc:
        assert exc.code == 1
    else:
        # Raise explicitly: a bare `assert False` is stripped under `python -O`.
        raise AssertionError("config.reload() did not exit on duplicate client IDs")


def test_invalid_hashes(fake_fs):
    """Reloading a config whose client key is not a SHA-512 hash exits with code 1."""
    # The key below is only 64 characters long, so it cannot be a SHA-512
    # hex digest (128 characters).
    invalid_hash_config = """
    watch:
      down_interval: "5m"
      wait_for_first_connection: true
      clients:
        invalidhashclient:
          key: "E0E06B8DB6ED8DD4E1FFE98376E606BDF4FE4ABB4AF65BFE8B18FBFA6564D8B3"
          name: "Instance Name 1"
    """
    with open("/etc/cos-alerter.yaml", "w") as f:
        f.write(invalid_hash_config)

    try:
        config.reload()
    except SystemExit as exc:
        assert exc.code == 1
    else:
        # Raise explicitly: a bare `assert False` is stripped under `python -O`.
        raise AssertionError("config.reload() did not exit on an invalid key hash")


def test_config_default_partial_file(fake_fs):
conf = yaml.dump({"log_level": "info"})
with open("/etc/cos-alerter.yaml", "w") as f:
Expand All @@ -50,7 +97,7 @@ def test_config_default_override(fake_fs):
def test_initialize(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
assert state.start_date == 1672531200.0
assert state.start_time == 1000
Expand All @@ -72,7 +119,7 @@ def test_up_time(monotonic_mock, fake_fs):
def test_is_down_from_initialize(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 1180 # Three minutes have passed
assert state.is_down() is False
Expand All @@ -85,7 +132,7 @@ def test_is_down_from_initialize(monotonic_mock, fake_fs):
def test_is_down_with_reset_alert_timeout(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 2000
state.reset_alert_timeout()
Expand All @@ -106,7 +153,7 @@ def test_is_down_with_wait_for_first_connection(monotonic_mock, fake_fs):
config.reload()
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 1500
assert state.is_down() is False # 6 minutes have passes but we have not started counting.
Expand All @@ -122,7 +169,7 @@ def test_is_down_with_wait_for_first_connection(monotonic_mock, fake_fs):
def test_is_down(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
monotonic_mock.return_value = 2000
state.reset_alert_timeout()
Expand All @@ -137,7 +184,7 @@ def test_is_down(monotonic_mock, fake_fs):
def test_recently_notified(monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")
with state:
state._set_notify_time()
monotonic_mock.return_value = 2800 # 30 minutes have passed
Expand All @@ -153,7 +200,7 @@ def test_recently_notified(monotonic_mock, fake_fs):
def test_notify(notify_mock, add_mock, monotonic_mock, fake_fs):
monotonic_mock.return_value = 1000
AlerterState.initialize()
state = AlerterState(clientid="client0")
state = AlerterState(clientid="clientid1")

with state:
state.notify()
Expand All @@ -166,7 +213,7 @@ def test_notify(notify_mock, add_mock, monotonic_mock, fake_fs):
title="**Alertmanager is Down!**",
body=textwrap.dedent(
"""
Your Alertmanager instance: client0 seems to be down!
Your Alertmanager instance: clientid1 seems to be down!
It has not alerted COS-Alerter ever.
"""
),
Expand Down
Loading

0 comments on commit 63c09cf

Please sign in to comment.