diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index ca8f4f0..98103c8 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -10,6 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11"]
diff --git a/.gitignore b/.gitignore
index c18dd8d..c1e64c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 __pycache__/
+.coverage
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..0bae781
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,27 @@
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.4.0
+  hooks:
+  - id: trailing-whitespace
+  - id: check-docstring-first
+  - id: check-json
+  - id: pretty-format-json
+    args: [--autofix, --no-sort-keys]
+  - id: check-added-large-files
+  - id: check-yaml
+  - id: debug-statements
+  - id: end-of-file-fixer
+- repo: https://github.com/myint/docformatter
+  rev: v1.5.1
+  hooks:
+  - id: docformatter
+    args: [--in-place]
+- repo: https://github.com/asottile/pyupgrade
+  rev: v3.3.1
+  hooks:
+  - id: pyupgrade
+    args: [--py38-plus]
+- repo: https://github.com/PyCQA/flake8
+  rev: 6.0.0
+  hooks:
+  - id: flake8
diff --git a/README.md b/README.md
index 0cf6fd0..0de6dc6 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,56 @@
-# github-workflows-monitoring
+# GitHub Workflows Monitoring
 
 [![Tests](https://github.com/midokura/github-workflows-monitoring/actions/workflows/tests.yaml/badge.svg)](https://github.com/midokura/github-workflows-monitoring/actions/workflows/tests.yaml)
 
 ## About
 
-Github Workflow Monitoring is a small Flask-based web server that connects to Github using websockets to monitor Github Actions workflows. It tracks each workflow's state (queued, in_progress, completed) and calculates the time spent in each state. The metrics are logged in logfmt format for easy consumption by Grafana.
+GitHub Workflows Monitoring is a small Python (Flask-based) application that processes [GitHub webhook calls] and logs them.
+It tracks each workflow's state (`queued`, `in_progress`, `completed`) and calculates the time spent in each state.
+
+This application can be very useful for gathering information about Organization Runners:
+- How much time is spent before a job starts processing?
+- Which repositories are triggering lots of jobs?
+
+The metrics are logged in `logfmt` format to simplify querying them (e.g. with Grafana).
+
+[GitHub webhook calls]: https://docs.github.com/en/developers/webhooks-and-events/webhooks/creating-webhooks
+
+## Setup
+
+Go to your **GitHub Organization** >> **Settings** >> **Webhooks** >> **Add new webhook**.
+
+Expose your application to the Internet (ngrok, a load balancer, etc.), and **use the endpoint** `/github-webhook`.
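+
+For a quick smoke test you can post a sample `workflow_job` payload yourself. This is a minimal sketch: it assumes the application is running locally on Flask's default port 5000 and that only GitHub's standard `X-GitHub-Event` header is required; the field values are illustrative.
+
+```sh
+curl -X POST http://localhost:5000/github-webhook \
+  -H "Content-Type: application/json" \
+  -H "X-GitHub-Event: workflow_job" \
+  -d '{
+        "action": "queued",
+        "workflow_job": {"id": 1, "workflow_name": "CI", "started_at": "2023-01-27T14:00:00Z",
+                         "conclusion": null, "runner_name": null, "runner_group_name": null},
+        "repository": {"full_name": "foo/foo", "private": false},
+        "sender": {"login": "testerbot"}
+      }'
+```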
+ +![Example of Webhook configuration](media/github_setup.png) + +The **events** that are currently supported are: +- Workflow jobs ## Testing Into a virtual environment, install the requirements: - pip install -r tests/requirements.txt - +```sh +pip install -r tests/requirements.txt +``` To run the tests: - pytest --cov=src +```sh +pytest --cov=src +``` diff --git a/media/github_setup.png b/media/github_setup.png new file mode 100644 index 0000000..fc21fc3 Binary files /dev/null and b/media/github_setup.png differ diff --git a/setup.cfg b/setup.cfg index 292cb39..a7a6339 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,12 +1,12 @@ [metadata] name = github-workflows-monitoring -version = 0.1 +version = 0.1.1 license-file = LICENSE [options] python_requires = >=3.8 packages = find: -install_requires = +install_requires = Flask>=2.2,<3 [flake8] diff --git a/src/app.py b/src/app.py index 9fde501..0c134b4 100644 --- a/src/app.py +++ b/src/app.py @@ -7,7 +7,7 @@ from const import GithubHeaders, LOGGING_CONFIG -from utils import parse_datetime +from utils import parse_datetime, dict_to_logfmt dictConfig(LOGGING_CONFIG) @@ -51,15 +51,25 @@ def process_workflow_job(): workflow = job["workflow_job"]["workflow_name"] time_start = parse_datetime(job["workflow_job"]["started_at"]) repository = job["repository"]["full_name"] + repository_private = job["repository"]["private"] action = job["action"] + conclusion = job["workflow_job"].get("conclusion") + requestor = job.get("sender", {}).get("login") + runner_name = job["workflow_job"]["runner_name"] + runner_group_name = job["workflow_job"]["runner_group_name"] + runner_public = (runner_group_name == "GitHub Actions") + + context_details = { + "action": action, + "repository": repository, + "job_id": job_id, + "workflow": workflow, + "requestor": requestor, + } if action == "queued": # add to memory as timestamp jobs[job_id] = int(time_start.timestamp()) - msg = ( - f"action={action} repository={repository} job_id={job_id}" - f' workflow="{workflow}"' - ) elif action == "in_progress": job_requested = jobs.get(job_id) @@ -68,10 +78,14 @@ def process_workflow_job(): time_to_start = 0 else: time_to_start = (time_start - datetime.fromtimestamp(job_requested)).seconds - msg = ( - f"action={action} repository={repository} job_id={job_id}" - f' workflow="{workflow}" time_to_start={time_to_start}' - ) + + context_details = { + **context_details, + "time_to_start": time_to_start, + "runner_name": runner_name, + "runner_public": runner_public, + "repository_private": repository_private + } elif action == "completed": job_requested = jobs.get(job_id) @@ -84,29 +98,37 @@ def process_workflow_job(): ).seconds # delete from memory del jobs[job_id] - msg = ( - f"action={action} repository={repository} job_id={job_id}" - f' workflow="{workflow}" time_to_finish={time_to_finish}' - ) + + context_details = { + **context_details, + "time_to_finish": time_to_finish, + "conclusion": conclusion + } + else: app.logger.warning(f"Unknown action {action}, removing from memory") if job_id in jobs: del jobs[job_id] - msg = None + context_details = None - if msg: - app.logger.info(msg) + if context_details: + app.logger.info(dict_to_logfmt(context_details)) return True +allowed_events = { + "workflow_job": process_workflow_job +} + + @app.route("/github-webhook", methods=["POST"]) def github_webhook_process(): event = request.headers.get(GithubHeaders.EVENT.value) - command = f"process_{event}" - if command == "process_workflow_job": - app.logger.debug(f"Calling function {command}") - 
process_workflow_job() + if event in allowed_events: + app.logger.debug(f"Calling function to process {event=}") + func = allowed_events.get(event) + func() return "OK" app.logger.error(f"Unknown event type {event}, can't handle") diff --git a/src/utils.py b/src/utils.py index c71da7c..0b61add 100644 --- a/src/utils.py +++ b/src/utils.py @@ -2,6 +2,24 @@ def parse_datetime(date: str) -> datetime: - """Parse GitHub date to object""" + """Parse GitHub date to object.""" exp = "%Y-%m-%dT%H:%M:%SZ" return datetime.strptime(date, exp) + + +def dict_to_logfmt(data: dict) -> str: + """Convert a dict to logfmt string.""" + outstr = list() + for k, v in data.items(): + if v is None: + outstr.append(f"{k}=") + continue + if isinstance(v, bool): + v = "true" if v else "false" + elif isinstance(v, (dict, object, int)): + v = str(v) + + if " " in v: + v = '"%s"' % v.replace('"', '\\"') + outstr.append(f"{k}={v}") + return " ".join(outstr) diff --git a/tests/tests.py b/tests/tests.py index 70db745..e4fc72c 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -11,9 +11,22 @@ "id": 0, "workflow_name": "CI", "started_at": "2023-01-27T14:00:00Z", + "conclusion": None, + "labels": [], + "runner_id": None, + "runner_name": None, + "runner_group_id": None, + "runner_group_name": None, }, "repository": { + "name": "foo", "full_name": "foo/foo", + "private": False, + }, + "sender": { + "login": "testerbot", + "id": 1, + "type": "User", }, } @@ -53,7 +66,8 @@ def test_started_job_not_stored(client, caplog): assert response.status_code == 200 assert caplog.messages == [ "Job 2 is in_progress but not stored!", - 'action=in_progress repository=foo/foo job_id=2 workflow="CI" time_to_start=0', + 'action=in_progress repository=foo/foo job_id=2 workflow=CI requestor=testerbot time_to_start=0 ' + 'runner_name= runner_public=false repository_private=false', ] @@ -65,7 +79,7 @@ def test_finished_job_not_stored(client, caplog): assert response.status_code == 200 assert caplog.messages == [ "Job 3 is completed but not stored!", - 'action=completed repository=foo/foo job_id=3 workflow="CI" time_to_finish=0', + 'action=completed repository=foo/foo job_id=3 workflow=CI requestor=testerbot time_to_finish=0 conclusion=', ] @@ -79,7 +93,7 @@ def test_unknown_action(client, caplog): response = client.post("/github-webhook", headers=HEADERS, json=body_failed) assert response.status_code == 200 assert caplog.messages == [ - 'action=queued repository=foo/foo job_id=4 workflow="CI"', + 'action=queued repository=foo/foo job_id=4 workflow=CI requestor=testerbot', "Unknown action failed, removing from memory", ] @@ -91,7 +105,7 @@ def test_queued_job(client, caplog): response = client.post("/github-webhook", headers=HEADERS, json=body_queued) assert response.status_code == 200 assert caplog.messages == [ - 'action=queued repository=foo/foo job_id=1 workflow="CI"' + 'action=queued repository=foo/foo job_id=1 workflow=CI requestor=testerbot' ] @@ -103,7 +117,7 @@ def test_logging_flow(client, caplog): response = client.post("/github-webhook", headers=HEADERS, json=body_queued) assert response.status_code == 200 assert ( - caplog.messages[0] == 'action=queued repository=foo/foo job_id=5 workflow="CI"' + caplog.messages[0] == 'action=queued repository=foo/foo job_id=5 workflow=CI requestor=testerbot' ) body_started = BODY.copy() @@ -113,15 +127,19 @@ def test_logging_flow(client, caplog): assert response.status_code == 200 assert ( caplog.messages[1] - == 'action=in_progress repository=foo/foo job_id=5 workflow="CI" time_to_start=5' + == 
'action=in_progress repository=foo/foo job_id=5 workflow=CI requestor=testerbot time_to_start=5 ' + 'runner_name= runner_public=false repository_private=false' + ) body_completed = BODY.copy() body_completed["action"] = "completed" + body_completed["workflow_job"]["conclusion"] = "success" body_completed["workflow_job"]["completed_at"] = "2023-01-27T14:05:00Z" response = client.post("/github-webhook", headers=HEADERS, json=body_completed) assert response.status_code == 200 assert ( caplog.messages[2] - == 'action=completed repository=foo/foo job_id=5 workflow="CI" time_to_finish=295' + == 'action=completed repository=foo/foo job_id=5 workflow=CI requestor=testerbot ' + 'time_to_finish=295 conclusion=success' )
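
For reference, here is roughly how the new `dict_to_logfmt` helper renders the context that `process_workflow_job` assembles. This is a minimal sketch run from `src/`; the dictionary and its values are illustrative, mirroring the `in_progress` case exercised by the tests rather than a real webhook delivery.

```python
from utils import dict_to_logfmt

# Illustrative context, shaped like the one process_workflow_job builds
# for an in_progress event (values are made up for this example).
context = {
    "action": "in_progress",
    "repository": "foo/foo",
    "job_id": 5,
    "workflow": "CI",
    "requestor": "testerbot",
    "time_to_start": 5,
    "runner_name": None,          # None values render as empty: runner_name=
    "runner_public": False,       # booleans are lowercased: runner_public=false
    "repository_private": False,
}

print(dict_to_logfmt(context))
# action=in_progress repository=foo/foo job_id=5 workflow=CI requestor=testerbot time_to_start=5 runner_name= runner_public=false repository_private=false
```

Values containing spaces (e.g. a workflow named "My Workflow") are wrapped in double quotes by the helper, so the output stays parseable as logfmt.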