Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fix handling of multi process mode in metrics endpoint #229

Merged
merged 7 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ jobs:

- name: Run multi process tests with Pytest
run: |
export PROMETHEUS_MULTIPROC_DIR=/tmp/one-two-three
export PROMETHEUS_MULTIPROC_DIR=/tmp/pfi-tests/multiproc
rm -rf $PROMETHEUS_MULTIPROC_DIR
mkdir -p $PROMETHEUS_MULTIPROC_DIR
poetry run pytest -k test_multiprocess \
poetry run pytest -k test_multiproc \
--cov-append --cov-report=term-missing --cov-report=xml --cov=src

- name: Upload coverage to Codecov
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0).

### Fixed

- Fixed multi process mode in `expose()` method that handles the `/metrics`
endpoint. Due to reusing the registry assigned to the instrumentator it could
lead to duplicated metrics. Now the endpoint follows recommendation from
Prometheus client library documentation. Also improved multi process unit
tests. Closed issue
[#228](https://github.com/trallnag/prometheus-fastapi-instrumentator/issues/228)
and
[#227](https://github.com/trallnag/prometheus-fastapi-instrumentator/issues/227).
Fixed in pull request
[#229](https://github.com/trallnag/prometheus-fastapi-instrumentator/pull/229).

- Fixed `NameError` and "Duplicated timeseries..." errors that started to occur
with latest versions of Starlette / FastAPI in combination with multiple
middlewares. Instrumentation closures are now optional and the instrumentator
Expand Down
12 changes: 8 additions & 4 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ tasks:
- task: fmt
- task: lint
- task: test
- task: test-mp

fmt:
desc: Run formatters.
Expand All @@ -22,14 +23,17 @@ tasks:
test:
desc: Run tests.
cmds:
- poetry run pytest --cov-report=term-missing --cov-report=xml --cov=src
- poetry run pytest {{ .COVERAGE }}
vars:
COVERAGE: --cov-report=term-missing --cov-report=xml --cov=src

test-mp:
desc: Run multi process tests.
cmds:
- rm -rf $PROMETHEUS_MULTIPROC_DIR
- mkdir -p $PROMETHEUS_MULTIPROC_DIR
- poetry run pytest -k test_multiprocess
--cov-append --cov-report=term-missing --cov-report=xml --cov=src
- poetry run pytest -k test_multiproc {{ .COVERAGE }}
vars:
COVERAGE: --cov-append --cov-report=term-missing --cov-report=xml --cov=src
env:
PROMETHEUS_MULTIPROC_DIR: /tmp/prometheus-fastapi-instrumentator/multiproc
PROMETHEUS_MULTIPROC_DIR: /tmp/pfi-tests/multiproc
517 changes: 202 additions & 315 deletions poetry.lock

Large diffs are not rendered by default.

23 changes: 12 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,22 @@ keywords = ["prometheus", "instrumentation", "fastapi", "exporter", "metrics"]
python = ">= 3.7.0, < 4.0.0"
fastapi = ">= 0.38.1, < 1.0.0"
prometheus-client = ">= 0.8.0, < 1.0.0"
starlette = "0.25.0"

[tool.poetry.group.dev.dependencies]
httpx = "^0.23.1"
black = "^22.12.0"
httpx = "^0.23.3"
black = "^23.1.0"
flake8 = "^5.0.4"
requests = "^2.28.1"
pytest = "^7.2.0"
pytest-cov = "^3.0.0"
rope = "^1.6.0"
requests = "^2.28.2"
pytest = "^7.2.2"
pytest-cov = "^4.0.0"
rope = "^1.7.0"
isort = "^5.11.3"
mypy = "^0.971"
devtools = "^0.9.0"
pre-commit = "^2.20.0"
asgiref = "^3.5.2"
mypy = "^1.1.1"
devtools = "^0.10.0"
asgiref = "^3.6.0"
uvicorn = "^0.20.0"
gunicorn = "^20.1.0"
pytest-asyncio = "^0.20.3"

[tool.black]
line-length = 90
Expand All @@ -46,3 +46,4 @@ ignore_missing_imports = true
[tool.pytest.ini_options]
norecursedirs = "tests/helpers"
markers = ["slow: mark test as slow."]
asyncio_mode = "auto"
28 changes: 15 additions & 13 deletions src/prometheus_fastapi_instrumentator/instrumentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(
should_round_latency_decimals: bool = False,
should_respect_env_var: bool = False,
should_instrument_requests_inprogress: bool = False,
excluded_handlers: List[str] = None,
excluded_handlers: List[str] = [],
round_latency_decimals: int = 4,
env_var_name: str = "ENABLE_METRICS",
inprogress_name: str = "http_requests_inprogress",
Expand Down Expand Up @@ -109,9 +109,6 @@ def __init__(
self.inprogress_name = inprogress_name
self.inprogress_labels = inprogress_labels

if excluded_handlers is None:
excluded_handlers = []

self.excluded_handlers = [re.compile(path) for path in excluded_handlers]

self.instrumentations: List[Callable[[metrics.Info], None]] = []
Expand All @@ -131,17 +128,15 @@ def __init__(

if registry:
self.registry = registry
elif "PROMETHEUS_MULTIPROC_DIR" in os.environ:
else:
self.registry = REGISTRY

if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
pmd = os.environ["PROMETHEUS_MULTIPROC_DIR"]
if os.path.isdir(pmd):
self.registry = CollectorRegistry()
multiprocess.MultiProcessCollector(self.registry)
else:
if not os.path.isdir(pmd):
raise ValueError(
f"Env var PROMETHEUS_MULTIPROC_DIR='{pmd}' not a directory."
)
else:
self.registry = REGISTRY

def instrument(
self,
Expand Down Expand Up @@ -255,12 +250,19 @@ def expose(
def metrics(request: Request):
"""Endpoint that serves Prometheus metrics."""

ephemeral_registry = self.registry
if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
ephemeral_registry = CollectorRegistry()
multiprocess.MultiProcessCollector(ephemeral_registry)

if should_gzip and "gzip" in request.headers.get("Accept-Encoding", ""):
resp = Response(content=gzip.compress(generate_latest(self.registry)))
resp = Response(
content=gzip.compress(generate_latest(ephemeral_registry))
)
resp.headers["Content-Type"] = CONTENT_TYPE_LATEST
resp.headers["Content-Encoding"] = "gzip"
else:
resp = Response(content=generate_latest(self.registry))
resp = Response(content=generate_latest(ephemeral_registry))
resp.headers["Content-Type"] = CONTENT_TYPE_LATEST

return resp
Expand Down
23 changes: 23 additions & 0 deletions tests/helpers/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import shutil

from prometheus_client import REGISTRY


Expand Down Expand Up @@ -25,3 +28,23 @@ def reset_collectors() -> None:
gc_collector.GCCollector()

print(f"after re-register collectors={list(REGISTRY._collector_to_names.keys())}")


def is_prometheus_multiproc_valid() -> bool:
"""Checks if PROMETHEUS_MULTIPROC_DIR is set and a directory."""

if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
pmd = os.environ["PROMETHEUS_MULTIPROC_DIR"]
if os.path.isdir(pmd):
return True
else:
return False


def delete_dir_content(dirpath):
for filename in os.listdir(dirpath):
filepath = os.path.join(dirpath, filename)
try:
shutil.rmtree(filepath)
except OSError:
os.remove(filepath)
120 changes: 1 addition & 119 deletions tests/test_instrumentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from http import HTTPStatus
from typing import Any, Dict, Optional

import pytest
from fastapi import FastAPI, HTTPException
from prometheus_client import CONTENT_TYPE_LATEST, REGISTRY, Info, generate_latest
from requests import Response as TestClientResponse
Expand Down Expand Up @@ -410,9 +409,7 @@ def test_excluding_handlers_regex():

def test_excluded_handlers_none():
app = create_app()
exporter = (
Instrumentator(excluded_handlers=None).add(metrics.latency()).instrument(app)
)
exporter = Instrumentator(excluded_handlers=[]).add(metrics.latency()).instrument(app)
expose_metrics(app)

assert len(exporter.excluded_handlers) == 0
Expand Down Expand Up @@ -585,118 +582,3 @@ def sync_function(_):
)

assert result_sync > 0


# ==============================================================================
# Test with multiprocess reg.


def is_prometheus_multiproc_set():
if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
pmd = os.environ["PROMETHEUS_MULTIPROC_DIR"]
if os.path.isdir(pmd):
return True
else:
return False


# The environment variable MUST be set before anything regarding Prometheus is
# imported. That is why we cannot simply use `tempfile` or the fixtures
# provided by pytest. Test with:
# mkdir -p /tmp/test_multiproc;
# export PROMETHEUS_MULTIPROC_DIR=/tmp/test_multiproc;
# pytest -k test_multiprocess_reg;
# rm -rf /tmp/test_multiproc;
# unset PROMETHEUS_MULTIPROC_DIR


@pytest.mark.skipif(
is_prometheus_multiproc_set() is False,
reason="Environment variable must be set before starting Python process.",
)
def test_multiprocess_reg():
app = create_app()
Instrumentator(excluded_handlers=["/metrics"]).add(
metrics.latency(
buckets=(
1,
2,
3,
)
)
).instrument(app).expose(app)
client = TestClient(app)

get_response(client, "/")

response = get_response(client, "/metrics")
assert response.status_code == 200
assert b"Multiprocess" in response.content
assert b"# HELP process_cpu_seconds_total" not in response.content
assert b"http_request_duration_seconds" in response.content


@pytest.mark.skipif(is_prometheus_multiproc_set() is True, reason="Will never work.")
def test_multiprocess_reg_is_not(monkeypatch, tmp_path):
monkeypatch.setenv("PROMETHEUS_MULTIPROC_DIR", str(tmp_path))

app = create_app()
Instrumentator(excluded_handlers=["/metrics"]).add(metrics.latency()).instrument(
app
).expose(app)

client = TestClient(app)

get_response(client, "/")

response = get_response(client, "/metrics")
assert response.status_code == 200
assert b"" == response.content


@pytest.mark.skipif(
is_prometheus_multiproc_set() is True, reason="Just test handling of env detection."
)
def test_multiprocess_env_folder(monkeypatch, tmp_path):
monkeypatch.setenv("PROMETHEUS_MULTIPROC_DIR", "DOES/NOT/EXIST")

app = create_app()
with pytest.raises(Exception):
Instrumentator(buckets=(1, 2, 3,)).add(
metrics.latency()
).instrument(app)


# ------------------------------------------------------------------------------
# Test inprogress.


@pytest.mark.skipif(
is_prometheus_multiproc_set() is False,
reason="Environment variable must be set before starting Python process.",
)
def test_multiprocess_inprogress():
app = create_app()
Instrumentator(
should_instrument_requests_inprogress=True, inprogress_labels=True
).instrument(app).expose(app)
client = TestClient(app)

async def main():
loop = asyncio.get_event_loop()
futures = [
loop.run_in_executor(None, get_response, client, "/sleep?seconds=0.1")
for i in range(5)
]
for response in await asyncio.gather(*futures):
assert response.status_code == 200

loop = asyncio.get_event_loop()
loop.run_until_complete(main())

response = get_response(client, "/metrics")
assert response.status_code == 200
assert b"Multiprocess" in response.content
assert b"# HELP process_cpu_seconds_total" not in response.content
assert b"http_request_duration_seconds" in response.content
assert b'http_requests_inprogress{handler="/sleep",method="GET"}' in response.content
Loading