Merge pull request #1852 from camptocamp/stats-full-prometheus-client

Use official Prometheus client, deprecate old one and StatsD
camptocamp · Jun 8, 2023 · e656c54 · e656c54
2 parents 14aaa21 + ed4c37a
commit e656c54
Show file tree

Hide file tree

Showing 43 changed files with 888 additions and 1,038 deletions.
diff --git a/.prospector.yaml b/.prospector.yaml
@@ -15,6 +15,10 @@ pylint:
       - lxml
   disable:
     - too-many-return-statements
+    - too-many-arguments
+    - too-many-branches
+    - too-many-instance-attributes
+    - too-few-public-methods
     - global-statement
     - line-too-long
     - import-outside-toplevel
@@ -24,8 +28,6 @@ pylint:
     - no-self-use
     - import-error
     - superfluous-parens
-    - too-few-public-methods
-    - too-many-arguments
     - ungrouped-imports
     - unused-argument
     - use-symbolic-message-instead

diff --git a/BREAKING_CHANGES.mg → BREAKING_CHANGES.md b/BREAKING_CHANGES.mg → BREAKING_CHANGES.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## Release 6.0
+
+- The stats are no longer published to StatsD; use the Prometheus client instead.
+
 ## Release 5.2
 
 - `c2cwsgiutils_run` is completely removed (not used since 5.0).

diff --git a/Dockerfile b/Dockerfile
@@ -70,7 +70,9 @@ RUN --mount=type=cache,target=/root/.cache \
   python3 -m pip install --disable-pip-version-check --no-deps --editable=. \
   && python3 -m compileall -q \
   && python3 -m compileall /usr/local/lib/python3.* /usr/lib/python3.* . -q \
-  && python3 -c 'import c2cwsgiutils'
+  && python3 -c 'import c2cwsgiutils' \
+  && mkdir -p /prometheus-metrics \
+  && chmod a+rwx /prometheus-metrics
 
 ENV C2C_BASE_PATH=/c2c \
   C2C_REDIS_URL= \

diff --git a/Makefile b/Makefile
@@ -79,9 +79,9 @@ pull: ## Pull the Docker images
 	for image in `find -name "docker-compose*.yaml" | xargs grep --no-filename "image:" | awk '{print $$2}' | sort -u | grep -v $(DOCKER_BASE) | grep -v rancher`; do docker pull $$image; done
 
 .PHONY: run
-run: build_test_app ## Run the test application
+run: build_test_app build_docker ## Run the test application
 	# cp acceptance_tests/tests/docker-compose.override.sample.yaml acceptance_tests/tests/docker-compose.override.yaml
-	cd acceptance_tests/tests/; TEST_IP=172.17.0.1 docker-compose up
+	cd acceptance_tests/tests/; docker-compose up --detach
 
 .PHONY: mypy_local
 mypy_local: .venv/timestamp

diff --git a/README.md b/README.md
@@ -3,8 +3,7 @@
 This is a Python 3 library (>=3.5) providing common tools for Camptocamp WSGI
 applications:
 
-- Provide a small framework for gathering performance statistics about
-  a web application (statsd protocol)
+- Provide prometheus metrics
 - Allow the use of a master/slave PostgreSQL configuration
 - Logging handler for CEE/UDP logs
   - An optional view to change the log levels at runtime
@@ -43,28 +42,28 @@ You should install `c2cwsgiutils` with the tool you use to manage your pip depen
 
 In the `Dockerfile` you should add the following lines:
 
-```
+```dockerfile
 # Generate the version file.
 RUN c2cwsgiutils-genversion $(git rev-parse HEAD)
 
 CMD ["gunicorn", "--paste=/app/production.ini"]
 
 # Default values for the environment variables
 ENV \
-    DEVELOPMENT=0 \
-    SQLALCHEMY_POOL_RECYCLE=30 \
-    SQLALCHEMY_POOL_SIZE=5 \
-    SQLALCHEMY_MAX_OVERFLOW=25 \
-    SQLALCHEMY_SLAVE_POOL_RECYCLE=30 \
-    SQLALCHEMY_SLAVE_POOL_SIZE=5 \
-    SQLALCHEMY_SLAVE_MAX_OVERFLOW=25\
-    LOG_TYPE=console \
-    OTHER_LOG_LEVEL=WARNING \
-    GUNICORN_LOG_LEVEL=WARNING \
-    GUNICORN_ACCESS_LOG_LEVEL=INFO \
-    SQL_LOG_LEVEL=WARNING \
-    C2CWSGIUTILS_LOG_LEVEL=WARNING \
-    LOG_LEVEL=INFO
+  DEVELOPMENT=0 \
+  SQLALCHEMY_POOL_RECYCLE=30 \
+  SQLALCHEMY_POOL_SIZE=5 \
+  SQLALCHEMY_MAX_OVERFLOW=25 \
+  SQLALCHEMY_SLAVE_POOL_RECYCLE=30 \
+  SQLALCHEMY_SLAVE_POOL_SIZE=5 \
+  SQLALCHEMY_SLAVE_MAX_OVERFLOW=25\
+  LOG_TYPE=console \
+  OTHER_LOG_LEVEL=WARNING \
+  GUNICORN_LOG_LEVEL=WARNING \
+  GUNICORN_ACCESS_LOG_LEVEL=INFO \
+  SQL_LOG_LEVEL=WARNING \
+  C2CWSGIUTILS_LOG_LEVEL=WARNING \
+  LOG_LEVEL=INFO
 ```
 
 Add in your `main` function.
@@ -270,47 +269,6 @@ The requests module is also patched to monitor requests done without timeout. In
 configure a default timeout with the `C2C_REQUESTS_DEFAULT_TIMEOUT` environment variable
 (`c2c.requests_default_timeout`). If no timeout and no default is specified, a warning is issued.
 
-## Metrics
-
-To enable and configure the metrics framework, you can use:
-
-- STATS_VIEW (c2c.stats_view): if defined, will enable the stats view `{C2C_BASE_PATH}/stats.json`
-- STATSD_ADDRESS (c2c.statsd_address): if defined, send stats to the given statsd server
-- STATSD_PREFIX (c2c.statsd_prefix): prefix to add to every metric names
-- STATSD_USE_TAGS: If true, automatic metrics will use tags
-- STATSD*TAG*{tag_name}: To set a global tag for the service
-
-If enabled, some metrics are automatically generated:
-
-- {STATSD_PREFIX}.route.{verb}.{route_name}.{status}: The time to process a query (includes rendering)
-- {STATSD_PREFIX}.render.{verb}.{route_name}.{status}: The time to render a query
-- {STATSD_PREFIX}.sql.{query}: The time to execute the given SQL query (simplified and normalized)
-- {STATSD_PREFIX}.requests.{scheme}.{hostname}.{port}.{verb}.{status}: The time to execute HTTP requests to
-  outside services (only the time between the start of sending of the request and when the header is
-  back with a chunk of the body)
-- {STATSD_PREFIX}.redis.{command}: The time to execute the given Redis command
-
-You can manually measure the time spent on something like that:
-
-```python
-from c2cwsgiutils import stats
-with stats.timer_context(['toto', 'tutu']):
-    do_something()
-```
-
-It will only add a timer event in case of success. If you want to measure both success and failures, do that:
-
-```python
-from c2cwsgiutils import stats
-with stats.outcome_timer_context(['toto', 'tutu']):
-    do_something()
-```
-
-Other functions exists to generate metrics. Look at the `c2cwsgiutils.stats` module.
-
-Look at the `c2cwsgiutils-stats-db` utility if you want to generate statistics (gauges) about the
-row counts.
-
 ## SQL profiler
 
 The SQL profiler must be configured with the `C2C_SQL_PROFILER_ENABLED` environment variable. That enables a view
@@ -458,27 +416,100 @@ If the `/app/versions.json` exists, a view is added (`{C2C_BASE_PATH}/versions.j
 version of an app. This file is generated by calling the `c2cwsgiutils-genversion [$GIT_TAG] $GIT_HASH`
 command line. Usually done in the [Dockerfile](acceptance_tests/app/Dockerfile) of the WSGI application.
 
-## Metrics
+## Prometheus
+
+[Prometheus client](https://github.com/prometheus/client_python) is integrated in c2cwsgiutils.
+
+It works in multi-process mode, with the limitations listed in the
+[`prometheus_client` documentation](https://github.com/prometheus/client_python#multiprocess-mode-eg-gunicorn).
+
+To enable it, set the `PROMETHEUS_PORT` environment variable.
+For security reasons, this port should not be publicly exposed.
+
+We can customize it with the following environment variables:
+
+- `PROMETHEUS_PREFIX`: to customize the prefix, default is `c2cwsgiutils-`.
+- `C2C_PROMETHEUS_PACKAGES`: the packages that will be present in the version information, default is `c2cwsgiutils,pyramid,gunicorn,sqlalchemy`.
+- `C2C_PROMETHEUS_APPLICATION_PACKAGES`: the packages that will be present in the version information as application.
+
+And you should add in your `gunicorn.conf.py`:
+
+```python
+from prometheus_client import multiprocess
+
+
+def on_starting(server):
+    from c2cwsgiutils import prometheus
+
+    del server
 
-The path `/metrics` provide some metrics for Prometheus.
-By default we have the `smap` `pss`, but we can easily add the `rss`, `size` or your custom settings:
+    prometheus.start()
 
-Example:
 
+def post_fork(server, worker):
+    from c2cwsgiutils import prometheus
+
+    del server, worker
+
+    prometheus.cleanup()
+
+
+def child_exit(server, worker):
+    del server
+
+    multiprocess.mark_process_dead(worker.pid)
+```
+
+In your `Dockerfile` you should add:
+
+```dockerfile
+RUN mkdir -p /prometheus-metrics \
+    && chmod a+rwx /prometheus-metrics
+ENV PROMETHEUS_MULTIPROC_DIR=/prometheus-metrics
 ```
-from import c2cwsgiutils.metrics import add_provider, Provider, MemoryMapProvider
 
-class CustomProvider(Provider):
-    def __init__(self):
-        super().__init__("my_metrics", "My Metric")
+### Add custom metric collector
 
-    def get_data(self):
-        return [({'metadata_key': 'matadata_value'}, metrics_value)]
+See [official documentation](https://github.com/prometheus/client_python#custom-collectors).
 
-add_provider(MemoryMapProvider('rss'))
-add_provider(CustomProvider())
+For a collector related to the Unix process:
+
+```python
+from c2cwsgiutils import broadcast, prometheus
+
+prometheus.MULTI_PROCESS_COLLECTOR_BROADCAST_CHANNELS.append("prometheus_collector_custom")
+broadcast.subscribe("prometheus_collector_custom", _broadcast_collector_custom)
+my_custom_collector_instance = MyCustomCollector()
+
+
+def _broadcast_collector_custom() -> List[prometheus.SerializedGauge]:
+    """Get the gauges collected by the custom collector."""
+
+    return prometheus.serialize_collected_data(my_custom_collector_instance)
 ```
 
+For a collector related to the host, use this in `gunicorn.conf.py`:
+
+```python
+def on_starting(server):
+    from c2cwsgiutils import prometheus
+
+    del server
+
+    registry = CollectorRegistry()
+    registry.register(MyCollector())
+    prometheus.start(registry)
+```
+
+### Database metrics
+
+Look at the `c2cwsgiutils-stats-db` utility if you want to generate statistics (gauges) about the
+row counts.
+
+### Usage of metrics
+
+With c2cwsgiutils, each instance (Pod) has its own metrics, so they must be aggregated to get the metrics for the whole service. You will probably need to use `sum by (<fields>) (<metric>)` to get the metric (especially for counters).
+
 ## Custom scripts
 
 To have the application initialized in a script you should use the
@@ -531,7 +562,7 @@ have dumps of a few things:
 - memory usage: `{C2C_BASE_PATH}/debug/memory?secret={C2C_SECRET}&limit=30&analyze_type=builtins.dict&python_internals_map=false`
 - object ref: `{C2C_BASE_PATH}/debug/show_refs.dot?secret={C2C_SECRET}&analyze_type=gunicorn.app.wsgiapp.WSGIApplication&analyze_id=12345&max_depth=3&too_many=10&filter=1024&no_extra_info&backrefs`
   `analyze_type` and `analyze_id` should not be used together; you can use it like:
-  ```
+  ```bash
   curl "<URL>" > /tmp/show_refs.dot
   dot -Lg -Tpng /tmp/show_refs.dot > /tmp/show_refs.png
   ```
@@ -592,7 +623,7 @@ client. In production mode, you can still get them by sending the secret defined
 
 If you want to use pyramid_debugtoolbar, you need to disable exception handling and configure it like that:
 
-```
+```ini
 pyramid.includes =
     pyramid_debugtoolbar
 debugtoolbar.enabled = true

diff --git a/acceptance_tests/app/c2cwsgiutils_app/__init__.py b/acceptance_tests/app/c2cwsgiutils_app/__init__.py
@@ -25,6 +25,7 @@ def main(_, **settings):
     """
     This function returns a Pyramid WSGI application.
     """
+
     config = Configurator(settings=settings, route_prefix="/api")
 
     # Initialize the broadcast view before c2cwsgiutils is initialized. This allows to test the

diff --git a/acceptance_tests/app/c2cwsgiutils_app/services.py b/acceptance_tests/app/c2cwsgiutils_app/services.py
@@ -1,5 +1,6 @@
 import logging
 
+import prometheus_client
 import requests
 from pyramid.httpexceptions import (
     HTTPBadRequest,
@@ -10,10 +11,14 @@
 )
 
 from c2cwsgiutils import sentry, services
-from c2cwsgiutils.stats import increment_counter, set_gauge, timer_context
 
 from c2cwsgiutils_app import models
 
+_PROMETHEUS_TEST_COUNTER = prometheus_client.Counter("test_counter", "Test counter")
+_PROMETHEUS_TEST_GAUGE = prometheus_client.Gauge("test_gauge", "Test gauge", ["value", "toto"])
+_PROMETHEUS_TEST_SUMMARY = prometheus_client.Summary("test_summary", "Test summary")
+
+
 ping_service = services.create("ping", "/ping")
 hello_service = services.create("hello", "/hello", cors_credentials=True)
 error_service = services.create("error", "/error")
@@ -39,10 +44,10 @@ def hello_get(request):
     """
     Will use the slave.
     """
-    with timer_context(["sql", "read_hello"]):
+    with _PROMETHEUS_TEST_SUMMARY.time():
         hello = request.dbsession.query(models.Hello).first()
-    increment_counter(["test", "counter"])
-    set_gauge(["test", "gauge/s"], 42, tags={"value": 24, "toto": "tutu"})
+    _PROMETHEUS_TEST_COUNTER.inc()
+    _PROMETHEUS_TEST_GAUGE.labels(value=24, toto="tutu").set(42)
     return {"value": hello.value}
 
 

diff --git a/acceptance_tests/app/gunicorn.conf.py b/acceptance_tests/app/gunicorn.conf.py
@@ -4,6 +4,8 @@
 ###
 import os
 
+from prometheus_client import multiprocess
+
 from c2cwsgiutils import get_config_defaults, get_logconfig_dict, get_paste_config
 
 bind = ":8080"
@@ -31,3 +33,25 @@
     print(logconfig_dict)
 
 raw_paste_global_conf = ["=".join(e) for e in get_config_defaults().items()]
+
+
+def on_starting(server):
+    from c2cwsgiutils import prometheus
+
+    del server
+
+    prometheus.start()
+
+
+def post_fork(server, worker):
+    from c2cwsgiutils import prometheus
+
+    del server, worker
+
+    prometheus.cleanup()
+
+
+def child_exit(server, worker):
+    del server
+
+    multiprocess.mark_process_dead(worker.pid)
diff --git a/acceptance_tests/tests/.env b/acceptance_tests/tests/.env
@@ -1,3 +1,4 @@
 COMPOSE_PROJECT_NAME=c2cwsgiutils
 SQLALCHEMY_URL=postgresql://www-data:www-data@db:5432/test
 SQLALCHEMY_SLAVE_URL=postgresql://www-data:www-data@db_slave:5432/test
+TEST_IP=172.17.0.1
-Original file line number
+Diff line change
@@ Expand Up / @@ -25,6 +25,7 @@ def main(_, **settings): @@
         """
         This function returns a Pyramid WSGI application.
         """
         config = Configurator(settings=settings, route_prefix="/api")
         # Initialize the broadcast view before c2cwsgiutils is initialized. This allows to test the
@@ Expand Down @@