From 11a0f99fa52947929b50bf959d652168b0b071eb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 3 Mar 2021 16:56:53 +0000 Subject: [PATCH] Replace `last_*_pdu_age` metrics with timestamps Following the advice at https://prometheus.io/docs/practices/instrumentation/#timestamps-not-time-since, it's preferable to export unix timestamps, not ages. There doesn't seem to be any particular naming convention for timestamp metrics. --- changelog.d/9540.feature | 1 + changelog.d/9540.removal | 1 + synapse/federation/federation_server.py | 10 ++++------ synapse/federation/sender/transaction_manager.py | 11 +++++------ 4 files changed, 11 insertions(+), 12 deletions(-) create mode 100644 changelog.d/9540.feature create mode 100644 changelog.d/9540.removal diff --git a/changelog.d/9540.feature b/changelog.d/9540.feature new file mode 100644 index 000000000000..5417e51b93eb --- /dev/null +++ b/changelog.d/9540.feature @@ -0,0 +1 @@ +Add `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time` prometheus metrics, which monitor federation delays by reporting the timestamps of messages sent and received to a set of remote servers. diff --git a/changelog.d/9540.removal b/changelog.d/9540.removal new file mode 100644 index 000000000000..d54f553cb9b9 --- /dev/null +++ b/changelog.d/9540.removal @@ -0,0 +1 @@ +The `synapse_federation_last_sent_pdu_age` and `synapse_federation_last_received_pdu_age` prometheus metrics have been removed. They are replaced by `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time`. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 2f832b47f65c..362895bf4286 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -90,10 +90,9 @@ "Time taken to process an event", ) - -last_pdu_age_metric = Gauge( - "synapse_federation_last_received_pdu_age", - "The age (in seconds) of the last PDU successfully received from the given domain", +last_pdu_ts_metric = Gauge( + "synapse_federation_last_received_pdu_time", + "The timestamp of the last PDU which was successfully received from the given domain", labelnames=("server_name",), ) @@ -369,8 +368,7 @@ async def process_pdus_for_room(room_id: str): ) if newest_pdu_ts and origin in self._federation_metrics_domains: - newest_pdu_age = self._clock.time_msec() - newest_pdu_ts - last_pdu_age_metric.labels(server_name=origin).set(newest_pdu_age / 1000) + last_pdu_ts_metric.labels(server_name=origin).set(newest_pdu_ts / 1000) return pdu_results diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 763aff296c87..2a9cd063c4ee 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -36,9 +36,9 @@ logger = logging.getLogger(__name__) -last_pdu_age_metric = Gauge( - "synapse_federation_last_sent_pdu_age", - "The age (in seconds) of the last PDU successfully sent to the given domain", +last_pdu_ts_metric = Gauge( + "synapse_federation_last_sent_pdu_time", + "The timestamp of the last PDU which was successfully sent to the given domain", labelnames=("server_name",), ) @@ -187,9 +187,8 @@ def json_data_cb(): if success and pdus and destination in self._federation_metrics_domains: last_pdu = pdus[-1] - last_pdu_age = self.clock.time_msec() - last_pdu.origin_server_ts - last_pdu_age_metric.labels(server_name=destination).set( - last_pdu_age / 1000 + last_pdu_ts_metric.labels(server_name=destination).set( + last_pdu.origin_server_ts / 1000 ) set_tag(tags.ERROR, not success)