From 7e403a0c713e873bd245ca6c54746af2d28ad753 Mon Sep 17 00:00:00 2001 From: Bharath Vissapragada Date: Mon, 18 Mar 2024 17:41:06 -0700 Subject: [PATCH] raft/recovery: better metric for recovery bandwidth consumption Adds a new public metric for the recovery throttle that tracks how much bandwidth is consumed by raft recovery. This is more useful because it helps tune raft_learner_recovery_rate: if the entire bandwidth is in use, it is time to raise the recovery rate. (cherry picked from commit e9968857f9b6f442b8e5ae65ae0f7dda7afca208) --- src/v/raft/coordinated_recovery_throttle.cc | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/v/raft/coordinated_recovery_throttle.cc b/src/v/raft/coordinated_recovery_throttle.cc index 40a184b66609..f058c68d266a 100644 --- a/src/v/raft/coordinated_recovery_throttle.cc +++ b/src/v/raft/coordinated_recovery_throttle.cc @@ -91,11 +91,22 @@ void coordinated_recovery_throttle::setup_metrics() { namespace sm = ss::metrics; _public_metrics.add_group( prometheus_sanitize::metrics_name("raft:recovery"), - {sm::make_gauge( - "partition_movement_available_bandwidth", - [this] { return _throttler.available(); }, - sm::description( - "Bandwidth available for partition movement. bytes/sec"))}); + {// note: deprecate partition_movement_available_bandwidth + // in favor of partition_movement_consumed_bandwidth when + // possible. + sm::make_gauge( + "partition_movement_available_bandwidth", + [this] { return _throttler.available(); }, + sm::description( + "Bandwidth available for partition movement. bytes/sec")), + sm::make_gauge( + "partition_movement_consumed_bandwidth", + [this] { + return _throttler.last_reset_capacity() + - _throttler.available(); + }, + sm::description( + "Bandwidth consumed for partition movement. bytes/sec"))}); } }