From 0e8064b049adaca18a0c1fbed4bae884265e23de Mon Sep 17 00:00:00 2001 From: Daniel Babiak Date: Mon, 4 Dec 2023 21:19:51 -0500 Subject: [PATCH] only report wait times from clients currently waiting to match behavior of pgbouncer (#655) * Change maxwait to only report wait times from clients currently waiting to match behavior of pgbouncer * Fix tests --- src/stats/client.rs | 15 ++++++++++----- src/stats/pool.rs | 14 +++++++++++--- tests/ruby/stats_spec.rb | 16 ++++++++++++---- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/stats/client.rs b/src/stats/client.rs index 6a30ec15..0079fbad 100644 --- a/src/stats/client.rs +++ b/src/stats/client.rs @@ -38,8 +38,10 @@ pub struct ClientStats { /// Total time spent waiting for a connection from pool, measures in microseconds pub total_wait_time: Arc, - /// Maximum time spent waiting for a connection from pool, measures in microseconds - pub max_wait_time: Arc, + /// When this client started waiting. + /// Stored as microseconds since connect_time so it can fit in an AtomicU64 instead + /// of us using an "AtomicInstant" + pub wait_start: Arc, /// Current state of the client pub state: Arc, @@ -63,7 +65,7 @@ impl Default for ClientStats { username: String::new(), pool_name: String::new(), total_wait_time: Arc::new(AtomicU64::new(0)), - max_wait_time: Arc::new(AtomicU64::new(0)), + wait_start: Arc::new(AtomicU64::new(0)), state: Arc::new(AtomicClientState::new(ClientState::Idle)), transaction_count: Arc::new(AtomicU64::new(0)), query_count: Arc::new(AtomicU64::new(0)), @@ -111,6 +113,11 @@ impl ClientStats { /// Reports a client is waiting for a connection pub fn waiting(&self) { + // safe to truncate, we only lose info if duration is greater than ~585,000 years + self.wait_start.store( + Instant::now().duration_since(self.connect_time).as_micros() as u64, + Ordering::Relaxed, + ); self.state.store(ClientState::Waiting, Ordering::Relaxed); } @@ -134,8 +141,6 @@ impl ClientStats { pub fn checkout_time(&self, microseconds: u64) { self.total_wait_time .fetch_add(microseconds, Ordering::Relaxed); - self.max_wait_time - .fetch_max(microseconds, Ordering::Relaxed); } /// Report a query executed by a client against a server diff --git a/src/stats/pool.rs b/src/stats/pool.rs index 46c74632..a3724bdd 100644 --- a/src/stats/pool.rs +++ b/src/stats/pool.rs @@ -4,6 +4,7 @@ use super::{ClientState, ServerState}; use crate::{config::PoolMode, messages::DataType, pool::PoolIdentifier}; use std::collections::HashMap; use std::sync::atomic::*; +use tokio::time::Instant; use crate::pool::get_all_pools; @@ -53,6 +54,7 @@ impl PoolStats { ); } + let now = Instant::now(); for client in client_map.values() { match map.get_mut(&PoolIdentifier { db: client.pool_name(), @@ -62,10 +64,16 @@ impl PoolStats { match client.state.load(Ordering::Relaxed) { ClientState::Active => pool_stats.cl_active += 1, ClientState::Idle => pool_stats.cl_idle += 1, - ClientState::Waiting => pool_stats.cl_waiting += 1, + ClientState::Waiting => { + pool_stats.cl_waiting += 1; + // wait_start is measured as microseconds since connect_time + // so compute wait_time as (now() - connect_time) - (wait_start - connect_time) + let duration_since_connect = now.duration_since(client.connect_time()); + let wait_time = (duration_since_connect.as_micros() as u64) + - client.wait_start.load(Ordering::Relaxed); + pool_stats.maxwait = std::cmp::max(pool_stats.maxwait, wait_time); + } } - let max_wait = client.max_wait_time.load(Ordering::Relaxed); - pool_stats.maxwait = std::cmp::max(pool_stats.maxwait, max_wait); } None => debug!("Client from an obselete pool"), } diff --git a/tests/ruby/stats_spec.rb b/tests/ruby/stats_spec.rb index ddf63cd3..2d635f0a 100644 --- a/tests/ruby/stats_spec.rb +++ b/tests/ruby/stats_spec.rb @@ -233,7 +233,7 @@ sleep(1.1) # Allow time for stats to update admin_conn = PG::connect(processes.pgcat.admin_connection_string) results = admin_conn.async_exec("SHOW POOLS")[0] - %w[cl_idle cl_cancel_req sv_idle sv_used sv_tested sv_login maxwait].each do |s| + %w[cl_idle cl_cancel_req sv_idle sv_used sv_tested sv_login].each do |s| raise StandardError, "Field #{s} was expected to be 0 but found to be #{results[s]}" if results[s] != "0" end @@ -260,12 +260,20 @@ threads << Thread.new { c.async_exec("SELECT pg_sleep(1.5)") rescue nil } end - sleep(2.5) # Allow time for stats to update admin_conn = PG::connect(processes.pgcat.admin_connection_string) - results = admin_conn.async_exec("SHOW POOLS")[0] + # two connections waiting => they report wait time + sleep(1.1) # Allow time for stats to update + results = admin_conn.async_exec("SHOW POOLS")[0] expect(results["maxwait"]).to eq("1") - expect(results["maxwait_us"].to_i).to be_within(200_000).of(500_000) + expect(results["maxwait_us"].to_i).to be_within(200_000).of(100_000) + + sleep(2.5) # Allow time for stats to update + results = admin_conn.async_exec("SHOW POOLS")[0] + + # no connections waiting => no reported wait time + expect(results["maxwait"]).to eq("0") + expect(results["maxwait_us"]).to eq("0") connections.map(&:close) sleep(4.5) # Allow time for stats to update