From 4ae1f2e2bd72e43fabd030f64d017401cae918d4 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 16:29:34 +0300 Subject: [PATCH] Throttler: set timeouts on gRPC communication and on topo communication (#14165) Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/tabletserver/throttle/throttler.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/go/vt/vttablet/tabletserver/throttle/throttler.go b/go/vt/vttablet/tabletserver/throttle/throttler.go index 42c403a2295..7c681c4b293 100644 --- a/go/vt/vttablet/tabletserver/throttle/throttler.go +++ b/go/vt/vttablet/tabletserver/throttle/throttler.go @@ -714,6 +714,10 @@ func (throttler *Throttler) Operate(ctx context.Context) { func (throttler *Throttler) generateTabletHTTPProbeFunction(ctx context.Context, tmClient tmclient.TabletManagerClient, clusterName string, probe *mysql.Probe) (probeFunc func() *mysql.MySQLThrottleMetric) { return func() *mysql.MySQLThrottleMetric { + // Some reasonable timeout, to ensure we release connections even if they're hanging (otherwise grpc-go keeps polling those connections forever) + ctx, cancel := context.WithTimeout(ctx, 4*mysqlCollectInterval) + defer cancel() + // Hit a tablet's `check-self` via HTTP, and convert its CheckResult JSON output into a MySQLThrottleMetric mySQLThrottleMetric := mysql.NewMySQLThrottleMetric() mySQLThrottleMetric.ClusterName = clusterName @@ -872,6 +876,9 @@ func (throttler *Throttler) refreshMySQLInventory(ctx context.Context) error { } // The primary tablet is also in charge of collecting the shard's metrics err := func() error { + ctx, cancel := context.WithTimeout(ctx, mysqlRefreshInterval) + defer cancel() + tabletAliases, err := throttler.ts.FindAllTabletAliasesInShard(ctx, throttler.keyspace, throttler.shard) if err != nil { return err