From 06834fd31b6ef329d3ee964fdd9621bed2c7aa09 Mon Sep 17 00:00:00 2001 From: "Eduardo J. Ortega U" <5791035+ejortegau@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:48:05 +0200 Subject: [PATCH 1/2] Fix healthcheck filters https://github.com/vitessio/vitess/pull/16871 added filtering in the health check, but later https://github.com/slackhq/vitess/pull/514 undid that. So aiming at fixing it here. With this change, essentially, the filters passed in NewHealthCheck() get actually applied to the topology watchers, instead of it using a newly instantiated, empty filter as it's currently happening. Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com> --- go/vt/discovery/healthcheck.go | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index 3ac83d79ef1..8d0476ba382 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -365,7 +365,6 @@ func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Dur cellAliases: make(map[string]string), } var topoWatchers []*TopologyWatcher - var filter TabletFilter cells := strings.Split(cellsToWatch, ",") if cellsToWatch == "" { cells = append(cells, localCell) @@ -376,19 +375,8 @@ func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Dur if c == "" { continue } - if len(tabletFilters) > 0 { - if len(KeyspacesToWatch) > 0 { - log.Exitf("Only one of -keyspaces_to_watch and -tablet_filters may be specified at a time") - } - fbs, err := NewFilterByShard(tabletFilters) - if err != nil { - log.Exitf("Cannot parse tablet_filters parameter: %v", err) - } - filter = fbs - } else if len(KeyspacesToWatch) > 0 { - filter = NewFilterByKeyspace(KeyspacesToWatch) - } - topoWatchers = append(topoWatchers, NewTopologyWatcher(ctx, topoServer, hc, filter, c, refreshInterval, refreshKnownTablets, topoReadConcurrency)) + + topoWatchers = append(topoWatchers, NewTopologyWatcher(ctx, topoServer, hc, filters, c, refreshInterval, refreshKnownTablets, topoReadConcurrency)) } hc.topoWatchers = topoWatchers From a71d3b0285907386ea583374e6de50161a013c42 Mon Sep 17 00:00:00 2001 From: "Eduardo J. Ortega U" <5791035+ejortegau@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:11:23 +0200 Subject: [PATCH 2/2] Improve tests by getting them from https://github.com/vitessio/vitess/pull/16871 Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com> --- go/vt/discovery/topology_watcher_test.go | 36 ++++++++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/go/vt/discovery/topology_watcher_test.go b/go/vt/discovery/topology_watcher_test.go index c984606f823..485bb23b09d 100644 --- a/go/vt/discovery/topology_watcher_test.go +++ b/go/vt/discovery/topology_watcher_test.go @@ -112,10 +112,11 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) { defer ts.Close() fhc := NewFakeHealthCheck(nil) defer fhc.Close() + filter := NewFilterByKeyspace([]string{"keyspace"}) logger := logutil.NewMemoryLogger() topologyWatcherOperations.ZeroAll() counts := topologyWatcherOperations.Counts() - tw := NewTopologyWatcher(context.Background(), ts, fhc, nil, "aa", 10*time.Minute, refreshKnownTablets, 5) + tw := NewTopologyWatcher(context.Background(), ts, fhc, filter, "aa", 10*time.Minute, refreshKnownTablets, 5) counts = checkOpCounts(t, counts, map[string]int64{}) checkChecksum(t, tw, 0) @@ -162,10 +163,31 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) { require.NoError(t, ts.CreateTablet(context.Background(), tablet2), "CreateTablet failed for %v", tablet2.Alias) tw.loadTablets() + // Confirm second tablet triggers ListTablets + AddTablet calls. counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "AddTablet": 1}) checkChecksum(t, tw, 2762153755) - // Check the new tablet is returned by GetAllTablets(). + // Add a third tablet in a filtered keyspace to the topology. + tablet3 := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "aa", + Uid: 3, + }, + Hostname: "host3", + PortMap: map[string]int32{ + "vt": 789, + }, + Keyspace: "excluded", + Shard: "shard", + } + require.NoError(t, ts.CreateTablet(context.Background(), tablet3), "CreateTablet failed for %v", tablet3.Alias) + tw.loadTablets() + + // Confirm filtered tablet did not trigger an AddTablet call. + counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "AddTablet": 0}) + checkChecksum(t, tw, 3177315266) + + // Check the second tablet is returned by GetAllTablets(). This should not contain the filtered tablet. allTablets = fhc.GetAllTablets() key = TabletToMapKey(tablet2) assert.Len(t, allTablets, 2) @@ -197,14 +219,14 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) { assert.Contains(t, allTablets, key) assert.True(t, proto.Equal(tablet, allTablets[key])) assert.NotContains(t, allTablets, origKey) - checkChecksum(t, tw, 2762153755) + checkChecksum(t, tw, 3177315266) } else { counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "ReplaceTablet": 0}) assert.Len(t, allTablets, 2) assert.Contains(t, allTablets, origKey) assert.True(t, proto.Equal(origTablet, allTablets[origKey])) assert.NotContains(t, allTablets, key) - checkChecksum(t, tw, 2762153755) + checkChecksum(t, tw, 3177315266) } // Both tablets restart on different hosts. @@ -260,7 +282,7 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) { require.Nil(t, err, "FixShardReplication failed") tw.loadTablets() counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "RemoveTablet": 1}) - checkChecksum(t, tw, 789108290) + checkChecksum(t, tw, 852159264) allTablets = fhc.GetAllTablets() assert.Len(t, allTablets, 1) @@ -271,8 +293,10 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) { assert.Contains(t, allTablets, key) assert.True(t, proto.Equal(tablet2, allTablets[key])) - // Remove the other and check that it is detected as being gone. + // Remove the other tablets and check that it is detected as being gone. + // Deleting the filtered tablet should not trigger a RemoveTablet call. require.NoError(t, ts.DeleteTablet(context.Background(), tablet2.Alias)) + require.NoError(t, ts.DeleteTablet(context.Background(), tablet3.Alias)) _, err = topo.FixShardReplication(context.Background(), ts, logger, "aa", "keyspace", "shard") require.Nil(t, err, "FixShardReplication failed") tw.loadTablets()