From 2987323351492397820f904820a09cd0591eac88 Mon Sep 17 00:00:00 2001 From: Tanjin Xu <109303790+tanjinx@users.noreply.github.com> Date: Tue, 9 May 2023 11:01:47 -0700 Subject: [PATCH] `slack-vitess-r14.0.5:`cherrypick: backport rdonly patch (#77) * backport pr 29 * fix missing variable * fix missing patch * update help text --- go/flags/endtoend/vtgate.txt | 2 ++ go/vt/vtgate/tabletgateway.go | 23 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt index 4c7f90efd07..7a1c0c853ee 100644 --- a/go/flags/endtoend/vtgate.txt +++ b/go/flags/endtoend/vtgate.txt @@ -67,6 +67,8 @@ Usage of vtgate: gate server query cache size, maximum number of queries to be cached. vtgate analyzes every incoming query and generate a query plan, these plans are being cached in a cache. This config controls the expected amount of unique entries in the cache. (default 5000) --gateway_initial_tablet_timeout duration At startup, the tabletGateway will wait up to this duration to get at least one tablet per keyspace/shard/tablet type (default 30s) + --gateway_route_replica_to_rdonly + route REPLICA queries to RDONLY tablets as well as REPLICA tablets --grpc_auth_mode string Which auth plugin implementation to use (eg: static) --grpc_auth_mtls_allowed_substrings string diff --git a/go/vt/vtgate/tabletgateway.go b/go/vt/vtgate/tabletgateway.go index a03b4d02710..17d7018eb12 100644 --- a/go/vt/vtgate/tabletgateway.go +++ b/go/vt/vtgate/tabletgateway.go @@ -49,7 +49,8 @@ var ( bufferImplementation = flag.String("buffer_implementation", "keyspace_events", "Allowed values: healthcheck (legacy implementation), keyspace_events (default)") initialTabletTimeout = flag.Duration("gateway_initial_tablet_timeout", 30*time.Second, "At startup, the tabletGateway will wait up to this duration to get at least one tablet per keyspace/shard/tablet type") // retryCount is the number of times a query will be retried on error - retryCount = flag.Int("retry-count", 2, "retry count") + retryCount = flag.Int("retry-count", 2, "retry count") + routeReplicaToRdonly = flag.Bool("gateway_route_replica_to_rdonly", false, "route REPLICA queries to RDONLY tablets as well as REPLICA tablets") ) // TabletGateway implements the Gateway interface. @@ -278,6 +279,20 @@ func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, } tablets := gw.hc.GetHealthyTabletStats(target) + + // temporary hack to enable REPLICA type queries to address both REPLICA tablets and RDONLY tablets + // original commit - https://github.com/tinyspeck/vitess/pull/166/commits/2552b4ce25a9fdb41ff07fa69f2ccf485fea83ac + // discoverygateway patch - https://github.com/slackhq/vitess/commit/47adb7c8fc720cb4cb7a090530b3e88d310ff6d3 + if *routeReplicaToRdonly && target.TabletType == topodatapb.TabletType_REPLICA { + // Create a new target for the same original keyspace/shard, but RDONLY tablet type. + rdonlyTarget := &querypb.Target{ + Keyspace: target.Keyspace, + Shard: target.Shard, + TabletType: topodatapb.TabletType_RDONLY, + } + tablets = append(tablets, gw.hc.GetHealthyTabletStats(rdonlyTarget)...) + } + if len(tablets) == 0 { // if we have a keyspace event watcher, check if the reason why our primary is not available is that it's currently being resharded // or if a reparent operation is in progress. @@ -326,7 +341,11 @@ func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, startTime := time.Now() var canRetry bool - canRetry, err = inner(ctx, target, th.Conn) + if *routeReplicaToRdonly && target.TabletType == topodatapb.TabletType_REPLICA { + canRetry, err = inner(ctx, th.Target, th.Conn) + } else { + canRetry, err = inner(ctx, target, th.Conn) + } gw.updateStats(target, startTime, err) if canRetry { invalidTablets[topoproto.TabletAliasString(tabletLastUsed.Alias)] = true