From d70e286d24b79e885c604314e7582e93628ccb7d Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Sun, 7 Mar 2021 02:03:55 -0500 Subject: [PATCH] kvserver: make the StoreRebalancer aware of non-voters This commit teaches the `StoreRebalancer` to rebalance non-voting replicas. Release justification: needed for non-voting replicas Release note: None --- pkg/kv/kvserver/store_rebalancer.go | 456 +++++++++++++++++------ pkg/kv/kvserver/store_rebalancer_test.go | 378 +++++++++++++++---- 2 files changed, 649 insertions(+), 185 deletions(-) diff --git a/pkg/kv/kvserver/store_rebalancer.go b/pkg/kv/kvserver/store_rebalancer.go index f60ed2342bdd..a97e91afcec6 100644 --- a/pkg/kv/kvserver/store_rebalancer.go +++ b/pkg/kv/kvserver/store_rebalancer.go @@ -17,6 +17,7 @@ import ( "sort" "time" + "github.com/cockroachdb/cockroach/pkg/config/zonepb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" @@ -292,7 +293,7 @@ func (sr *StoreRebalancer) rebalanceStore( replicasToMaybeRebalance = append(replicasToMaybeRebalance, hottestRanges...) for localDesc.Capacity.QueriesPerSecond > qpsMaxThreshold { - replWithStats, voterTargets := sr.chooseReplicaToRebalance( + replWithStats, voterTargets, nonVoterTargets := sr.chooseRangeToRebalance( ctx, &replicasToMaybeRebalance, localDesc, @@ -312,8 +313,7 @@ func (sr *StoreRebalancer) rebalanceStore( replWithStats.repl.RangeID, replWithStats.qps, descBeforeRebalance.Replicas(), voterTargets) timeout := sr.rq.processTimeoutFunc(sr.st, replWithStats.repl) if err := contextutil.RunWithTimeout(ctx, "relocate range", timeout, func(ctx context.Context) error { - // TODO(aayush): Fix when we can make decisions about rebalancing non-voting replicas. - return sr.rq.store.AdminRelocateRange(ctx, *descBeforeRebalance, voterTargets, []roachpb.ReplicationTarget{}) + return sr.rq.store.AdminRelocateRange(ctx, *descBeforeRebalance, voterTargets, nonVoterTargets) }); err != nil { log.Errorf(ctx, "unable to relocate range to %v: %+v", voterTargets, err) continue @@ -396,9 +396,9 @@ func (sr *StoreRebalancer) chooseLeaseToTransfer( log.VEventf(ctx, 3, "considering lease transfer for r%d with %.2f qps", desc.RangeID, replWithStats.qps) - // Check all the other replicas in order of increasing qps. Learner replicas - // aren't allowed to become the leaseholder or raft leader, so only consider - // the `Voters` replicas. + // Check all the other voting replicas in order of increasing qps. + // Learners or non-voters aren't allowed to become leaseholders or raft + // leaders, so only consider the `Voter` replicas. candidates := desc.Replicas().DeepCopy().VoterDescriptors() sort.Slice(candidates, func(i, j int) bool { var iQPS, jQPS float64 @@ -462,7 +462,17 @@ func (sr *StoreRebalancer) chooseLeaseToTransfer( } } -func (sr *StoreRebalancer) chooseReplicaToRebalance( +// rangeRebalanceContext represents a snapshot of a range's state during the +// StoreRebalancer's attempt to rebalance it based on QPS. 
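+// It carries the range descriptor and zone config together with the number
+// of voters and non-voters the zone config demands, so the helper methods
+// below all operate on one consistent view of the range.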
+type rangeRebalanceContext struct { + replWithStats replicaWithStats + rangeDesc *roachpb.RangeDescriptor + zone *zonepb.ZoneConfig + clusterNodes int + numDesiredVoters, numDesiredNonVoters int +} + +func (sr *StoreRebalancer) chooseRangeToRebalance( ctx context.Context, hottestRanges *[]replicaWithStats, localDesc *roachpb.StoreDescriptor, @@ -470,17 +480,17 @@ func (sr *StoreRebalancer) chooseReplicaToRebalance( storeMap map[roachpb.StoreID]*roachpb.StoreDescriptor, minQPS float64, maxQPS float64, -) (replicaWithStats, []roachpb.ReplicationTarget) { +) (replWithStats replicaWithStats, voterTargets, nonVoterTargets []roachpb.ReplicationTarget) { now := sr.rq.store.Clock().NowAsClockTimestamp() for { if len(*hottestRanges) == 0 { - return replicaWithStats{}, nil + return replicaWithStats{}, nil, nil } replWithStats := (*hottestRanges)[0] *hottestRanges = (*hottestRanges)[1:] if replWithStats.repl == nil { - return replicaWithStats{}, nil + return replicaWithStats{}, nil, nil } if shouldNotMoveAway(ctx, replWithStats, localDesc, now, minQPS) { @@ -494,93 +504,35 @@ func (sr *StoreRebalancer) chooseReplicaToRebalance( const minQPSFraction = .001 if replWithStats.qps < localDesc.Capacity.QueriesPerSecond*minQPSFraction && float64(localDesc.Capacity.RangeCount) <= storeList.candidateRanges.mean { - log.VEventf(ctx, 5, "r%d's %.2f qps is too little to matter relative to s%d's %.2f total qps", - replWithStats.repl.RangeID, replWithStats.qps, localDesc.StoreID, localDesc.Capacity.QueriesPerSecond) + log.VEventf( + ctx, + 5, + "r%d's %.2f qps is too little to matter relative to s%d's %.2f total qps", + replWithStats.repl.RangeID, + replWithStats.qps, + localDesc.StoreID, + localDesc.Capacity.QueriesPerSecond, + ) continue } - desc, zone := replWithStats.repl.DescAndZone() log.VEventf(ctx, 3, "considering replica rebalance for r%d with %.2f qps", - desc.RangeID, replWithStats.qps) - + replWithStats.repl.GetRangeID(), replWithStats.qps) + rangeDesc, zone := replWithStats.repl.DescAndZone() clusterNodes := sr.rq.allocator.storePool.ClusterNodeCount() - desiredVoters := GetNeededVoters(zone.GetNumVoters(), clusterNodes) - targets := make([]roachpb.ReplicationTarget, 0, desiredVoters) - targetVoters := make([]roachpb.ReplicaDescriptor, 0, desiredVoters) - currentVoters := desc.Replicas().VoterDescriptors() - currentNonVoters := desc.Replicas().NonVoterDescriptors() - - // Check the range's existing diversity score, since we want to ensure we - // don't hurt locality diversity just to improve QPS. - curDiversity := rangeDiversityScore( - sr.rq.allocator.storePool.getLocalitiesByStore(currentVoters)) - - // Check the existing replicas, keeping around those that aren't overloaded. - for i := range currentVoters { - if currentVoters[i].StoreID == localDesc.StoreID { - continue - } - // Keep the replica in the range if we don't know its QPS or if its QPS - // is below the upper threshold. Punishing stores not in our store map - // could cause mass evictions if the storePool gets out of sync. 
- storeDesc, ok := storeMap[currentVoters[i].StoreID] - if !ok || storeDesc.Capacity.QueriesPerSecond < maxQPS { - if log.V(3) { - var reason redact.RedactableString - if ok { - reason = redact.Sprintf(" (qps %.2f vs max %.2f)", storeDesc.Capacity.QueriesPerSecond, maxQPS) - } - log.VEventf(ctx, 3, "keeping r%d/%d on s%d%s", desc.RangeID, currentVoters[i].ReplicaID, currentVoters[i].StoreID, reason) - } - targets = append(targets, roachpb.ReplicationTarget{ - NodeID: currentVoters[i].NodeID, - StoreID: currentVoters[i].StoreID, - }) - targetVoters = append(targetVoters, roachpb.ReplicaDescriptor{ - NodeID: currentVoters[i].NodeID, - StoreID: currentVoters[i].StoreID, - }) - } - } - - // Then pick out which new stores to add the remaining replicas to. - options := sr.rq.allocator.scorerOptions() - options.qpsRebalanceThreshold = qpsRebalanceThreshold.Get(&sr.st.SV) - for len(targets) < desiredVoters { - // Use the preexisting AllocateVoter logic to ensure that considerations - // such as zone constraints, locality diversity, and full disk come - // into play. - target, _ := sr.rq.allocator.allocateTargetFromList( - ctx, - storeList, - zone, - targetVoters, - currentNonVoters, - options, - // TODO(aayush): For now, we're not going to let the StoreRebalancer - // rebalance non-voting replicas. Fix this. - voterTarget, - ) - if target == nil { - log.VEventf(ctx, 3, "no rebalance targets found to replace the current store for r%d", - desc.RangeID) - break - } - - meanQPS := storeList.candidateQueriesPerSecond.mean - if sr.shouldNotMoveTo(ctx, storeMap, replWithStats, target.StoreID, meanQPS, minQPS, maxQPS) { - break - } - - targets = append(targets, roachpb.ReplicationTarget{ - NodeID: target.Node.NodeID, - StoreID: target.StoreID, - }) - targetVoters = append(targetVoters, roachpb.ReplicaDescriptor{ - NodeID: target.Node.NodeID, - StoreID: target.StoreID, - }) + numDesiredVoters := GetNeededVoters(zone.GetNumVoters(), clusterNodes) + + rebalanceCtx := rangeRebalanceContext{ + replWithStats: replWithStats, + rangeDesc: rangeDesc, + zone: zone, + clusterNodes: clusterNodes, + numDesiredVoters: numDesiredVoters, + numDesiredNonVoters: GetNeededNonVoters(numDesiredVoters, int(zone.GetNumNonVoters()), clusterNodes), } + targetVoterRepls, targetNonVoterRepls := sr.getRebalanceCandidatesBasedOnQPS( + ctx, rebalanceCtx, localDesc, storeMap, storeList, minQPS, maxQPS, + ) // If we couldn't find enough valid targets, forget about this range. // @@ -589,28 +541,53 @@ func (sr *StoreRebalancer) chooseReplicaToRebalance( // moving one of the other existing replicas that's on a store with less // qps than the max threshold but above the mean would help in certain // locality configurations. 
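+		// With non-voting replicas in the mix, both target sets must be fully
+		// satisfied: a shortfall in either the voter or the non-voter targets
+		// abandons the rebalance for this range.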
-		if len(targets) < desiredVoters {
-			log.VEventf(ctx, 3, "couldn't find enough rebalance targets for r%d (%d/%d)",
-				desc.RangeID, len(targets), desiredVoters)
+		if len(targetVoterRepls) < rebalanceCtx.numDesiredVoters {
+			log.VEventf(ctx, 3, "couldn't find enough voter rebalance targets for r%d (%d/%d)",
+				rangeDesc.RangeID, len(targetVoterRepls), rebalanceCtx.numDesiredVoters)
 			continue
 		}
-		newDiversity := rangeDiversityScore(sr.rq.allocator.storePool.getLocalitiesByStore(targetVoters))
-		if newDiversity < curDiversity {
-			log.VEventf(ctx, 3,
-				"new diversity %.2f for r%d worse than current diversity %.2f; not rebalancing",
-				newDiversity, desc.RangeID, curDiversity)
+		if len(targetNonVoterRepls) < rebalanceCtx.numDesiredNonVoters {
+			log.VEventf(ctx, 3, "couldn't find enough non-voter rebalance targets for r%d (%d/%d)",
+				rangeDesc.RangeID, len(targetNonVoterRepls), rebalanceCtx.numDesiredNonVoters)
+			continue
+		}
+
+		// If the new set of replicas has a lower diversity score than the
+		// existing set, we don't continue with the rebalance, since we want to
+		// ensure we don't hurt locality diversity just to improve QPS.
+		//
+		// 1. Ensure that diversity among voting replicas is not hurt by this
+		// rebalancing decision.
+		if sr.worsensDiversity(
+			ctx,
+			rangeDesc.GetRangeID(),
+			rangeDesc.Replicas().VoterDescriptors(),
+			targetVoterRepls,
+			true, /* onlyVoters */
+		) {
+			continue
+		}
+		// 2. Ensure that diversity among all replicas is not hurt by this decision.
+		allTargetRepls := append(targetVoterRepls, targetNonVoterRepls...)
+		if sr.worsensDiversity(
+			ctx,
+			rangeDesc.GetRangeID(),
+			rangeDesc.Replicas().Descriptors(),
+			allTargetRepls,
+			false, /* onlyVoters */
+		) {
 			continue
 		}

-		// Pick the replica with the least QPS to be leaseholder;
+		// Pick the voter with the least QPS to be leaseholder;
 		// RelocateRange transfers the lease to the first provided target.
 		newLeaseIdx := 0
 		newLeaseQPS := math.MaxFloat64
 		var raftStatus *raft.Status
-		for i := 0; i < len(targets); i++ {
+		for i := 0; i < len(targetVoterRepls); i++ {
 			// Ensure we don't transfer the lease to an existing replica that is behind
 			// in processing its raft log.
-			if replica, ok := desc.GetReplicaDescriptor(targets[i].StoreID); ok {
+			if replica, ok := rangeDesc.GetReplicaDescriptor(targetVoterRepls[i].StoreID); ok {
 				if raftStatus == nil {
 					raftStatus = sr.getRaftStatusFn(replWithStats.repl)
 				}
@@ -619,15 +596,266 @@
 			}
 		}

-			storeDesc, ok := storeMap[targets[i].StoreID]
+			storeDesc, ok := storeMap[targetVoterRepls[i].StoreID]
 			if ok && storeDesc.Capacity.QueriesPerSecond < newLeaseQPS {
 				newLeaseIdx = i
 				newLeaseQPS = storeDesc.Capacity.QueriesPerSecond
 			}
 		}
-		targets[0], targets[newLeaseIdx] = targets[newLeaseIdx], targets[0]
-		return replWithStats, targets
+		targetVoterRepls[0], targetVoterRepls[newLeaseIdx] = targetVoterRepls[newLeaseIdx], targetVoterRepls[0]
+		return replWithStats,
+			roachpb.MakeReplicaSet(targetVoterRepls).ReplicationTargets(),
+			roachpb.MakeReplicaSet(targetNonVoterRepls).ReplicationTargets()
+	}
+}
+
+// worsensDiversity returns true iff the diversity score of `currentRepls` is
+// higher than that of `targetRepls` (either among just the set of voting
+// replicas, or across all replicas in the range -- determined by `onlyVoters`).
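+//
+// Note that an equal diversity score is not treated as a regression: the
+// rebalance is only abandoned when the new score is strictly lower than the
+// current one.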
+func (sr *StoreRebalancer) worsensDiversity( + ctx context.Context, + rangeID roachpb.RangeID, + currentRepls, targetRepls []roachpb.ReplicaDescriptor, + onlyVoters bool, +) bool { + curDiversity := rangeDiversityScore( + sr.rq.allocator.storePool.getLocalitiesByStore(currentRepls), + ) + newDiversity := rangeDiversityScore( + sr.rq.allocator.storePool.getLocalitiesByStore(targetRepls), + ) + replicaStr := "replica" + if onlyVoters { + replicaStr = "voting replica" + } + if curDiversity > newDiversity { + log.VEventf( + ctx, + 3, + "new %s diversity %.2f for r%d worse than current diversity %.2f; not rebalancing", + replicaStr, + newDiversity, + rangeID, + curDiversity, + ) + return true + } + return false +} + +// getRebalanceCandidatesBasedOnQPS returns a list of rebalance targets for +// voting and non-voting replicas on the range that match the relevant +// constraints on the range and would further the goal of balancing the QPS on +// the stores in this cluster. In case there aren't enough stores that meet the +// constraints and are valid rebalance candidates based on QPS, the list of +// targets returned may contain fewer-than-required replicas. +// +// NB: `localStoreDesc` is expected to be the leaseholder of the range being +// operated on. +func (sr *StoreRebalancer) getRebalanceCandidatesBasedOnQPS( + ctx context.Context, + rebalanceCtx rangeRebalanceContext, + localStoreDesc *roachpb.StoreDescriptor, + storeMap map[roachpb.StoreID]*roachpb.StoreDescriptor, + storeList StoreList, + minQPS, maxQPS float64, +) (finalVoterTargets, finalNonVoterTargets []roachpb.ReplicaDescriptor) { + options := sr.rq.allocator.scorerOptions() + options.qpsRebalanceThreshold = qpsRebalanceThreshold.Get(&sr.st.SV) + + // Decide which voting / non-voting replicas we want to keep around and find + // rebalance targets for the rest. + partialVoterTargets := sr.pickReplsToKeep( + ctx, + rebalanceCtx, + nil, /* replsToExclude */ + localStoreDesc, + storeMap, + maxQPS, + voterTarget, + ) + finalVoterTargets = sr.pickRemainingRepls( + ctx, + rebalanceCtx, + partialVoterTargets, + nil, /* partialNonVoterTargets */ + storeMap, + storeList, + options, + minQPS, maxQPS, + voterTarget, + ) + + partialNonVoterTargets := sr.pickReplsToKeep( + ctx, + rebalanceCtx, + // NB: `finalVoterTargets` may contain replicas that are part of the + // existing set of non-voter targets, so we make sure that we don't keep + // those replicas around in `partialNonVoterTargets`. + finalVoterTargets, + localStoreDesc, + storeMap, + maxQPS, + nonVoterTarget, + ) + finalNonVoterTargets = sr.pickRemainingRepls( + ctx, + rebalanceCtx, + finalVoterTargets, + partialNonVoterTargets, + storeMap, + storeList, + options, + minQPS, + maxQPS, + nonVoterTarget, + ) + + return finalVoterTargets, finalNonVoterTargets +} + +// pickRemainingRepls determines the set of rebalance targets to fill in the +// rest of `partial{Voter,NonVoter}Targets` such that the resulting set contains +// exactly as many replicas as dictated by the zone configs. +// +// The caller is expected to synthesize the set of +// `partial{Voter,NonVoter}Targets` via `StoreRebalancer.pickReplsToKeep`. 
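+//
+// For example (hypothetical numbers): if the zone config asks for 3 voters
+// and `pickReplsToKeep` kept only 1 of the existing ones, this method asks
+// the allocator for 2 more voter targets, stopping early if no valid target
+// can be found.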
+func (sr *StoreRebalancer) pickRemainingRepls( + ctx context.Context, + rebalanceCtx rangeRebalanceContext, + partialVoterTargets, partialNonVoterTargets []roachpb.ReplicaDescriptor, + storeMap map[roachpb.StoreID]*roachpb.StoreDescriptor, + storeList StoreList, + options scorerOptions, + minQPS, maxQPS float64, + targetType targetReplicaType, +) (finalTargetsForType []roachpb.ReplicaDescriptor) { + var numDesiredReplsForType int + switch targetType { + case voterTarget: + finalTargetsForType = partialVoterTargets + numDesiredReplsForType = rebalanceCtx.numDesiredVoters + case nonVoterTarget: + finalTargetsForType = partialNonVoterTargets + numDesiredReplsForType = rebalanceCtx.numDesiredNonVoters + default: + log.Fatalf(ctx, "unknown targetReplicaType %s", targetType) + } + + for len(finalTargetsForType) < numDesiredReplsForType { + // Use the preexisting Allocate{Non}Voter logic to ensure that + // considerations such as zone constraints, locality diversity, and full + // disk come into play. + target, _ := sr.rq.allocator.allocateTargetFromList( + ctx, + storeList, + rebalanceCtx.zone, + partialVoterTargets, + partialNonVoterTargets, + options, + targetType, + ) + if target == nil { + log.VEventf( + ctx, 3, "no rebalance %ss found to replace the current store for r%d", + targetType, rebalanceCtx.rangeDesc.RangeID, + ) + break + } + + meanQPS := storeList.candidateQueriesPerSecond.mean + if sr.shouldNotMoveTo( + ctx, + storeMap, + rebalanceCtx.replWithStats, + target.StoreID, + meanQPS, + minQPS, + maxQPS, + ) { + // NB: If the target store returned by the allocator is not fit to + // receive a new replica due to balancing reasons, there is no point + // continuing with this loop since we'd expect future calls to + // `allocateTargetFromList` to return the same target. + break + } + + finalTargetsForType = append(finalTargetsForType, roachpb.ReplicaDescriptor{ + NodeID: target.Node.NodeID, + StoreID: target.StoreID, + }) + } + return finalTargetsForType +} + +// pickReplsToKeep determines the set of existing replicas for a range which +// should _not_ be rebalanced (because they belong to stores that aren't +// overloaded). +func (sr *StoreRebalancer) pickReplsToKeep( + ctx context.Context, + rebalanceCtx rangeRebalanceContext, + replsToExclude []roachpb.ReplicaDescriptor, + localStoreDesc *roachpb.StoreDescriptor, + storeMap map[roachpb.StoreID]*roachpb.StoreDescriptor, + maxQPS float64, + targetType targetReplicaType, +) (partialTargetRepls []roachpb.ReplicaDescriptor) { + shouldExclude := func(repl roachpb.ReplicaDescriptor) bool { + for _, excluded := range replsToExclude { + if repl.StoreID == excluded.StoreID { + return true + } + } + return false + } + + var currentReplsForType []roachpb.ReplicaDescriptor + switch targetType { + case voterTarget: + currentReplsForType = rebalanceCtx.rangeDesc.Replicas().VoterDescriptors() + case nonVoterTarget: + currentReplsForType = rebalanceCtx.rangeDesc.Replicas().NonVoterDescriptors() + default: + log.Fatalf(ctx, "unknown targetReplicaType: %s", targetType) + } + + // Check the existing replicas, keeping around those that aren't overloaded. + for i := range currentReplsForType { + if shouldExclude(currentReplsForType[i]) || + currentReplsForType[i].StoreID == localStoreDesc.StoreID { + continue + } + + // Keep the replica in the range if we don't know its QPS or if its QPS is + // below the upper threshold. Punishing stores not in our store map could + // cause mass evictions if the storePool gets out of sync. 
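+		// Any replica dropped here (because its store is at or above the upper
+		// threshold) becomes a candidate for replacement in `pickRemainingRepls`.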
+ storeDesc, ok := storeMap[currentReplsForType[i].StoreID] + if !ok || storeDesc.Capacity.QueriesPerSecond < maxQPS { + if log.V(3) { + var reason redact.RedactableString + if ok { + reason = redact.Sprintf( + " (qps %.2f vs max %.2f)", + storeDesc.Capacity.QueriesPerSecond, + maxQPS, + ) + } + log.VEventf( + ctx, + 3, + "keeping %s r%d/%d on s%d%s", + targetType, + rebalanceCtx.rangeDesc.RangeID, + currentReplsForType[i].ReplicaID, + currentReplsForType[i].StoreID, + reason, + ) + } + + partialTargetRepls = append(partialTargetRepls, currentReplsForType[i]) + } } + return partialTargetRepls } func shouldNotMoveAway( @@ -653,39 +881,39 @@ func (sr *StoreRebalancer) shouldNotMoveTo( ctx context.Context, storeMap map[roachpb.StoreID]*roachpb.StoreDescriptor, replWithStats replicaWithStats, - candidateStore roachpb.StoreID, + candidateStoreID roachpb.StoreID, meanQPS float64, minQPS float64, maxQPS float64, ) bool { - storeDesc, ok := storeMap[candidateStore] + candidateStore, ok := storeMap[candidateStoreID] if !ok { - log.VEventf(ctx, 3, "missing store descriptor for s%d", candidateStore) + log.VEventf(ctx, 3, "missing store descriptor for s%d", candidateStoreID) return true } - newCandidateQPS := storeDesc.Capacity.QueriesPerSecond + replWithStats.qps - if storeDesc.Capacity.QueriesPerSecond < minQPS { + newCandidateQPS := candidateStore.Capacity.QueriesPerSecond + replWithStats.qps + if candidateStore.Capacity.QueriesPerSecond < minQPS { if newCandidateQPS > maxQPS { log.VEventf(ctx, 3, "r%d's %.2f qps would push s%d over the max threshold (%.2f) with %.2f qps afterwards", - replWithStats.repl.RangeID, replWithStats.qps, candidateStore, maxQPS, newCandidateQPS) + replWithStats.repl.RangeID, replWithStats.qps, candidateStoreID, maxQPS, newCandidateQPS) return true } } else if newCandidateQPS > meanQPS { log.VEventf(ctx, 3, "r%d's %.2f qps would push s%d over the mean (%.2f) with %.2f qps afterwards", - replWithStats.repl.RangeID, replWithStats.qps, candidateStore, meanQPS, newCandidateQPS) + replWithStats.repl.RangeID, replWithStats.qps, candidateStoreID, meanQPS, newCandidateQPS) return true } // If the target store is on a separate node, we will also care // about node liveness. - targetNodeID := storeDesc.Node.NodeID + targetNodeID := candidateStore.Node.NodeID if targetNodeID != sr.rq.store.Ident.NodeID { if !sr.rq.store.cfg.StorePool.isNodeReadyForRoutineReplicaTransfer(ctx, targetNodeID) { log.VEventf(ctx, 3, - "refusing to transfer replica to n%d/s%d", targetNodeID, storeDesc.StoreID) + "refusing to transfer replica to n%d/s%d", targetNodeID, candidateStore.StoreID) return true } } diff --git a/pkg/kv/kvserver/store_rebalancer_test.go b/pkg/kv/kvserver/store_rebalancer_test.go index ed8da486fdfe..d311ed40c4ec 100644 --- a/pkg/kv/kvserver/store_rebalancer_test.go +++ b/pkg/kv/kvserver/store_rebalancer_test.go @@ -13,7 +13,6 @@ package kvserver import ( "context" "reflect" - "sort" "testing" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -24,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/stop" "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" "go.etcd.io/etcd/raft/v3" "go.etcd.io/etcd/raft/v3/tracker" ) @@ -73,8 +73,8 @@ var ( type testRange struct { // The first storeID in the list will be the leaseholder. 
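+	// Specifically, the first entry in `voters`; the lease is never placed on
+	// a non-voter.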
- storeIDs []roachpb.StoreID - qps float64 + voters, nonVoters []roachpb.StoreID + qps float64 } func loadRanges(rr *replicaRankings, s *Store, ranges []testRange) { @@ -83,17 +83,26 @@ func loadRanges(rr *replicaRankings, s *Store, ranges []testRange) { repl := &Replica{store: s} repl.mu.state.Desc = &roachpb.RangeDescriptor{} repl.mu.zone = s.cfg.DefaultZoneConfig - for _, storeID := range r.storeIDs { + for _, storeID := range r.voters { repl.mu.state.Desc.InternalReplicas = append(repl.mu.state.Desc.InternalReplicas, roachpb.ReplicaDescriptor{ NodeID: roachpb.NodeID(storeID), StoreID: storeID, ReplicaID: roachpb.ReplicaID(storeID), + Type: roachpb.ReplicaTypeVoterFull(), }) } repl.mu.state.Lease = &roachpb.Lease{ Expiration: &hlc.MaxTimestamp, Replica: repl.mu.state.Desc.InternalReplicas[0], } + for _, storeID := range r.nonVoters { + repl.mu.state.Desc.InternalReplicas = append(repl.mu.state.Desc.InternalReplicas, roachpb.ReplicaDescriptor{ + NodeID: roachpb.NodeID(storeID), + StoreID: storeID, + ReplicaID: roachpb.ReplicaID(storeID), + Type: roachpb.ReplicaTypeNonVoter(), + }) + } // TODO(a-robinson): The below three lines won't be needed once the old // rangeInfo code is ripped out of the allocator. repl.mu.state.Stats = &enginepb.MVCCStats{} @@ -180,7 +189,7 @@ func TestChooseLeaseToTransfer(t *testing.T) { } for _, tc := range testCases { - loadRanges(rr, s, []testRange{{storeIDs: tc.storeIDs, qps: tc.qps}}) + loadRanges(rr, s, []testRange{{voters: tc.storeIDs, qps: tc.qps}}) hottestRanges := rr.topQPS() _, target, _ := sr.chooseLeaseToTransfer( ctx, &hottestRanges, &localDesc, storeList, storeMap, minQPS, maxQPS) @@ -191,7 +200,7 @@ func TestChooseLeaseToTransfer(t *testing.T) { } } -func TestChooseReplicaToRebalance(t *testing.T) { +func TestChooseRangeToRebalance(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -236,53 +245,279 @@ func TestChooseReplicaToRebalance(t *testing.T) { } testCases := []struct { - storeIDs []roachpb.StoreID - excluded []roachpb.StoreID // stores that are not to be considered for rebalancing - qps float64 - expectTargets []roachpb.StoreID // the first listed store is expected to be the leaseholder + voters, nonVoters []roachpb.StoreID + // stores that are not to be considered for rebalancing + nonLive []roachpb.StoreID + qps float64 + // the first listed voter target is expected to be the leaseholder + expectedRebalancedVoters, expectedRebalancedNonVoters []roachpb.StoreID }{ - {[]roachpb.StoreID{1}, nil, 100, []roachpb.StoreID{5}}, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5}, + expectedRebalancedNonVoters: nil, + }, // If s5 is unavailable, s4 is the next best guess. 
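+		// (In this fixture the stores are ordered from hottest, s1, to coldest,
+		// s5, so s5 is generally the most attractive rebalance target.)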
- {[]roachpb.StoreID{1}, []roachpb.StoreID{5}, 100, []roachpb.StoreID{4}}, - {[]roachpb.StoreID{1}, []roachpb.StoreID{4, 5}, 100, []roachpb.StoreID{}}, - - {[]roachpb.StoreID{1}, nil, 500, []roachpb.StoreID{5}}, - {[]roachpb.StoreID{1}, []roachpb.StoreID{5}, 500, []roachpb.StoreID{}}, - - {[]roachpb.StoreID{1}, nil, 800, nil}, - - {[]roachpb.StoreID{1}, nil, 1.5, []roachpb.StoreID{5}}, - {[]roachpb.StoreID{1}, []roachpb.StoreID{5}, 1.5, []roachpb.StoreID{4}}, - - {[]roachpb.StoreID{1}, nil, 1.49, nil}, - - {[]roachpb.StoreID{1, 2}, nil, 100, []roachpb.StoreID{5, 2}}, - {[]roachpb.StoreID{1, 2}, []roachpb.StoreID{5}, 100, []roachpb.StoreID{4, 2}}, - - {[]roachpb.StoreID{1, 3}, nil, 100, []roachpb.StoreID{5, 3}}, - {[]roachpb.StoreID{1, 4}, nil, 100, []roachpb.StoreID{5, 4}}, - - {[]roachpb.StoreID{1, 2}, nil, 800, nil}, - {[]roachpb.StoreID{1, 2}, nil, 1.49, nil}, - {[]roachpb.StoreID{1, 4, 5}, nil, 500, nil}, - {[]roachpb.StoreID{1, 4, 5}, nil, 100, nil}, - {[]roachpb.StoreID{1, 3, 5}, nil, 500, nil}, - {[]roachpb.StoreID{1, 3, 4}, nil, 500, []roachpb.StoreID{5, 4, 3}}, - - {[]roachpb.StoreID{1, 3, 5}, nil, 100, []roachpb.StoreID{5, 4, 3}}, - {[]roachpb.StoreID{1, 3, 5}, []roachpb.StoreID{4}, 100, nil}, - + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: []roachpb.StoreID{5}, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{4}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: []roachpb.StoreID{4, 5}, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: nil, + qps: 500, + expectedRebalancedVoters: []roachpb.StoreID{5}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: []roachpb.StoreID{5}, + qps: 500, + expectedRebalancedVoters: []roachpb.StoreID{}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: nil, + qps: 800, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: nil, + qps: 1.5, + expectedRebalancedVoters: []roachpb.StoreID{5}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: []roachpb.StoreID{5}, + qps: 1.5, + expectedRebalancedVoters: []roachpb.StoreID{4}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: nil, + nonLive: nil, + qps: 1.49, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 2}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5, 2}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 2}, + nonVoters: nil, + nonLive: []roachpb.StoreID{5}, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{4, 2}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 3}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5, 3}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 4}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5, 4}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 2}, + nonVoters: nil, + nonLive: nil, + qps: 800, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: 
[]roachpb.StoreID{1, 2}, + nonVoters: nil, + nonLive: nil, + qps: 1.49, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 4, 5}, + nonVoters: nil, + nonLive: nil, + qps: 500, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 4, 5}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 3, 5}, + nonVoters: nil, + nonLive: nil, + qps: 500, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 3, 4}, + nonVoters: nil, + nonLive: nil, + qps: 500, + expectedRebalancedVoters: []roachpb.StoreID{5, 4, 3}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 3, 5}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5, 4, 3}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 3, 5}, + nonVoters: nil, + nonLive: []roachpb.StoreID{4}, + qps: 100, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, // Rebalancing to s2 isn't chosen even though it's better than s1 because it's above the mean. - {[]roachpb.StoreID{1, 3, 4, 5}, nil, 100, nil}, - {[]roachpb.StoreID{1, 2, 4, 5}, nil, 100, nil}, - {[]roachpb.StoreID{1, 2, 3, 5}, nil, 100, []roachpb.StoreID{5, 4, 3, 2}}, - {[]roachpb.StoreID{1, 2, 3, 4}, nil, 100, []roachpb.StoreID{5, 4, 3, 2}}, + { + voters: []roachpb.StoreID{1, 3, 4, 5}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 2, 4, 5}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 2, 3, 5}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5, 4, 3, 2}, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1, 2, 3, 4}, + nonVoters: nil, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5, 4, 3, 2}, + expectedRebalancedNonVoters: nil, + }, + { + // Don't bother moving any replicas around since it won't make much of a + // difference. See `minQPSFraction` inside `chooseRangeToRebalance()`. + voters: []roachpb.StoreID{1}, + nonVoters: []roachpb.StoreID{2, 3, 4}, + nonLive: nil, + qps: 1, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + // None of the stores are worth moving to because they will be above the + // maxQPS after the move. + voters: []roachpb.StoreID{1}, + nonVoters: []roachpb.StoreID{2, 3, 4}, + nonLive: nil, + qps: 1000, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: []roachpb.StoreID{2, 3, 4}, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5}, + expectedRebalancedNonVoters: []roachpb.StoreID{4, 3, 2}, + }, + // Voters may rebalance to stores that have a non-voter, and those + // displaced non-voters will be rebalanced to other valid stores. 
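+		// In the first case below, the voter moves onto s5 (which holds the
+		// non-voter) and the displaced non-voter is rebalanced to s4.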
+ { + voters: []roachpb.StoreID{1}, + nonVoters: []roachpb.StoreID{5}, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5}, + expectedRebalancedNonVoters: []roachpb.StoreID{4}, + }, + { + voters: []roachpb.StoreID{1}, + nonVoters: []roachpb.StoreID{5, 2, 3}, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: []roachpb.StoreID{5}, + expectedRebalancedNonVoters: []roachpb.StoreID{2, 3, 4}, + }, + { + // Voters may rebalance to stores that have a non-voter, but only if the + // displaced non-voters can be rebalanced to other underfull (based on + // QPS) stores. Note that stores 1 and 2 are above the maxQPS and the + // meanQPS, respectively, so non-voters cannot be rebalanced to them. + voters: []roachpb.StoreID{1, 2}, + nonVoters: []roachpb.StoreID{5, 4, 3}, + nonLive: nil, + qps: 100, + expectedRebalancedVoters: nil, + expectedRebalancedNonVoters: nil, + }, } for _, tc := range testCases { t.Run("", func(t *testing.T) { a.storePool.isNodeReadyForRoutineReplicaTransfer = func(_ context.Context, n roachpb.NodeID) bool { - for _, s := range tc.excluded { + for _, s := range tc.nonLive { // NodeID match StoreIDs here, so this comparison is valid. if roachpb.NodeID(s) == n { return false @@ -291,35 +526,36 @@ func TestChooseReplicaToRebalance(t *testing.T) { return true } - s.cfg.DefaultZoneConfig.NumReplicas = proto.Int32(int32(len(tc.storeIDs))) - loadRanges(rr, s, []testRange{{storeIDs: tc.storeIDs, qps: tc.qps}}) + s.cfg.DefaultZoneConfig.NumVoters = proto.Int32(int32(len(tc.voters))) + s.cfg.DefaultZoneConfig.NumReplicas = proto.Int32(int32(len(tc.voters) + len(tc.nonVoters))) + loadRanges( + rr, s, []testRange{ + {voters: tc.voters, nonVoters: tc.nonVoters, qps: tc.qps}, + }, + ) hottestRanges := rr.topQPS() - _, targets := sr.chooseReplicaToRebalance( - ctx, &hottestRanges, &localDesc, storeList, storeMap, minQPS, maxQPS) - - if len(targets) != len(tc.expectTargets) { - t.Fatalf("chooseReplicaToRebalance(existing=%v, qps=%f) got %v; want %v", - tc.storeIDs, tc.qps, targets, tc.expectTargets) - } - if len(targets) == 0 { - return + _, voterTargets, nonVoterTargets := sr.chooseRangeToRebalance( + ctx, &hottestRanges, &localDesc, storeList, storeMap, minQPS, maxQPS, + ) + + require.Len(t, voterTargets, len(tc.expectedRebalancedVoters)) + if len(voterTargets) > 0 && voterTargets[0].StoreID != tc.expectedRebalancedVoters[0] { + t.Errorf("chooseRangeToRebalance(existing=%v, qps=%f) chose s%d as leaseholder; want s%v", + tc.voters, tc.qps, voterTargets[0], tc.expectedRebalancedVoters[0]) } - if targets[0].StoreID != tc.expectTargets[0] { - t.Errorf("chooseReplicaToRebalance(existing=%v, qps=%f) chose s%d as leaseholder; want s%v", - tc.storeIDs, tc.qps, targets[0], tc.expectTargets[0]) + voterStoreIDs := make([]roachpb.StoreID, len(voterTargets)) + for i, target := range voterTargets { + voterStoreIDs[i] = target.StoreID } + require.ElementsMatch(t, voterStoreIDs, tc.expectedRebalancedVoters) - targetStores := make([]roachpb.StoreID, len(targets)) - for i, target := range targets { - targetStores[i] = target.StoreID - } - sort.Sort(roachpb.StoreIDSlice(targetStores)) - sort.Sort(roachpb.StoreIDSlice(tc.expectTargets)) - if !reflect.DeepEqual(targetStores, tc.expectTargets) { - t.Errorf("chooseReplicaToRebalance(existing=%v, qps=%f) chose targets %v; want %v", - tc.storeIDs, tc.qps, targetStores, tc.expectTargets) + require.Len(t, nonVoterTargets, len(tc.expectedRebalancedNonVoters)) + nonVoterStoreIDs := make([]roachpb.StoreID, len(nonVoterTargets)) + for i, target 
:= range nonVoterTargets { + nonVoterStoreIDs[i] = target.StoreID } + require.ElementsMatch(t, nonVoterStoreIDs, tc.expectedRebalancedNonVoters) }) } } @@ -354,7 +590,7 @@ func TestNoLeaseTransferToBehindReplicas(t *testing.T) { // Load in a range with replicas on an overfull node, a slightly underfull // node, and a very underfull node. - loadRanges(rr, s, []testRange{{storeIDs: []roachpb.StoreID{1, 4, 5}, qps: 100}}) + loadRanges(rr, s, []testRange{{voters: []roachpb.StoreID{1, 4, 5}, qps: 100}}) hottestRanges := rr.topQPS() repl := hottestRanges[0].repl @@ -390,11 +626,11 @@ func TestNoLeaseTransferToBehindReplicas(t *testing.T) { // Then do the same, but for replica rebalancing. Make s5 an existing replica // that's behind, and see how a new replica is preferred as the leaseholder // over it. - loadRanges(rr, s, []testRange{{storeIDs: []roachpb.StoreID{1, 3, 5}, qps: 100}}) + loadRanges(rr, s, []testRange{{voters: []roachpb.StoreID{1, 3, 5}, qps: 100}}) hottestRanges = rr.topQPS() repl = hottestRanges[0].repl - _, targets := sr.chooseReplicaToRebalance( + _, targets, _ := sr.chooseRangeToRebalance( ctx, &hottestRanges, &localDesc, storeList, storeMap, minQPS, maxQPS) expectTargets := []roachpb.ReplicationTarget{ {NodeID: 4, StoreID: 4}, {NodeID: 5, StoreID: 5}, {NodeID: 3, StoreID: 3},