Skip to content

Commit

Permalink
cluster: add envs to skip topology sanity check in scale-in process (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
AstroProfundis authored Nov 22, 2021
1 parent d4de166 commit a713f26
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 26 deletions.
11 changes: 10 additions & 1 deletion pkg/cluster/operation/destroy.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,16 @@ func DestroyClusterTombstone(
}
}

pdEndpoints := cluster.GetPDList()
var pdEndpoints []string
forcePDEndpoints := os.Getenv(EnvNamePDEndpointOverwrite) // custom set PD endpoint list

if forcePDEndpoints != "" {
pdEndpoints = strings.Split(forcePDEndpoints, ",")
log.Warnf("%s is set, using %s as PD endpoints", EnvNamePDEndpointOverwrite, pdEndpoints)
} else {
pdEndpoints = cluster.GetPDList()
}

var pdClient = api.NewPDClient(pdEndpoints, 10*time.Second, tlsCfg)

tcpProxy := proxy.GetTCPProxy()
Expand Down
6 changes: 6 additions & 0 deletions pkg/cluster/operation/operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ import (
"github.com/pingcap/tiup/pkg/set"
)

// Names of environment variables that can be set to override the default
// behavior of cluster operations.
const (
// EnvNameSkipScaleInTopoCheck is the name of the environment variable that,
// when set to any non-empty value, makes the scale-in operation skip its
// topology sanity checks.
EnvNameSkipScaleInTopoCheck = "SKIP_SCALEIN_TOPO_CHECK"
// EnvNamePDEndpointOverwrite is the name of the environment variable holding
// a comma-separated list of PD endpoints to use instead of the PD list
// derived from the cluster topology.
EnvNamePDEndpointOverwrite = "FORCE_PD_ENDPOINTS"
)

// Options represents the operation options
type Options struct {
Roles []string
Expand Down
70 changes: 46 additions & 24 deletions pkg/cluster/operation/scale_in.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ import (
"crypto/tls"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"time"

"github.com/fatih/color"
Expand All @@ -28,6 +30,7 @@ import (
"github.com/pingcap/tiup/pkg/logger/log"
"github.com/pingcap/tiup/pkg/proxy"
"github.com/pingcap/tiup/pkg/set"
"github.com/pingcap/tiup/pkg/tui"
"github.com/pingcap/tiup/pkg/utils"
)

Expand Down Expand Up @@ -109,38 +112,57 @@ func ScaleInCluster(
deletedDiff[inst.ComponentName()] = append(deletedDiff[inst.ComponentName()], inst)
}

// Cannot delete all PD servers
if len(deletedDiff[spec.ComponentPD]) == len(cluster.PDServers) {
return errors.New("cannot delete all PD servers")
skipTopoCheck := false
if v := os.Getenv(EnvNameSkipScaleInTopoCheck); v != "" { // any value except empty will work as "true"
skipTopoCheck = true
}

// Cannot delete all TiKV servers
if len(deletedDiff[spec.ComponentTiKV]) == len(cluster.TiKVServers) {
return errors.New("cannot delete all TiKV servers")
}
if skipTopoCheck {
log.Warnf("%s is set, topology checks ignored, the cluster might be broken after the operations!", EnvNameSkipScaleInTopoCheck)
if ok, input := tui.PromptForConfirmYes("Are you sure to continue? [y/N]"); !ok {
return errors.Errorf("user aborted with '%s'", input)
}
} else {
// Cannot delete all PD servers
if len(deletedDiff[spec.ComponentPD]) == len(cluster.PDServers) {
return errors.New("cannot delete all PD servers")
}

// Cannot delete TiSpark master server if there's any TiSpark worker remains
if len(deletedDiff[spec.ComponentTiSpark]) > 0 {
var cntDiffTiSparkMaster int
var cntDiffTiSparkWorker int
for _, inst := range deletedDiff[spec.ComponentTiSpark] {
switch inst.Role() {
case spec.RoleTiSparkMaster:
cntDiffTiSparkMaster++
case spec.RoleTiSparkWorker:
cntDiffTiSparkWorker++
}
// Cannot delete all TiKV servers
if len(deletedDiff[spec.ComponentTiKV]) == len(cluster.TiKVServers) {
return errors.New("cannot delete all TiKV servers")
}
if cntDiffTiSparkMaster == len(cluster.TiSparkMasters) &&
cntDiffTiSparkWorker < len(cluster.TiSparkWorkers) {
return errors.New("cannot delete tispark master when there are workers left")

// Cannot delete TiSpark master server if there's any TiSpark worker remains
if len(deletedDiff[spec.ComponentTiSpark]) > 0 {
var cntDiffTiSparkMaster int
var cntDiffTiSparkWorker int
for _, inst := range deletedDiff[spec.ComponentTiSpark] {
switch inst.Role() {
case spec.RoleTiSparkMaster:
cntDiffTiSparkMaster++
case spec.RoleTiSparkWorker:
cntDiffTiSparkWorker++
}
}
if cntDiffTiSparkMaster == len(cluster.TiSparkMasters) &&
cntDiffTiSparkWorker < len(cluster.TiSparkWorkers) {
return errors.New("cannot delete tispark master when there are workers left")
}
}
}

var pdEndpoints []string
for _, instance := range (&spec.PDComponent{Topology: cluster}).Instances() {
if !deletedNodes.Exist(instance.ID()) {
pdEndpoints = append(pdEndpoints, Addr(instance))
forcePDEndpoints := os.Getenv(EnvNamePDEndpointOverwrite) // custom set PD endpoint list

if forcePDEndpoints != "" {
pdEndpoints = strings.Split(forcePDEndpoints, ",")
log.Warnf("%s is set, using %s as PD endpoints", EnvNamePDEndpointOverwrite, pdEndpoints)
} else {
for _, instance := range (&spec.PDComponent{Topology: cluster}).Instances() {
if !deletedNodes.Exist(instance.ID()) {
pdEndpoints = append(pdEndpoints, Addr(instance))
}
}
}

Expand Down
14 changes: 13 additions & 1 deletion pkg/cluster/operation/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ import (
"context"
"crypto/tls"
"fmt"
"os"
"reflect"
"strconv"
"strings"
"time"

perrs "github.com/pingcap/errors"
Expand Down Expand Up @@ -60,9 +62,19 @@ func Upgrade(
var origLeaderScheduleLimit int
var origRegionScheduleLimit int
var err error

var pdEndpoints []string
forcePDEndpoints := os.Getenv(EnvNamePDEndpointOverwrite) // custom set PD endpoint list

switch component.Name() {
case spec.ComponentTiKV:
pdClient := api.NewPDClient(topo.(*spec.Specification).GetPDList(), 10*time.Second, tlsCfg)
if forcePDEndpoints != "" {
pdEndpoints = strings.Split(forcePDEndpoints, ",")
log.Warnf("%s is set, using %s as PD endpoints", EnvNamePDEndpointOverwrite, pdEndpoints)
} else {
pdEndpoints = topo.(*spec.Specification).GetPDList()
}
pdClient := api.NewPDClient(pdEndpoints, 10*time.Second, tlsCfg)
origLeaderScheduleLimit, origRegionScheduleLimit, err = increaseScheduleLimit(ctx, pdClient)
if err != nil {
// the config modifying error should be able to be safely ignored, as it will
Expand Down

0 comments on commit a713f26

Please sign in to comment.