From d513c864f5d8841130cefe8e235b68d98b5f8738 Mon Sep 17 00:00:00 2001
From: 9547
Date: Sun, 13 Dec 2020 17:02:01 +0800
Subject: [PATCH 1/7] typo(cluster): deletedNodesID -> deletedNodeIDs

---
 pkg/cluster/manager/destroy.go      | 5 ++++-
 pkg/cluster/task/builder.go         | 4 ++--
 pkg/cluster/task/update_meta.go     | 6 +++---
 pkg/cluster/task/update_topology.go | 4 ++--
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/pkg/cluster/manager/destroy.go b/pkg/cluster/manager/destroy.go
index 105c831512..3978db62b3 100644
--- a/pkg/cluster/manager/destroy.go
+++ b/pkg/cluster/manager/destroy.go
@@ -133,7 +133,10 @@ func (m *Manager) DestroyTombstone(
 		UpdateTopology(name, m.specManager.Path(name), clusterMeta, nodes)
 
 	regenConfigTasks, _ := buildRegenConfigTasks(m, name, topo, base, nodes)
-	t := b.ParallelStep("+ Refresh instance configs", true, regenConfigTasks...).Parallel(true, buildDynReloadPromTasks(metadata.GetTopology())...).Build()
+	t := b.
+		ParallelStep("+ Refresh instance configs", true, regenConfigTasks...).
+		Parallel(true, buildDynReloadPromTasks(metadata.GetTopology())...).
+		Build()
 	if err := t.Execute(task.NewContext()); err != nil {
 		if errorx.Cast(err) != nil {
 			// FIXME: Map possible task errors and give suggestions.
diff --git a/pkg/cluster/task/builder.go b/pkg/cluster/task/builder.go
index c605e39c29..a42dcc4d16 100644
--- a/pkg/cluster/task/builder.go
+++ b/pkg/cluster/task/builder.go
@@ -112,7 +112,7 @@ func (b *Builder) UpdateMeta(cluster string, metadata *spec.ClusterMeta, deleted
 	b.tasks = append(b.tasks, &UpdateMeta{
 		cluster:        cluster,
 		metadata:       metadata,
-		deletedNodesID: deletedNodeIds,
+		deletedNodeIDs: deletedNodeIds,
 	})
 	return b
 }
@@ -123,7 +123,7 @@ func (b *Builder) UpdateTopology(cluster, profile string, metadata *spec.Cluster
 		metadata:       metadata,
 		cluster:        cluster,
 		profileDir:     profile,
-		deletedNodesID: deletedNodeIds,
+		deletedNodeIDs: deletedNodeIds,
 	})
 	return b
 }
diff --git a/pkg/cluster/task/update_meta.go b/pkg/cluster/task/update_meta.go
index 8fe272d1c7..813255892d 100644
--- a/pkg/cluster/task/update_meta.go
+++ b/pkg/cluster/task/update_meta.go
@@ -25,7 +25,7 @@ import (
 type UpdateMeta struct {
 	cluster        string
 	metadata       *spec.ClusterMeta
-	deletedNodesID []string
+	deletedNodeIDs []string
 }
 
 // Execute implements the Task interface
@@ -33,7 +33,7 @@ type UpdateMeta struct {
 // the other callers point to this field by a pointer,
 // so we should update the original topology directly, and don't make a copy
 func (u *UpdateMeta) Execute(ctx *Context) error {
-	deleted := set.NewStringSet(u.deletedNodesID...)
+	deleted := set.NewStringSet(u.deletedNodeIDs...)
 
 	topo := u.metadata.Topology
 	tidbServers := make([]spec.TiDBSpec, 0)
@@ -154,5 +154,5 @@ func (u *UpdateMeta) Rollback(ctx *Context) error {
 
 // String implements the fmt.Stringer interface
 func (u *UpdateMeta) String() string {
-	return fmt.Sprintf("UpdateMeta: cluster=%s, deleted=`'%s'`", u.cluster, strings.Join(u.deletedNodesID, "','"))
+	return fmt.Sprintf("UpdateMeta: cluster=%s, deleted=`'%s'`", u.cluster, strings.Join(u.deletedNodeIDs, "','"))
 }
diff --git a/pkg/cluster/task/update_topology.go b/pkg/cluster/task/update_topology.go
index 6def0bcdd7..562db6184d 100644
--- a/pkg/cluster/task/update_topology.go
+++ b/pkg/cluster/task/update_topology.go
@@ -17,7 +17,7 @@ type UpdateTopology struct {
 	cluster        string
 	profileDir     string
 	metadata       *spec.ClusterMeta
-	deletedNodesID []string
+	deletedNodeIDs []string
 }
 
 // String implements the fmt.Stringer interface
@@ -41,7 +41,7 @@ func (u *UpdateTopology) Execute(ctx *Context) error {
 
 	topo := u.metadata.Topology
 
-	deleted := set.NewStringSet(u.deletedNodesID...)
+	deleted := set.NewStringSet(u.deletedNodeIDs...)
 
 	var ops []clientv3.Op
 	var instances []spec.Instance

From 9a017fadb70442dc610f9f7b5168da8b0bd4038a Mon Sep 17 00:00:00 2001
From: 9547
Date: Sun, 13 Dec 2020 18:16:38 +0800
Subject: [PATCH 2/7] typo(*): role -> roles

---
 components/cluster/command/patch.go | 2 +-
 components/dm/command/patch.go      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/components/cluster/command/patch.go b/components/cluster/command/patch.go
index 249006eef9..bfca649980 100644
--- a/components/cluster/command/patch.go
+++ b/components/cluster/command/patch.go
@@ -46,7 +46,7 @@ func newPatchCmd() *cobra.Command {
 
 	cmd.Flags().BoolVar(&overwrite, "overwrite", false, "Use this package in the future scale-out operations")
 	cmd.Flags().StringSliceVarP(&gOpt.Nodes, "node", "N", nil, "Specify the nodes")
-	cmd.Flags().StringSliceVarP(&gOpt.Roles, "role", "R", nil, "Specify the role")
+	cmd.Flags().StringSliceVarP(&gOpt.Roles, "role", "R", nil, "Specify the roles")
 	cmd.Flags().Uint64Var(&gOpt.APITimeout, "transfer-timeout", 300, "Timeout in seconds when transferring PD and TiKV store leaders")
 	return cmd
 }
diff --git a/components/dm/command/patch.go b/components/dm/command/patch.go
index a4ec859b2c..f212653859 100644
--- a/components/dm/command/patch.go
+++ b/components/dm/command/patch.go
@@ -46,6 +46,6 @@ func newPatchCmd() *cobra.Command {
 
 	cmd.Flags().BoolVar(&overwrite, "overwrite", false, "Use this package in the future scale-out operations")
 	cmd.Flags().StringSliceVarP(&gOpt.Nodes, "node", "N", nil, "Specify the nodes")
-	cmd.Flags().StringSliceVarP(&gOpt.Roles, "role", "R", nil, "Specify the role")
+	cmd.Flags().StringSliceVarP(&gOpt.Roles, "role", "R", nil, "Specify the roles")
 	return cmd
 }

From 8c71af90efa391bcce79a66538389b0d84fcfbc6 Mon Sep 17 00:00:00 2001
From: 9547
Date: Tue, 15 Dec 2020 00:14:14 +0800
Subject: [PATCH 3/7] fix(cluster/manager): exclude the scaled-in node when reloading prometheus

---
 pkg/cluster/manager/builder.go  | 19 +++++++++++++------
 pkg/cluster/manager/destroy.go  |  2 +-
 pkg/cluster/manager/scale_in.go |  2 +-
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pkg/cluster/manager/builder.go b/pkg/cluster/manager/builder.go
index cd97912caf..476bcc8ef6 100644
--- a/pkg/cluster/manager/builder.go
+++ b/pkg/cluster/manager/builder.go
@@ -30,8 +30,8 @@ import (
 	"github.com/pingcap/tiup/pkg/set"
 )
 
-// Dynamic reload Prometheus configuration
-func buildDynReloadPromTasks(topo spec.Topology) []task.Task {
+// buildReloadPromTasks reloads Prometheus configuration
+func buildReloadPromTasks(topo spec.Topology, nodes ...string) []task.Task {
 	monitor := spec.FindComponent(topo, spec.ComponentPrometheus)
 	if monitor == nil {
 		return nil
@@ -40,11 +40,18 @@ func buildDynReloadPromTasks(topo spec.Topology) []task.Task {
 	if len(instances) == 0 {
 		return nil
 	}
-	var dynReloadTasks []task.Task
+	var tasks []task.Task
+	deletedNodes := set.NewStringSet(nodes...)
 	for _, inst := range monitor.Instances() {
-		dynReloadTasks = append(dynReloadTasks, task.NewBuilder().SystemCtl(inst.GetHost(), inst.ServiceName(), "reload", true).Build())
+		if deletedNodes.Exist(inst.ID()) {
+			continue
+		}
+		t := task.NewBuilder().
+			SystemCtl(inst.GetHost(), inst.ServiceName(), "reload", true).
+			Build()
+		tasks = append(tasks, t)
 	}
-	return dynReloadTasks
+	return tasks
 }
 
 func buildScaleOutTask(
@@ -298,7 +305,7 @@ func buildScaleOutTask(
 			return operator.Start(ctx, newPart, operator.Options{OptTimeout: gOpt.OptTimeout}, tlsCfg)
 		}).
 		Parallel(false, refreshConfigTasks...).
-		Parallel(false, buildDynReloadPromTasks(metadata.GetTopology())...)
+		Parallel(false, buildReloadPromTasks(metadata.GetTopology())...)
 
 	if final != nil {
 		final(builder, name, metadata)
diff --git a/pkg/cluster/manager/destroy.go b/pkg/cluster/manager/destroy.go
index 3978db62b3..179cff5cb5 100644
--- a/pkg/cluster/manager/destroy.go
+++ b/pkg/cluster/manager/destroy.go
@@ -135,7 +135,7 @@ func (m *Manager) DestroyTombstone(
 	regenConfigTasks, _ := buildRegenConfigTasks(m, name, topo, base, nodes)
 	t := b.
 		ParallelStep("+ Refresh instance configs", true, regenConfigTasks...).
-		Parallel(true, buildDynReloadPromTasks(metadata.GetTopology())...).
+		Parallel(true, buildReloadPromTasks(metadata.GetTopology())...).
 		Build()
 	if err := t.Execute(task.NewContext()); err != nil {
 		if errorx.Cast(err) != nil {
diff --git a/pkg/cluster/manager/scale_in.go b/pkg/cluster/manager/scale_in.go
index 7aa686fdd6..a255542916 100644
--- a/pkg/cluster/manager/scale_in.go
+++ b/pkg/cluster/manager/scale_in.go
@@ -92,7 +92,7 @@ func (m *Manager) ScaleIn(
 
 	t := b.
 		ParallelStep("+ Refresh instance configs", force, regenConfigTasks...).
-		Parallel(force, buildDynReloadPromTasks(metadata.GetTopology())...).
+		Parallel(force, buildReloadPromTasks(metadata.GetTopology(), nodes...)...).
 		Build()
 
 	if err := t.Execute(task.NewContext()); err != nil {

From ac87e5169c76b89723c29be9c14e2ee1f88c2d7b Mon Sep 17 00:00:00 2001
From: 9547
Date: Tue, 15 Dec 2020 00:41:36 +0800
Subject: [PATCH 4/7] tests(cluster): add prometheus scale-in/out case

---
 tests/tiup-cluster/script/scale_tools.sh              | 10 ++++++++++
 tests/tiup-cluster/topo/full_scale_in_prometheus.yaml |  3 +++
 2 files changed, 13 insertions(+)
 create mode 100644 tests/tiup-cluster/topo/full_scale_in_prometheus.yaml

diff --git a/tests/tiup-cluster/script/scale_tools.sh b/tests/tiup-cluster/script/scale_tools.sh
index 32e7b05d32..17068755fa 100755
--- a/tests/tiup-cluster/script/scale_tools.sh
+++ b/tests/tiup-cluster/script/scale_tools.sh
@@ -35,8 +35,10 @@ function scale_tools() {
     tiup-cluster $client display $name
 
     if [ $test_tls = true ]; then
+        total=19
         total_sub_one=18
     else
+        total=22
         total_sub_one=21
     fi
 
@@ -70,6 +72,14 @@ function scale_tools() {
     topo=./topo/full_scale_in_grafana.yaml
     tiup-cluster $client --yes scale-out $name $topo
 
+    echo "start scale out prometheus"
+    topo=./topo/full_scale_in_prometheus.yaml
+    tiup-cluster $client --yes scale-out $name $topo
+    wait_instance_num_reach $name $total $native_ssh
+    echo "start scale in prometheus"
+    tiup-cluster $client --yes scale-in $name -N n2:9090
+    wait_instance_num_reach $name $total $native_ssh
+
     # make sure grafana dashboards has been set to default (since the full_sale_in_grafana.yaml didn't provide a local dashboards dir)
     ! tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/grafana-3000/dashboards/tidb.json"
 
diff --git a/tests/tiup-cluster/topo/full_scale_in_prometheus.yaml b/tests/tiup-cluster/topo/full_scale_in_prometheus.yaml
new file mode 100644
index 0000000000..533238961e
--- /dev/null
+++ b/tests/tiup-cluster/topo/full_scale_in_prometheus.yaml
@@ -0,0 +1,3 @@
+monitoring_servers:
+  - host: n2
+    rule_dir: /tmp/local/prometheus

From e1975568bdea40361dd24ec241f4998c6cf13dc3 Mon Sep 17 00:00:00 2001
From: 9547
Date: Tue, 15 Dec 2020 00:43:11 +0800
Subject: [PATCH 5/7] chore(git): gitignore not used anymore

---
 tests/tiup-cluster/topo/.gitignore | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 tests/tiup-cluster/topo/.gitignore

diff --git a/tests/tiup-cluster/topo/.gitignore b/tests/tiup-cluster/topo/.gitignore
deleted file mode 100644
index 1e82fc7deb..0000000000
--- a/tests/tiup-cluster/topo/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.yaml

From 5b19e79401ff164b574468b3414b01b3e9604242 Mon Sep 17 00:00:00 2001
From: 9547
Date: Tue, 15 Dec 2020 09:38:01 +0800
Subject: [PATCH 6/7] fix(tests/tools): wrong count

---
 tests/tiup-cluster/script/scale_tools.sh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/tiup-cluster/script/scale_tools.sh b/tests/tiup-cluster/script/scale_tools.sh
index 17068755fa..bdd9555ce5 100755
--- a/tests/tiup-cluster/script/scale_tools.sh
+++ b/tests/tiup-cluster/script/scale_tools.sh
@@ -35,11 +35,13 @@ function scale_tools() {
     tiup-cluster $client display $name
 
     if [ $test_tls = true ]; then
-        total=19
         total_sub_one=18
+        total=19
+        total_add_one=20
     else
-        total=22
         total_sub_one=21
+        total=22
+        total_add_one=23
     fi
 
     echo "start scale in pump"
@@ -74,8 +76,8 @@ function scale_tools() {
 
     echo "start scale out prometheus"
     topo=./topo/full_scale_in_prometheus.yaml
+    wait_instance_num_reach $name $total_add_one $native_ssh
     tiup-cluster $client --yes scale-out $name $topo
-    wait_instance_num_reach $name $total $native_ssh
     echo "start scale in prometheus"
     tiup-cluster $client --yes scale-in $name -N n2:9090
     wait_instance_num_reach $name $total $native_ssh

From 27a58719f0468e62055b123cf3de55cf05061f57 Mon Sep 17 00:00:00 2001
From: 9547
Date: Wed, 16 Dec 2020 14:30:38 +0800
Subject: [PATCH 7/7] fix(tests/tiup-cluster): wait nodes after scale

---
 tests/tiup-cluster/script/scale_tools.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tiup-cluster/script/scale_tools.sh b/tests/tiup-cluster/script/scale_tools.sh
index bdd9555ce5..5d2349ddc1 100755
--- a/tests/tiup-cluster/script/scale_tools.sh
+++ b/tests/tiup-cluster/script/scale_tools.sh
@@ -76,8 +76,8 @@ function scale_tools() {
 
     echo "start scale out prometheus"
     topo=./topo/full_scale_in_prometheus.yaml
-    wait_instance_num_reach $name $total_add_one $native_ssh
     tiup-cluster $client --yes scale-out $name $topo
+    wait_instance_num_reach $name $total_add_one $native_ssh
     echo "start scale in prometheus"
     tiup-cluster $client --yes scale-in $name -N n2:9090
     wait_instance_num_reach $name $total $native_ssh