Feature/remove public key after destroyed #910

Merged
13 changes: 12 additions & 1 deletion components/dm/command/scale_in.go
@@ -80,11 +80,13 @@ func ScaleInDMCluster(
) error {
// instances by uuid
instances := map[string]dm.Instance{}
instCount := map[string]int{}

// make sure all nodeIds exist in the topology
for _, component := range topo.ComponentsByStartOrder() {
for _, instance := range component.Instances() {
instances[instance.ID()] = instance
instCount[instance.GetHost()]++
}
}

@@ -110,7 +112,8 @@ func ScaleInDMCluster(
if !deletedNodes.Exist(instance.ID()) {
continue
}
if err := operator.StopAndDestroyInstance(getter, topo, instance, options, false); err != nil {
instCount[instance.GetHost()]--
if err := operator.StopAndDestroyInstance(getter, topo, instance, options, instCount[instance.GetHost()] == 0); err != nil {
log.Warnf("failed to stop/destroy %s: %v", component.Name(), err)
}
}
@@ -162,6 +165,14 @@ func ScaleInDMCluster(
if err := operator.DestroyComponent(getter, []dm.Instance{instance}, topo, options); err != nil {
return errors.Annotatef(err, "failed to destroy %s", component.Name())
}

instCount[instance.GetHost()]--
if instCount[instance.GetHost()] == 0 {
if err := operator.DeletePublicKey(getter, instance.GetHost()); err != nil {
return errors.Annotatef(err, "failed to delete public key")
}
}

}
}

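The change above introduces the per-host reference count that drives the whole feature: each destroyed instance decrements its host's count, and the SSH public key is removed only when the last instance on that host goes away. A minimal, self-contained sketch of the pattern (simplified types, not the PR's actual API):

```go
package main

import "fmt"

type instance struct{ id, host string }

func main() {
	// Host h1 carries two instances, h2 carries one.
	all := []instance{{"dm-master-1", "h1"}, {"dm-worker-1", "h1"}, {"dm-worker-2", "h2"}}

	// Count instances per host, as ScaleInDMCluster does.
	instCount := map[string]int{}
	for _, inst := range all {
		instCount[inst.host]++
	}

	// Destroying an instance decrements its host's count; the SSH public
	// key is deleted only when the count reaches zero (last instance gone).
	for _, inst := range all {
		instCount[inst.host]--
		if instCount[inst.host] == 0 {
			fmt.Printf("destroyed %s; deleting public key on %s\n", inst.id, inst.host)
		}
	}
}
```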
30 changes: 30 additions & 0 deletions pkg/cluster/executor/executor.go
@@ -47,6 +47,9 @@ var (
// It's used to predict if the connection can be established successfully in the future.
// Its main purpose is to avoid sshpass hanging when the user specified a wrong prompt.
connectionTestCommand = "echo connection test, if killed, check the password prompt"

// SSH authorized_keys file
defaultSSHAuthorizedKeys = "~/.ssh/authorized_keys"
)

// Executor is the executor interface for TiOps, all tasks will in the end
@@ -155,3 +158,30 @@ func checkLocalIP(ip string) error {

return fmt.Errorf("address %s not found in all interfaces, found ips: %s", ip, strings.Join(foundIps, ","))
}

// FindSSHAuthorizedKeysFile finds the correct path of SSH authorized keys file
func FindSSHAuthorizedKeysFile(exec Executor) string {
// detect if custom path of authorized keys file is set
// NOTE: we do not yet support:
// - custom config for user (~/.ssh/config)
// - sshd started with custom config (other than /etc/ssh/sshd_config)
// - ssh server implementations other than OpenSSH (such as dropbear)
sshAuthorizedKeys := defaultSSHAuthorizedKeys
cmd := "grep -Ev '^\\s*#|^\\s*$' /etc/ssh/sshd_config"
stdout, _, _ := exec.Execute(cmd, true) // error ignored as we have default value
for _, line := range strings.Split(string(stdout), "\n") {
if !strings.Contains(line, "AuthorizedKeysFile") {
continue
}
fields := strings.Fields(line)
if len(fields) >= 2 {
sshAuthorizedKeys = fields[1]
break
}
}

if !strings.HasPrefix(sshAuthorizedKeys, "/") && !strings.HasPrefix(sshAuthorizedKeys, "~") {
sshAuthorizedKeys = fmt.Sprintf("~/%s", sshAuthorizedKeys)
}
return sshAuthorizedKeys
}
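For reviewers, the helper's behaviour in isolation: the first uncommented `AuthorizedKeysFile` directive in `sshd_config` wins (note the `break`), and a relative path is anchored under the user's home directory. A standalone sketch of the same parsing logic (`findAuthorizedKeys` is hypothetical, not part of the PR):

```go
package main

import (
	"fmt"
	"strings"
)

// findAuthorizedKeys mirrors FindSSHAuthorizedKeysFile's parsing: the first
// AuthorizedKeysFile directive wins, and a relative path is treated as
// relative to the user's home directory.
func findAuthorizedKeys(sshdConfig string) string {
	keysFile := "~/.ssh/authorized_keys" // default when no directive is found
	for _, line := range strings.Split(sshdConfig, "\n") {
		if !strings.Contains(line, "AuthorizedKeysFile") {
			continue
		}
		if fields := strings.Fields(line); len(fields) >= 2 {
			keysFile = fields[1]
			break
		}
	}
	if !strings.HasPrefix(keysFile, "/") && !strings.HasPrefix(keysFile, "~") {
		keysFile = "~/" + keysFile
	}
	return keysFile
}

func main() {
	fmt.Println(findAuthorizedKeys("Port 22\nAuthorizedKeysFile .ssh/my_keys"))
	// prints: ~/.ssh/my_keys
}
```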
2 changes: 1 addition & 1 deletion pkg/cluster/operation/action.go
@@ -114,7 +114,7 @@ func Stop(

instCount := map[string]int{}
cluster.IterInstance(func(inst spec.Instance) {
instCount[inst.GetHost()] = instCount[inst.GetHost()] + 1
instCount[inst.GetHost()]++
})

for _, comp := range components {
Expand Down
92 changes: 75 additions & 17 deletions pkg/cluster/operation/destroy.go
@@ -14,16 +14,20 @@
package operator

import (
"bytes"
"crypto/tls"
"fmt"
"io/ioutil"
"path"
"path/filepath"
"strconv"
"strings"
"time"

"github.com/pingcap/errors"
perrs "github.com/pingcap/errors"
"github.com/pingcap/tiup/pkg/cluster/api"
"github.com/pingcap/tiup/pkg/cluster/executor"
"github.com/pingcap/tiup/pkg/cluster/module"
"github.com/pingcap/tiup/pkg/cluster/spec"
"github.com/pingcap/tiup/pkg/logger/log"
@@ -54,12 +58,11 @@ func Destroy(
cluster spec.Topology,
options Options,
) error {
uniqueHosts := set.NewStringSet()
coms := cluster.ComponentsByStopOrder()

instCount := map[string]int{}
cluster.IterInstance(func(inst spec.Instance) {
instCount[inst.GetHost()] = instCount[inst.GetHost()] + 1
instCount[inst.GetHost()]++
})

for _, com := range coms {
@@ -80,9 +83,18 @@
}
}

gOpts := cluster.BaseTopo().GlobalOptions

// Delete all global deploy directory
for host := range uniqueHosts {
if err := DeleteGlobalDirs(getter, host, cluster.BaseTopo().GlobalOptions); err != nil {
for host := range instCount {
if err := DeleteGlobalDirs(getter, host, gOpts); err != nil {
return err
}
}

// after all things done, try to remove SSH public key
for host := range instCount {
if err := DeletePublicKey(getter, host); err != nil {
return err
}
}
@@ -93,7 +105,7 @@
// StopAndDestroyInstance stops and destroys the instance.
// If this instance is the host's last one, and the host has a monitor deployed,
// we need to destroy the monitor as well
func StopAndDestroyInstance(getter ExecutorGetter, cluster spec.Topology, instance spec.Instance, options Options, destroyMonitor bool) error {
func StopAndDestroyInstance(getter ExecutorGetter, cluster spec.Topology, instance spec.Instance, options Options, destroyNode bool) error {
ignoreErr := options.Force
compName := instance.ComponentName()

@@ -111,22 +123,32 @@ func StopAndDestroyInstance(getter ExecutorGetter, cluster spec.Topology, instan
log.Warnf("failed to destroy %s: %v", compName, err)
}

// monitoredOptions for dm cluster is nil
monitoredOptions := cluster.GetMonitoredOptions()
if destroyNode {
// monitoredOptions for dm cluster is nil
monitoredOptions := cluster.GetMonitoredOptions()

if destroyMonitor && monitoredOptions != nil {
if err := StopMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil {
if !ignoreErr {
return errors.Annotatef(err, "failed to stop monitor")
if monitoredOptions != nil {
if err := StopMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil {
if !ignoreErr {
return errors.Annotatef(err, "failed to stop monitor")
}
log.Warnf("failed to stop %s: %v", "monitor", err)
}
if err := DestroyMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil {
if !ignoreErr {
return errors.Annotatef(err, "failed to destroy monitor")
}
log.Warnf("failed to destroy %s: %v", "monitor", err)
}
log.Warnf("failed to stop %s: %v", "monitor", err)
}
if err := DestroyMonitored(getter, instance, monitoredOptions, options.OptTimeout); err != nil {

if err := DeletePublicKey(getter, instance.GetHost()); err != nil {
if !ignoreErr {
return errors.Annotatef(err, "failed to destroy monitor")
return errors.Annotatef(err, "failed to delete public key")
}
log.Warnf("failed to destroy %s: %v", "monitor", err)
log.Warnf("failed to delete public key")
}

}
return nil
}
@@ -171,12 +193,48 @@ func DeleteGlobalDirs(getter ExecutorGetter, host string, options *spec.GlobalOp
return nil
}

// DeletePublicKey deletes the SSH public key from host
func DeletePublicKey(getter ExecutorGetter, host string) error {
e := getter.Get(host)
log.Infof("Delete public key %s", host)
_, pubKeyPath := getter.GetSSHKeySet()
publicKey, err := ioutil.ReadFile(pubKeyPath)
if err != nil {
return perrs.Trace(err)
}

pubKey := string(bytes.TrimSpace(publicKey))
pubKey = strings.ReplaceAll(pubKey, "/", "\\/")
pubKeysFile := executor.FindSSHAuthorizedKeysFile(e)

// delete the public key using the `sed` utility
c := module.ShellModuleConfig{
Command: fmt.Sprintf("sed -i '/%s/d' %s", pubKey, pubKeysFile),
UseShell: false,
}
shell := module.NewShellModule(c)
stdout, stderr, err := shell.Execute(e)

if len(stdout) > 0 {
fmt.Println(string(stdout))
}
if len(stderr) > 0 {
log.Errorf("%s", stderr)
}

if err != nil {
return errors.Annotatef(err, "failed to delete public key on %s", host)
}

log.Infof("Delete public key %s success", host)
return nil
}

// DestroyMonitored destroy the monitored service.
func DestroyMonitored(getter ExecutorGetter, inst spec.Instance, options *spec.MonitoredOptions, timeout uint64) error {
e := getter.Get(inst.GetHost())
log.Infof("Destroying monitored %s", inst.GetHost())

log.Infof("Destroying monitored")
log.Infof("\tDestroying instance %s", inst.GetHost())

// Stop by systemd.
@@ -433,7 +491,7 @@ func DestroyClusterTombstone(
instCount := map[string]int{}
for _, component := range cluster.ComponentsByStartOrder() {
for _, instance := range component.Instances() {
instCount[instance.GetHost()] = instCount[instance.GetHost()] + 1
instCount[instance.GetHost()]++
}
}

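One detail worth calling out from `DeletePublicKey` above: the key text becomes the pattern of a `sed` address (`sed -i '/PATTERN/d' FILE`), so any `/` in the base64 key material must be escaped or it would terminate the pattern early. A minimal sketch of the command construction (the key is shortened and illustrative):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// base64 key material may contain '/', which would close the sed
	// pattern prematurely if left unescaped.
	pubKey := "ssh-rsa AAAAB3Nza/bCdEf== tidb@control-host" // shortened, illustrative
	pattern := strings.ReplaceAll(pubKey, "/", "\\/")
	fmt.Printf("sed -i '/%s/d' %s\n", pattern, "~/.ssh/authorized_keys")
	// Output: sed -i '/ssh-rsa AAAAB3Nza\/bCdEf== tidb@control-host/d' ~/.ssh/authorized_keys
}
```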
2 changes: 2 additions & 0 deletions pkg/cluster/operation/operation.go
@@ -120,4 +120,6 @@ func FilterInstance(instances []spec.Instance, nodes set.StringSet) (res []spec.
// ExecutorGetter get the executor by host.
type ExecutorGetter interface {
Get(host string) (e executor.Executor)
// GetSSHKeySet gets the SSH private and public key path
GetSSHKeySet() (privateKeyPath, publicKeyPath string)
}
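Since `ExecutorGetter` now requires a second method, every implementation (see `task.Context` below) and any test double must expose the key paths too. A hypothetical minimal stub (names are illustrative, not from the PR):

```go
package operation_test

import "github.com/pingcap/tiup/pkg/cluster/executor"

// fakeGetter is a hypothetical stub satisfying the extended
// operation.ExecutorGetter interface, e.g. for unit tests.
type fakeGetter struct {
	e    executor.Executor
	priv string
	pub  string
}

// Get returns the same executor for every host.
func (g fakeGetter) Get(host string) executor.Executor { return g.e }

// GetSSHKeySet returns the configured key paths.
func (g fakeGetter) GetSSHKeySet() (privateKeyPath, publicKeyPath string) {
	return g.priv, g.pub
}
```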
2 changes: 1 addition & 1 deletion pkg/cluster/operation/scale_in.go
@@ -92,7 +92,7 @@ func ScaleInCluster(
for _, component := range cluster.ComponentsByStartOrder() {
for _, instance := range component.Instances() {
instances[instance.ID()] = instance
instCount[instance.GetHost()] = instCount[instance.GetHost()] + 1
instCount[instance.GetHost()]++
}
}

28 changes: 3 additions & 25 deletions pkg/cluster/task/env_init.go
@@ -19,6 +19,7 @@ import (
"strings"

"github.com/joomcode/errorx"
"github.com/pingcap/tiup/pkg/cluster/executor"
"github.com/pingcap/tiup/pkg/cluster/module"
)

@@ -27,8 +28,6 @@ var (
errEnvInitSubCommandFailed = errNSEnvInit.NewType("sub_command_failed")
// ErrEnvInitFailed is ErrEnvInitFailed
ErrEnvInitFailed = errNSEnvInit.NewType("failed")
// SSH authorized_keys file
defaultSSHAuthorizedKeys = "~/.ssh/authorized_keys"
)

// EnvInit is used to initialize the remote environment, e.g:
Expand Down Expand Up @@ -76,36 +75,15 @@ func (e *EnvInit) execute(ctx *Context) error {
}

// Authorize
cmd := `su - ` + e.deployUser + ` -c 'test -d ~/.ssh || mkdir -p ~/.ssh && chmod 700 ~/.ssh'`
cmd := `su - ` + e.deployUser + ` -c 'mkdir -p ~/.ssh && chmod 700 ~/.ssh'`
_, _, err = exec.Execute(cmd, true)
if err != nil {
return wrapError(errEnvInitSubCommandFailed.
Wrap(err, "Failed to create '~/.ssh' directory for user '%s'", e.deployUser))
}

// detect if custom path of authorized keys file is set
// NOTE: we do not yet support:
// - custom config for user (~/.ssh/config)
// - sshd started with custom config (other than /etc/ssh/sshd_config)
// - ssh server implementations other than OpenSSH (such as dropbear)
sshAuthorizedKeys := defaultSSHAuthorizedKeys
cmd = "grep -Ev '^\\s*#|^\\s*$' /etc/ssh/sshd_config"
stdout, _, _ := exec.Execute(cmd, true) // error ignored as we have default value
for _, line := range strings.Split(string(stdout), "\n") {
if !strings.Contains(line, "AuthorizedKeysFile") {
continue
}
fields := strings.Fields(line)
if len(fields) >= 2 {
sshAuthorizedKeys = fields[1]
}
}

if !strings.HasPrefix(sshAuthorizedKeys, "/") && !strings.HasPrefix(sshAuthorizedKeys, "~") {
sshAuthorizedKeys = fmt.Sprintf("~/%s", sshAuthorizedKeys)
}

pk := strings.TrimSpace(string(pubKey))
sshAuthorizedKeys := executor.FindSSHAuthorizedKeysFile(exec)
cmd = fmt.Sprintf(`su - %[1]s -c 'grep $(echo %[2]s) %[3]s || echo %[2]s >> %[3]s && chmod 600 %[3]s'`,
e.deployUser, pk, sshAuthorizedKeys)
_, _, err = exec.Execute(cmd, true)
9 changes: 7 additions & 2 deletions pkg/cluster/task/task.go
@@ -56,7 +56,7 @@ type (
checkResults map[string][]*operator.CheckResult
}

// The public/private key is used to access remote server via the user `tidb`
// The private/public key is used to access the remote server via the user `tidb`
PrivateKeyPath string
PublicKeyPath string
}
@@ -95,7 +95,7 @@ func NewContext() *Context {
}
}

// Get implements operation ExecutorGetter interface.
// Get implements the operation.ExecutorGetter interface.
func (ctx *Context) Get(host string) (e executor.Executor) {
ctx.exec.Lock()
e, ok := ctx.exec.executors[host]
@@ -107,6 +107,11 @@ func (ctx *Context) Get(host string) (e executor.Executor) {
return
}

// GetSSHKeySet implements the operation.ExecutorGetter interface.
func (ctx *Context) GetSSHKeySet() (privateKeyPath, publicKeyPath string) {
return ctx.PrivateKeyPath, ctx.PublicKeyPath
}

// GetExecutor get the executor.
func (ctx *Context) GetExecutor(host string) (e executor.Executor, ok bool) {
// Mock point for unit test
5 changes: 5 additions & 0 deletions tests/tiup-cluster/script/cmd_subtest.sh
@@ -104,5 +104,10 @@ function cmd_subtest() {

! tiup-cluster $client _test $name data

cp ~/.tiup/storage/cluster/clusters/$name/ssh/id_rsa "/tmp/$name.id_rsa"
tiup-cluster $client --yes destroy $name

# after destroying the cluster, the public key should be deleted
! ssh -o "StrictHostKeyChecking=no" -o "PasswordAuthentication=no" -i "/tmp/$name.id_rsa" tidb@$ipprefix.101 "ls"
unlink "/tmp/$name.id_rsa"
}
2 changes: 2 additions & 0 deletions tests/tiup-cluster/script/scale_core.sh
@@ -78,6 +78,8 @@ function scale_core() {
! tiup-cluster $client exec $name -N $ipprefix.102 --command "ls /home/tidb/deploy/monitor-9100/deploy/monitor-9100"
! tiup-cluster $client exec $name -N $ipprefix.102 --command "ps aux | grep node_exporter | grep -qv grep"
! tiup-cluster $client exec $name -N $ipprefix.102 --command "ps aux | grep blackbox_exporter | grep -qv grep"
# after all components on the node are scaled in, the SSH public key is automatically deleted
! ssh -o "StrictHostKeyChecking=no" -o "PasswordAuthentication=no" -i ~/.tiup/storage/cluster/clusters/$name/ssh/id_rsa tidb@$ipprefix.102 "ls"

echo "start scale out tidb"
topo=./topo/full_scale_in_tidb.yaml
Expand Down
9 changes: 7 additions & 2 deletions tests/tiup-dm/test_cmd.sh
@@ -84,8 +84,13 @@ tiup-dm exec $name -N $ipprefix.101 --command "ls /home/tidb/deploy/grafana-3000
# test create a task and can replicate data
./script/task/run.sh

tiup-dm --yes destroy $name

# test dm log dir
tiup-dm notfound-command 2>&1 | grep $HOME/.tiup/logs/tiup-dm-debug
TIUP_LOG_PATH=/tmp/a/b tiup-dm notfound-command 2>&1 | grep /tmp/a/b/tiup-dm-debug

cp ~/.tiup/storage/dm/clusters/$name/ssh/id_rsa "/tmp/$name.id_rsa"
tiup-dm --yes destroy $name

# after destroying the cluster, the public key should be deleted
! ssh -o "StrictHostKeyChecking=no" -o "PasswordAuthentication=no" -i "/tmp/$name.id_rsa" tidb@$ipprefix.102 "ls"
unlink "/tmp/$name.id_rsa"