Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests: Add SIGKILL functional test #13924

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions tests/functional/agent/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ func (srv *Server) handleTesterRequest(req *rpcpb.Request) (resp *rpcpb.Response

case rpcpb.Operation_SIGTERM_ETCD:
return srv.handle_SIGTERM_ETCD()
case rpcpb.Operation_SIGKILL_ETCD:
return srv.handle_SIGKILL_ETCD()
case rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA:
return srv.handle_SIGQUIT_ETCD_AND_REMOVE_DATA()

Expand Down Expand Up @@ -551,6 +553,23 @@ func (srv *Server) handle_SIGTERM_ETCD() (*rpcpb.Response, error) {
}, nil
}

// handle_SIGKILL_ETCD serves the SIGKILL_ETCD operation: it stops etcd by
// calling srv.stopEtcd with syscall.SIGKILL and, when that succeeds, syncs
// the logging sink before reporting success.
//
// Any error from stopEtcd is returned as-is together with a nil response.
func (srv *Server) handle_SIGKILL_ETCD() (*rpcpb.Response, error) {
	err := srv.stopEtcd(syscall.SIGKILL)
	if err != nil {
		return nil, err
	}

	// Sync whichever log sink is in use: the server's logger when an
	// etcdServer is set, the etcd log file otherwise. The Sync results are
	// not checked.
	switch {
	case srv.etcdServer != nil:
		srv.etcdServer.GetLogger().Sync()
	default:
		srv.etcdLogFile.Sync()
	}

	killed := &rpcpb.Response{
		Success: true,
		Status:  "killed etcd",
	}
	return killed, nil
}

func (srv *Server) handle_SIGQUIT_ETCD_AND_REMOVE_DATA() (*rpcpb.Response, error) {
err := srv.stopEtcd(syscall.SIGQUIT)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions tests/functional/functional.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ tester-config:
- SIGTERM_ALL
- SIGQUIT_AND_REMOVE_ONE_FOLLOWER
- SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT
- SIGKILL_FOLLOWER
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to SIGKILL_ONE_FOLLOWER to keep the naming consistent with the existing SIGTERM_ONE_FOLLOWER and SIGQUIT_AND_REMOVE_ONE_FOLLOWER?

- SIGKILL_LEADER
- BLACKHOLE_PEER_PORT_TX_RX_LEADER
- BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT
- BLACKHOLE_PEER_PORT_TX_RX_QUORUM
Expand Down
400 changes: 206 additions & 194 deletions tests/functional/rpcpb/rpc.pb.go

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions tests/functional/rpcpb/rpc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,9 @@ enum Operation {
// SIGQUIT_ETCD_AND_REMOVE_DATA kills etcd process and removes all data
// directories to simulate destroying the whole machine.
SIGQUIT_ETCD_AND_REMOVE_DATA = 21;
// SIGKILL_ETCD kills etcd process while keeping data directories
// and previous etcd configurations.
SIGKILL_ETCD = 22;

// SAVE_SNAPSHOT is sent to trigger local member to download its snapshot
// onto its local disk with the specified path from tester.
Expand Down Expand Up @@ -426,6 +429,8 @@ enum Case {
// each member must be able to process client requests.
SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH = 14;

SIGKILL_FOLLOWER = 15;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SIGKILL_ONE_FOLLOWER? The same as above.

SIGKILL_LEADER = 16;
// BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER drops all outgoing/incoming
// packets from/to the peer port on a randomly chosen follower
// (non-leader), and waits for "delay-ms" until recovery.
Expand Down
42 changes: 42 additions & 0 deletions tests/functional/tester/case_sigquit_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,30 @@ func recover_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
return err
}

func inject_SIGKILL(clus *Cluster, index int) error {
clus.lg.Info(
"disastrous machine failure START",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(SIGKILL)

zap.String("target-endpoint", clus.Members[index].EtcdClientEndpoint),
)
err := clus.sendOp(index, rpcpb.Operation_SIGKILL_ETCD)
clus.lg.Info(
"disastrous machine failure END",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SIGKILL

zap.String("target-endpoint", clus.Members[index].EtcdClientEndpoint),
zap.Error(err),
)
return err
}

// recover_SIGKILL sends the RESTART_ETCD operation to the member at position
// idx1 and logs the targeted client endpoint along with the outcome.
//
// The error returned by clus.sendOp is handed back to the caller unchanged.
func recover_SIGKILL(clus *Cluster, idx1 int) error {
	restartErr := clus.sendOp(idx1, rpcpb.Operation_RESTART_ETCD)

	// The endpoint is read only after the operation has been sent, so the
	// log line reflects the member as it is once sendOp has returned.
	endpoint := clus.Members[idx1].EtcdClientEndpoint
	clus.lg.Info("restart machine",
		zap.String("target-endpoint", endpoint),
		zap.Error(restartErr),
	)

	return restartErr
}

func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Case {
cc := caseByFunc{
rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER,
Expand All @@ -187,6 +211,24 @@ func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Case {
}
}

// new_Case_SIGKILL_FOLLOWER builds the SIGKILL_FOLLOWER case: a caseFollower
// that injects with inject_SIGKILL and recovers with recover_SIGKILL.
func new_Case_SIGKILL_FOLLOWER() Case {
	return &caseFollower{
		caseByFunc{
			rpcpbCase:     rpcpb.Case_SIGKILL_FOLLOWER,
			injectMember:  inject_SIGKILL,
			recoverMember: recover_SIGKILL,
		},
		// Both trailing fields start at -1.
		-1,
		-1,
	}
}

// new_Case_SIGKILL_LEADER builds the SIGKILL_LEADER case: a caseLeader that
// injects with inject_SIGKILL and recovers with recover_SIGKILL.
func new_Case_SIGKILL_LEADER() Case {
	return &caseLeader{
		caseByFunc{
			rpcpbCase:     rpcpb.Case_SIGKILL_LEADER,
			injectMember:  inject_SIGKILL,
			recoverMember: recover_SIGKILL,
		},
		// Both trailing fields start at -1.
		-1,
		-1,
	}
}

func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case {
return &caseUntilSnapshot{
rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT,
Expand Down
6 changes: 6 additions & 0 deletions tests/functional/tester/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,12 @@ func (clus *Cluster) updateCases() {
case "SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH":
clus.cases = append(clus.cases,
new_Case_SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH(clus))
case "SIGKILL_FOLLOWER":
clus.cases = append(clus.cases,
new_Case_SIGKILL_FOLLOWER())
case "SIGKILL_LEADER":
clus.cases = append(clus.cases,
new_Case_SIGKILL_LEADER())

case "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER":
clus.cases = append(clus.cases,
Expand Down
2 changes: 2 additions & 0 deletions tests/functional/tester/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ func Test_read(t *testing.T) {
"SIGTERM_ALL",
"SIGQUIT_AND_REMOVE_ONE_FOLLOWER",
"SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT",
"SIGKILL_FOLLOWER",
"SIGKILL_LEADER",
// "SIGQUIT_AND_REMOVE_LEADER",
// "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT",
// "SIGQUIT_AND_REMOVE_QUORUM_AND_RESTORE_LEADER_SNAPSHOT_FROM_SCRATCH",
Expand Down