Skip to content

Commit

Permalink
feat: implement etcd remove-member cli command
Browse files Browse the repository at this point in the history
Fixes: #3219

We already have `etcd leave`, which makes a node remove itself from the
etcd members.
But if the node can't remove itself because it has no connection to etcd,
we need this `etcd remove-member` CLI command, which removes that node
from the etcd members by issuing the request from a different node.

No unit tests for this command, as running it would destroy the test cluster.

Signed-off-by: Artem Chernyshev <artem.0xD2@gmail.com>
  • Loading branch information
Unix4ever authored and talos-bot committed Mar 1, 2021
1 parent c8ae009 commit 376fdcf
Show file tree
Hide file tree
Showing 8 changed files with 960 additions and 575 deletions.
8 changes: 8 additions & 0 deletions api/machine/machine.proto
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ service MachineService {
rpc Dmesg(DmesgRequest) returns (stream common.Data);
rpc Events(EventsRequest) returns (stream Event);
rpc EtcdMemberList(EtcdMemberListRequest) returns (EtcdMemberListResponse);
rpc EtcdRemoveMember(EtcdRemoveMemberRequest)
returns (EtcdRemoveMemberResponse);
rpc EtcdLeaveCluster(EtcdLeaveClusterRequest)
returns (EtcdLeaveClusterResponse);
rpc EtcdForfeitLeadership(EtcdForfeitLeadershipRequest)
Expand Down Expand Up @@ -736,6 +738,12 @@ message EtcdLeaveClusterRequest {}
// EtcdLeaveCluster is one entry of an EtcdLeaveClusterResponse; it carries
// only the common metadata.
message EtcdLeaveCluster { common.Metadata metadata = 1; }
// EtcdLeaveClusterResponse is the reply of the EtcdLeaveCluster RPC: a list of
// EtcdLeaveCluster entries.
message EtcdLeaveClusterResponse { repeated EtcdLeaveCluster messages = 1; }

// EtcdRemoveMemberRequest is the request of the EtcdRemoveMember RPC.
message EtcdRemoveMemberRequest {
// member identifies the etcd member to remove.
// NOTE(review): presumably the member's hostname (the talosctl command
// documents its argument as <hostname>) — confirm against the server.
string member = 1;
}
// EtcdRemoveMember is one entry of an EtcdRemoveMemberResponse; it carries
// only the common metadata.
message EtcdRemoveMember { common.Metadata metadata = 1; }
// EtcdRemoveMemberResponse is the reply of the EtcdRemoveMember RPC: a list of
// EtcdRemoveMember entries.
message EtcdRemoveMemberResponse { repeated EtcdRemoveMember messages = 1; }

message EtcdForfeitLeadershipRequest {}
message EtcdForfeitLeadership {
common.Metadata metadata = 1;
Expand Down
18 changes: 17 additions & 1 deletion cmd/talosctl/cmd/talos/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,22 @@ var etcdLeaveCmd = &cobra.Command{
},
}

// etcdMemberRemoveCmd is the `remove-member` subcommand: it takes exactly one
// argument and sends it as the Member field of an EtcdRemoveMember request.
var etcdMemberRemoveCmd = &cobra.Command{
	Use:   "remove-member <hostname>",
	Short: "Remove the node from etcd cluster",
	Long: `Use this command only if you want to remove a member which is in broken state.
If there is no access to the node, or the node can't access etcd to call etcd leave.
Always prefer etcd leave over this command.`,
	Args: cobra.ExactArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		// The single positional argument names the member to remove.
		req := &machine.EtcdRemoveMemberRequest{Member: args[0]}

		removeMember := func(ctx context.Context, c *client.Client) error {
			return c.EtcdRemoveMember(ctx, req)
		}

		return WithClient(removeMember)
	},
}

var etcdForfeitLeadershipCmd = &cobra.Command{
Use: "forfeit-leadership",
Short: "Tell node to forfeit etcd cluster leadership",
Expand Down Expand Up @@ -102,6 +118,6 @@ var etcdMemberListCmd = &cobra.Command{
}

// init attaches the etcd subcommands to etcdCmd and then registers etcdCmd
// via addCommand.
func init() {
	// Register every subcommand exactly once: a second AddCommand call with
	// the same commands would list them twice under etcdCmd.
	etcdCmd.AddCommand(etcdLeaveCmd, etcdForfeitLeadershipCmd, etcdMemberListCmd, etcdMemberRemoveCmd)
	addCommand(etcdCmd)
}
23 changes: 23 additions & 0 deletions internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,29 @@ func (s *Server) EtcdMemberList(ctx context.Context, in *machine.EtcdMemberListR
return reply, nil
}

// EtcdRemoveMember implements the machine.MachineServer interface.
//
// It builds an etcd client from the cluster CA and endpoint found in the
// runtime config, asks that client to remove in.Member, and on success
// replies with a single empty EtcdRemoveMember message. Errors from either
// step are wrapped and returned with a nil reply.
func (s *Server) EtcdRemoveMember(ctx context.Context, in *machine.EtcdRemoveMemberRequest) (reply *machine.EtcdRemoveMemberResponse, err error) {
	etcdClient, err := etcd.NewClientFromControlPlaneIPs(
		ctx,
		s.Controller.Runtime().Config().Cluster().CA(),
		s.Controller.Runtime().Config().Cluster().Endpoint(),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create etcd client: %w", err)
	}

	// The close error is deliberately ignored.
	// nolint: errcheck
	defer etcdClient.Close()

	err = etcdClient.RemoveMember(ctx, in.Member)
	if err != nil {
		return nil, fmt.Errorf("failed to remove member: %w", err)
	}

	return &machine.EtcdRemoveMemberResponse{
		Messages: []*machine.EtcdRemoveMember{
			{},
		},
	}, nil
}

// EtcdLeaveCluster implements the machine.MachineServer interface.
func (s *Server) EtcdLeaveCluster(ctx context.Context, in *machine.EtcdLeaveClusterRequest) (reply *machine.EtcdLeaveClusterResponse, err error) {
client, err := etcd.NewClientFromControlPlaneIPs(ctx, s.Controller.Runtime().Config().Cluster().CA(), s.Controller.Runtime().Config().Cluster().Endpoint())
Expand Down
9 changes: 7 additions & 2 deletions internal/pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,19 @@ func validateMemberHealth(ctx context.Context, memberURIs []string) (err error)
}

// LeaveCluster removes the current member from the etcd cluster.
//
// The current member is identified by the hostname reported by os.Hostname,
// which is passed to RemoveMember. A hostname lookup error is returned
// unchanged.
func (c *Client) LeaveCluster(ctx context.Context) error {
	self, lookupErr := os.Hostname()
	if lookupErr != nil {
		return lookupErr
	}

	return c.RemoveMember(ctx, self)
}

// RemoveMember removes the member from the etcd cluster.
//
// nolint: gocyclo
func (c *Client) RemoveMember(ctx context.Context, hostname string) error {
resp, err := c.MemberList(ctx)
if err != nil {
return err
Expand Down
Loading

0 comments on commit 376fdcf

Please sign in to comment.