From 495d29ae3a3df6b7b49cfefeec4db957c8b0c91c Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Tue, 5 Feb 2019 15:37:08 -0500 Subject: [PATCH 1/7] Call RemoveServer for reap events In both the leaving and reaping case the server is no longer available and therefore we must remove it from our list of servers. --- agent/router/serf_adapter.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/agent/router/serf_adapter.go b/agent/router/serf_adapter.go index 12941b140a21..55c711a43538 100644 --- a/agent/router/serf_adapter.go +++ b/agent/router/serf_adapter.go @@ -53,7 +53,7 @@ func HandleSerfEvents(logger *log.Logger, router *Router, areaID types.AreaID, s case serf.EventMemberJoin: handleMemberEvent(logger, router.AddServer, areaID, e) - case serf.EventMemberLeave: + case serf.EventMemberLeave, serf.EventMemberReap: handleMemberEvent(logger, router.RemoveServer, areaID, e) case serf.EventMemberFailed: @@ -61,7 +61,6 @@ func HandleSerfEvents(logger *log.Logger, router *Router, areaID types.AreaID, s // All of these event types are ignored. case serf.EventMemberUpdate: - case serf.EventMemberReap: case serf.EventUser: case serf.EventQuery: From c318f387a9a8369f197f4e7c24478c63a8877ba2 Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Tue, 5 Feb 2019 16:38:23 -0500 Subject: [PATCH 2/7] Add test for reaping servers from the router --- agent/consul/server_test.go | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index d89db26193f2..8c93739159ea 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -20,6 +20,8 @@ import ( "github.com/hashicorp/consul/testutil/retry" "github.com/hashicorp/consul/types" "github.com/hashicorp/go-uuid" + + "github.com/stretchr/testify/require" ) func configureTLS(config *Config) { @@ -267,6 +269,46 @@ func TestServer_JoinWAN(t *testing.T) { }) } +func TestServer_WANReap(t *testing.T) { + t.Parallel() + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc1" + c.Bootstrap = true + c.SerfFloodInterval = 100 * time.Millisecond + c.SerfWANConfig.ReconnectTimeout = 250 * time.Millisecond + c.SerfWANConfig.ReapInterval = 500 * time.Millisecond + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, s2 := testServerDC(t, "dc2") + defer os.RemoveAll(dir2) + + // Try to join + joinWAN(t, s2, s1) + retry.Run(t, func(r *retry.R) { + require.Len(r, s1.WANMembers(), 2) + require.Len(r, s2.WANMembers(), 2) + }) + + // Check the router has both + retry.Run(t, func(r *retry.R) { + require.Len(r, s1.router.GetDatacenters(), 2) + require.Len(r, s2.router.GetDatacenters(), 2) + }) + + // shutdown the second dc + s2.Shutdown() + + retry.Run(t, func(r *retry.R) { + require.Len(r, s1.WANMembers(), 1) + datacenters := s1.router.GetDatacenters() + require.Len(r, datacenters, 1) + require.Equal(r, "dc1", datacenters[0]) + }) + +} + func TestServer_JoinWAN_Flood(t *testing.T) { t.Parallel() // Set up two servers in a WAN. From 0337fa867375e68b387c61c3777f1cb87913496f Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Fri, 8 Feb 2019 17:06:30 -0500 Subject: [PATCH 3/7] Re-Add servers for member updates. --- agent/router/serf_adapter.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/agent/router/serf_adapter.go b/agent/router/serf_adapter.go index 55c711a43538..4ca9317d90e4 100644 --- a/agent/router/serf_adapter.go +++ b/agent/router/serf_adapter.go @@ -50,7 +50,7 @@ func HandleSerfEvents(logger *log.Logger, router *Router, areaID types.AreaID, s case e := <-eventCh: switch e.EventType() { - case serf.EventMemberJoin: + case serf.EventMemberJoin, serf.EventMemberUpdate: handleMemberEvent(logger, router.AddServer, areaID, e) case serf.EventMemberLeave, serf.EventMemberReap: @@ -60,7 +60,6 @@ func HandleSerfEvents(logger *log.Logger, router *Router, areaID types.AreaID, s handleMemberEvent(logger, router.FailServer, areaID, e) // All of these event types are ignored. - case serf.EventMemberUpdate: case serf.EventUser: case serf.EventQuery: From 8bc52373a8ee98d4a8472ef8655109371f42eff1 Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Thu, 28 Feb 2019 15:43:50 -0500 Subject: [PATCH 4/7] Handle EventMemberReaped and EventMemberUpdated on the LAN This ensures we add/remove updated/failed servers properly. --- agent/consul/client_serf.go | 6 ++---- agent/consul/server_serf.go | 8 ++------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/agent/consul/client_serf.go b/agent/consul/client_serf.go index 51ce0d4ef332..a045e4597a52 100644 --- a/agent/consul/client_serf.go +++ b/agent/consul/client_serf.go @@ -68,14 +68,12 @@ func (c *Client) lanEventHandler() { select { case e := <-c.eventCh: switch e.EventType() { - case serf.EventMemberJoin: + case serf.EventMemberJoin, serf.EventMemberUpdate: c.nodeJoin(e.(serf.MemberEvent)) - case serf.EventMemberLeave, serf.EventMemberFailed: + case serf.EventMemberLeave, serf.EventMemberFailed, serf.EventMemberReap: c.nodeFail(e.(serf.MemberEvent)) case serf.EventUser: c.localEvent(e.(serf.UserEvent)) - case serf.EventMemberUpdate: // Ignore - case serf.EventMemberReap: // Ignore case serf.EventQuery: // Ignore default: c.logger.Printf("[WARN] consul: unhandled LAN Serf Event: %#v", e) diff --git a/agent/consul/server_serf.go b/agent/consul/server_serf.go index 087188a4f21a..9a805301f760 100644 --- a/agent/consul/server_serf.go +++ b/agent/consul/server_serf.go @@ -133,20 +133,16 @@ func (s *Server) lanEventHandler() { select { case e := <-s.eventChLAN: switch e.EventType() { - case serf.EventMemberJoin: + case serf.EventMemberJoin, serf.EventMemberUpdate: s.lanNodeJoin(e.(serf.MemberEvent)) s.localMemberEvent(e.(serf.MemberEvent)) - case serf.EventMemberLeave, serf.EventMemberFailed: + case serf.EventMemberLeave, serf.EventMemberFailed, serf.EventMemberReap: s.lanNodeFailed(e.(serf.MemberEvent)) s.localMemberEvent(e.(serf.MemberEvent)) - case serf.EventMemberReap: - s.localMemberEvent(e.(serf.MemberEvent)) case serf.EventUser: s.localEvent(e.(serf.UserEvent)) - case serf.EventMemberUpdate: - s.localMemberEvent(e.(serf.MemberEvent)) case serf.EventQuery: // Ignore default: s.logger.Printf("[WARN] consul: Unhandled LAN Serf Event: %#v", e) From 9357bfb06987dfe33f9d2622772451edf4f29e0f Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Thu, 28 Feb 2019 16:51:59 -0500 Subject: [PATCH 5/7] Add a server reap lan node test --- agent/consul/server_test.go | 61 +++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index 8c93739159ea..ddb44d489dbc 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -237,6 +237,67 @@ func TestServer_JoinLAN(t *testing.T) { }) } +func TestServer_LANReap(t *testing.T) { + t.Parallel() + dir1, s1 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc1" + c.Bootstrap = true + c.SerfFloodInterval = 100 * time.Millisecond + c.SerfLANConfig.ReconnectTimeout = 250 * time.Millisecond + c.SerfLANConfig.ReapInterval = 500 * time.Millisecond + }) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, s2 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc1" + c.Bootstrap = false + c.SerfFloodInterval = 100 * time.Millisecond + c.SerfLANConfig.ReconnectTimeout = 250 * time.Millisecond + c.SerfLANConfig.ReapInterval = 500 * time.Millisecond + }) + defer os.RemoveAll(dir2) + + dir3, s3 := testServerWithConfig(t, func(c *Config) { + c.Datacenter = "dc1" + c.Bootstrap = false + c.SerfFloodInterval = 100 * time.Millisecond + c.SerfLANConfig.ReconnectTimeout = 250 * time.Millisecond + c.SerfLANConfig.ReapInterval = 500 * time.Millisecond + }) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + joinLAN(t, s2, s1) + joinLAN(t, s3, s1) + + testrpc.WaitForLeader(t, s3.RPC, "dc1") + + retry.Run(t, func(r *retry.R) { + require.Len(r, s1.LANMembers(), 3) + require.Len(r, s2.LANMembers(), 3) + require.Len(r, s3.LANMembers(), 3) + }) + + // Check the router has both + retry.Run(t, func(r *retry.R) { + require.Len(r, s1.serverLookup.Servers(), 3) + require.Len(r, s2.serverLookup.Servers(), 3) + require.Len(r, s3.serverLookup.Servers(), 3) + }) + + // shutdown the second dc + s2.Shutdown() + + retry.Run(t, func(r *retry.R) { + require.Len(r, s1.LANMembers(), 2) + servers := s1.serverLookup.Servers() + require.Len(r, servers, 2) + // require.Equal(r, s1.config.NodeName, servers[0].Name) + }) +} + func TestServer_JoinWAN(t *testing.T) { t.Parallel() dir1, s1 := testServer(t) From 6b0a6dff90e10f436d85b94430a58cc84d7ebefb Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Fri, 1 Mar 2019 16:24:15 -0500 Subject: [PATCH 6/7] Add a test for reap LAN servers from clients. --- agent/consul/client_test.go | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/agent/consul/client_test.go b/agent/consul/client_test.go index d28e3994ee81..3c82551b2cdf 100644 --- a/agent/consul/client_test.go +++ b/agent/consul/client_test.go @@ -99,6 +99,46 @@ func TestClient_JoinLAN(t *testing.T) { }) } +func TestClient_LANReap(t *testing.T) { + t.Parallel() + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + + dir2, c1 := testClientWithConfig(t, func(c *Config) { + c.Datacenter = "dc1" + c.SerfFloodInterval = 100 * time.Millisecond + c.SerfLANConfig.ReconnectTimeout = 250 * time.Millisecond + c.SerfLANConfig.ReapInterval = 500 * time.Millisecond + }) + defer os.RemoveAll(dir2) + defer c1.Shutdown() + + // Try to join + joinLAN(t, c1, s1) + testrpc.WaitForLeader(t, c1.RPC, "dc1") + + retry.Run(t, func(r *retry.R) { + require.Len(r, s1.LANMembers(), 2) + require.Len(r, c1.LANMembers(), 2) + }) + + // Check the router has both + retry.Run(t, func(r *retry.R) { + server := c1.routers.FindServer() + require.NotNil(t, server) + require.Equal(t, s1.config.NodeName, server.Name) + }) + + // shutdown the second dc + s1.Shutdown() + + retry.Run(t, func(r *retry.R) { + require.Len(r, c1.LANMembers(), 1) + server := c1.routers.FindServer() + require.Nil(t, server) + }) +} + func TestClient_JoinLAN_Invalid(t *testing.T) { t.Parallel() dir1, s1 := testServer(t) From 13ee12e2017738e88de39c58a8d5df81899091b3 Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Fri, 1 Mar 2019 17:03:24 -0500 Subject: [PATCH 7/7] Dont touch the EventMemberUpdate handling I dont think this will cause a problem to ignore after further investigation. --- agent/consul/client_serf.go | 3 ++- agent/consul/server_serf.go | 4 +++- agent/router/serf_adapter.go | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/agent/consul/client_serf.go b/agent/consul/client_serf.go index a045e4597a52..5b0c769fafb0 100644 --- a/agent/consul/client_serf.go +++ b/agent/consul/client_serf.go @@ -68,12 +68,13 @@ func (c *Client) lanEventHandler() { select { case e := <-c.eventCh: switch e.EventType() { - case serf.EventMemberJoin, serf.EventMemberUpdate: + case serf.EventMemberJoin: c.nodeJoin(e.(serf.MemberEvent)) case serf.EventMemberLeave, serf.EventMemberFailed, serf.EventMemberReap: c.nodeFail(e.(serf.MemberEvent)) case serf.EventUser: c.localEvent(e.(serf.UserEvent)) + case serf.EventMemberUpdate: // Ignore case serf.EventQuery: // Ignore default: c.logger.Printf("[WARN] consul: unhandled LAN Serf Event: %#v", e) diff --git a/agent/consul/server_serf.go b/agent/consul/server_serf.go index 9a805301f760..94020816de2a 100644 --- a/agent/consul/server_serf.go +++ b/agent/consul/server_serf.go @@ -133,7 +133,7 @@ func (s *Server) lanEventHandler() { select { case e := <-s.eventChLAN: switch e.EventType() { - case serf.EventMemberJoin, serf.EventMemberUpdate: + case serf.EventMemberJoin: s.lanNodeJoin(e.(serf.MemberEvent)) s.localMemberEvent(e.(serf.MemberEvent)) @@ -143,6 +143,8 @@ func (s *Server) lanEventHandler() { case serf.EventUser: s.localEvent(e.(serf.UserEvent)) + case serf.EventMemberUpdate: + s.localMemberEvent(e.(serf.MemberEvent)) case serf.EventQuery: // Ignore default: s.logger.Printf("[WARN] consul: Unhandled LAN Serf Event: %#v", e) diff --git a/agent/router/serf_adapter.go b/agent/router/serf_adapter.go index 4ca9317d90e4..55c711a43538 100644 --- a/agent/router/serf_adapter.go +++ b/agent/router/serf_adapter.go @@ -50,7 +50,7 @@ func HandleSerfEvents(logger *log.Logger, router *Router, areaID types.AreaID, s case e := <-eventCh: switch e.EventType() { - case serf.EventMemberJoin, serf.EventMemberUpdate: + case serf.EventMemberJoin: handleMemberEvent(logger, router.AddServer, areaID, e) case serf.EventMemberLeave, serf.EventMemberReap: @@ -60,6 +60,7 @@ func HandleSerfEvents(logger *log.Logger, router *Router, areaID types.AreaID, s handleMemberEvent(logger, router.FailServer, areaID, e) // All of these event types are ignored. + case serf.EventMemberUpdate: case serf.EventUser: case serf.EventQuery: