From 62bc962f1c8f49014ebf4af3b637f907a30e9284 Mon Sep 17 00:00:00 2001 From: Hengfeng Li Date: Fri, 13 Mar 2020 13:43:53 +1100 Subject: [PATCH] spanner: update the health check interval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This includes the following changes: * adjust the default interval to 50 minutes. * the first healthcheck is scheduled to [interval*0.2, interval*1.1), i.e., [10, 55) mins. * the non-first healthchecks are scheduled to [interval*0.9, interval*1.1), so the new range will become [45, 55) mins. * add a separately sourced random generator in session pool. Fixes #1817 Change-Id: I7dc612063815279b2f6a3b2b24c17ae6d52c14a2 Reviewed-on: https://code-review.googlesource.com/c/gocloud/+/53252 Reviewed-by: kokoro Reviewed-by: Knut Olav Løite --- spanner/session.go | 31 ++++++++++++++++++---- spanner/session_test.go | 59 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/spanner/session.go b/spanner/session.go index 3d861c9c507b..3f6ea4844173 100644 --- a/spanner/session.go +++ b/spanner/session.go @@ -35,6 +35,8 @@ import ( "google.golang.org/grpc/metadata" ) +const healthCheckIntervalMins = 50 + // sessionHandle is an interface for transactions to access Cloud Spanner // sessions safely. It is generated by sessionPool.take(). type sessionHandle struct { @@ -192,6 +194,8 @@ type session struct { // tx contains the transaction id if the session has been prepared for // write. tx transactionID + // firstHCDone indicates whether the first health check has been scheduled. + firstHCDone bool } // isValid returns true if the session is still valid for use.
@@ -434,7 +438,7 @@ var DefaultSessionPoolConfig = SessionPoolConfig{ MaxBurst: 10, WriteSessions: 0.2, HealthCheckWorkers: 10, - HealthCheckInterval: 30 * time.Minute, + HealthCheckInterval: healthCheckIntervalMins * time.Minute, } // errMinOpenedGTMapOpened returns error for SessionPoolConfig.MaxOpened < SessionPoolConfig.MinOpened when SessionPoolConfig.MaxOpened is set. @@ -520,6 +524,9 @@ type sessionPool struct { // mw is the maintenance window containing statistics for the max number of // sessions checked out of the pool during the last 10 minutes. mw *maintenanceWindow + + // rand is a separately sourced random generator. + rand *rand.Rand } // newSessionPool creates a new session pool. @@ -533,6 +540,7 @@ func newSessionPool(sc *sessionClient, config SessionPoolConfig) (*sessionPool, mayGetSession: make(chan struct{}), SessionPoolConfig: config, mw: newMaintenanceWindow(config.MaxOpened), + rand: rand.New(rand.NewSource(time.Now().UnixNano())), } if config.HealthCheckWorkers == 0 { // With 10 workers and assuming average latency of 5ms for @@ -544,7 +552,7 @@ func newSessionPool(sc *sessionClient, config SessionPoolConfig) (*sessionPool, config.HealthCheckWorkers = 10 } if config.HealthCheckInterval == 0 { - config.HealthCheckInterval = 5 * time.Minute + config.HealthCheckInterval = healthCheckIntervalMins * time.Minute } if config.healthCheckSampleInterval == 0 { config.healthCheckSampleInterval = time.Minute @@ -1187,9 +1195,22 @@ func (hc *healthChecker) getInterval() time.Duration { // scheduledHCLocked schedules next healthcheck on session s with the assumption // that hc.mu is being held. func (hc *healthChecker) scheduledHCLocked(s *session) { - // The next healthcheck will be scheduled after - // [interval*0.5, interval*1.5) ns. 
- nsFromNow := rand.Int63n(int64(hc.interval)) + int64(hc.interval)/2 + var constPart, randPart float64 + if !s.firstHCDone { + // The first check will be scheduled in a large range to make requests + // more evenly distributed. The first healthcheck will be scheduled + // after [interval*0.2, interval*1.1) ns. + constPart = float64(hc.interval) * 0.2 + randPart = hc.pool.rand.Float64() * float64(hc.interval) * 0.9 + s.firstHCDone = true + } else { + // The next healthcheck will be scheduled after + // [interval*0.9, interval*1.1) ns. + constPart = float64(hc.interval) * 0.9 + randPart = hc.pool.rand.Float64() * float64(hc.interval) * 0.2 + } + // math.Ceil rounds up so that any positive delay is at least 1 ns. + nsFromNow := int64(math.Ceil(constPart + randPart)) s.setNextCheck(time.Now().Add(time.Duration(nsFromNow))) if hi := s.getHcIndex(); hi != -1 { // Session is still being tracked by healthcheck workers. diff --git a/spanner/session_test.go b/spanner/session_test.go index 0aee9fe8c1cf..a3ef5198e371 100644 --- a/spanner/session_test.go +++ b/spanner/session_test.go @@ -880,6 +880,65 @@ func TestHealthCheckScheduler(t *testing.T) { }) } +// TestHealthCheck_FirstHealthCheck tests if the first healthcheck scheduling +// works properly. +func TestHealthCheck_FirstHealthCheck(t *testing.T) { + t.Parallel() + _, client, teardown := setupMockedTestServerWithConfig(t, + ClientConfig{ + SessionPoolConfig: SessionPoolConfig{ + MaxOpened: 0, + MinOpened: 0, + HealthCheckInterval: 50 * time.Minute, + }, + }) + defer teardown() + sp := client.idleSessions + + now := time.Now() + start := now.Add(time.Duration(float64(sp.hc.interval) * 0.2)) + // A second is added to avoid the edge case.
+ end := now.Add(time.Duration(float64(sp.hc.interval)*1.1) + time.Second) + + s := &session{} + sp.hc.scheduledHCLocked(s) + + if s.nextCheck.Before(start) || s.nextCheck.After(end) { + t.Fatalf("The first healthcheck schedule is not in the correct range: %v", s.nextCheck) + } + if !s.firstHCDone { + t.Fatal("The flag 'firstHCDone' should be set to true after the first healthcheck.") + } +} + +// TestHealthCheck_NonFirstHealthCheck tests if the scheduling after the first +// health check works properly. +func TestHealthCheck_NonFirstHealthCheck(t *testing.T) { + t.Parallel() + _, client, teardown := setupMockedTestServerWithConfig(t, + ClientConfig{ + SessionPoolConfig: SessionPoolConfig{ + MaxOpened: 0, + MinOpened: 0, + HealthCheckInterval: 50 * time.Minute, + }, + }) + defer teardown() + sp := client.idleSessions + + now := time.Now() + start := now.Add(time.Duration(float64(sp.hc.interval) * 0.9)) + // A second is added to avoid the edge case. + end := now.Add(time.Duration(float64(sp.hc.interval)*1.1) + time.Second) + + s := &session{firstHCDone: true} + sp.hc.scheduledHCLocked(s) + + if s.nextCheck.Before(start) || s.nextCheck.After(end) { + t.Fatalf("The non-first healthcheck schedule is not in the correct range: %v", s.nextCheck) + } +} + // Tests that a fractions of sessions are prepared for write by health checker. func TestWriteSessionsPrepared(t *testing.T) { t.Parallel()