Skip to content

Commit

Permalink
spanner: update the health check interval
Browse files Browse the repository at this point in the history
This includes the following changes:

* adjust the default interval to 50 minutes.
* the first healthcheck is scheduled at a random point within
[interval*0.2, interval*1.1) from now, i.e., [10, 55) mins with the default interval.
* subsequent healthchecks are scheduled at a random point within
[interval*0.9, interval*1.1) from now, so the new range becomes [45, 55)
mins with the default interval.
* add a separately sourced random generator to the session pool.

Fixes #1817

Change-Id: I7dc612063815279b2f6a3b2b24c17ae6d52c14a2
Reviewed-on: https://code-review.googlesource.com/c/gocloud/+/53252
Reviewed-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Knut Olav Løite <koloite@gmail.com>
  • Loading branch information
hengfengli committed Mar 18, 2020
1 parent 85bbe53 commit 62bc962
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 5 deletions.
31 changes: 26 additions & 5 deletions spanner/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ import (
"google.golang.org/grpc/metadata"
)

// healthCheckIntervalMins is the default session health check interval,
// expressed in minutes. It is multiplied by time.Minute wherever a default
// HealthCheckInterval is filled in.
const healthCheckIntervalMins = 50

// sessionHandle is an interface for transactions to access Cloud Spanner
// sessions safely. It is generated by sessionPool.take().
type sessionHandle struct {
Expand Down Expand Up @@ -192,6 +194,8 @@ type session struct {
// tx contains the transaction id if the session has been prepared for
// write.
tx transactionID
// firstHCDone indicates whether the first health check is done or not.
firstHCDone bool
}

// isValid returns true if the session is still valid for use.
Expand Down Expand Up @@ -434,7 +438,7 @@ var DefaultSessionPoolConfig = SessionPoolConfig{
MaxBurst: 10,
WriteSessions: 0.2,
HealthCheckWorkers: 10,
HealthCheckInterval: 30 * time.Minute,
HealthCheckInterval: healthCheckIntervalMins * time.Minute,
}

// errMinOpenedGTMapOpened returns error for SessionPoolConfig.MaxOpened < SessionPoolConfig.MinOpened when SessionPoolConfig.MaxOpened is set.
Expand Down Expand Up @@ -520,6 +524,9 @@ type sessionPool struct {
// mw is the maintenance window containing statistics for the max number of
// sessions checked out of the pool during the last 10 minutes.
mw *maintenanceWindow

// rand is a separately sourced random generator.
rand *rand.Rand
}

// newSessionPool creates a new session pool.
Expand All @@ -533,6 +540,7 @@ func newSessionPool(sc *sessionClient, config SessionPoolConfig) (*sessionPool,
mayGetSession: make(chan struct{}),
SessionPoolConfig: config,
mw: newMaintenanceWindow(config.MaxOpened),
rand: rand.New(rand.NewSource(time.Now().UnixNano())),
}
if config.HealthCheckWorkers == 0 {
// With 10 workers and assuming average latency of 5ms for
Expand All @@ -544,7 +552,7 @@ func newSessionPool(sc *sessionClient, config SessionPoolConfig) (*sessionPool,
config.HealthCheckWorkers = 10
}
if config.HealthCheckInterval == 0 {
config.HealthCheckInterval = 5 * time.Minute
config.HealthCheckInterval = healthCheckIntervalMins * time.Minute
}
if config.healthCheckSampleInterval == 0 {
config.healthCheckSampleInterval = time.Minute
Expand Down Expand Up @@ -1187,9 +1195,22 @@ func (hc *healthChecker) getInterval() time.Duration {
// scheduledHCLocked schedules next healthcheck on session s with the assumption
// that hc.mu is being held.
func (hc *healthChecker) scheduledHCLocked(s *session) {
// The next healthcheck will be scheduled after
// [interval*0.5, interval*1.5) ns.
nsFromNow := rand.Int63n(int64(hc.interval)) + int64(hc.interval)/2
var constPart, randPart float64
if !s.firstHCDone {
// The first check will be scheduled in a large range to make requests
// more evenly distributed. The first healthcheck will be scheduled
// after [interval*0.2, interval*1.1) ns.
constPart = float64(hc.interval) * 0.2
randPart = hc.pool.rand.Float64() * float64(hc.interval) * 0.9
s.firstHCDone = true
} else {
// The next healthcheck will be scheduled after
// [interval*0.9, interval*1.1) ns.
constPart = float64(hc.interval) * 0.9
randPart = hc.pool.rand.Float64() * float64(hc.interval) * 0.2
}
// math.Ceil rounds up, so any positive delay becomes at least 1 ns.
nsFromNow := int64(math.Ceil(constPart + randPart))
s.setNextCheck(time.Now().Add(time.Duration(nsFromNow)))
if hi := s.getHcIndex(); hi != -1 {
// Session is still being tracked by healthcheck workers.
Expand Down
59 changes: 59 additions & 0 deletions spanner/session_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,65 @@ func TestHealthCheckScheduler(t *testing.T) {
})
}

// TestHealthCheck_FirstHealthCheck tests if the first healthcheck scheduling
// works properly.
func TestHealthCheck_FirstHealthCheck(t *testing.T) {
t.Parallel()
_, client, teardown := setupMockedTestServerWithConfig(t,
ClientConfig{
SessionPoolConfig: SessionPoolConfig{
MaxOpened: 0,
MinOpened: 0,
HealthCheckInterval: 50 * time.Minute,
},
})
defer teardown()
sp := client.idleSessions

now := time.Now()
start := now.Add(time.Duration(float64(sp.hc.interval) * 0.2))
// A second is added to avoid the edge case.
end := now.Add(time.Duration(float64(sp.hc.interval)*1.1) + time.Second)

s := &session{}
sp.hc.scheduledHCLocked(s)

if s.nextCheck.Before(start) || s.nextCheck.After(end) {
t.Fatalf("The first healthcheck schedule is not in the correct range: %v", s.nextCheck)
}
if !s.firstHCDone {
t.Fatal("The flag 'firstHCDone' should be set to true after the first healthcheck.")
}
}

// TestHealthCheck_NonFirstHealthCheck tests if the scheduling after the first
// health check works properly.
func TestHealthCheck_NonFirstHealthCheck(t *testing.T) {
t.Parallel()
_, client, teardown := setupMockedTestServerWithConfig(t,
ClientConfig{
SessionPoolConfig: SessionPoolConfig{
MaxOpened: 0,
MinOpened: 0,
HealthCheckInterval: 50 * time.Minute,
},
})
defer teardown()
sp := client.idleSessions

now := time.Now()
start := now.Add(time.Duration(float64(sp.hc.interval) * 0.9))
// A second is added to avoid the edge case.
end := now.Add(time.Duration(float64(sp.hc.interval)*1.1) + time.Second)

s := &session{firstHCDone: true}
sp.hc.scheduledHCLocked(s)

if s.nextCheck.Before(start) || s.nextCheck.After(end) {
t.Fatalf("The non-first healthcheck schedule is not in the correct range: %v", s.nextCheck)
}
}

// Tests that a fraction of sessions is prepared for write by the health checker.
func TestWriteSessionsPrepared(t *testing.T) {
t.Parallel()
Expand Down

0 comments on commit 62bc962

Please sign in to comment.