diff --git a/CHANGELOG.md b/CHANGELOG.md index c964f7ae7..7f50d722e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * [CHANGE] Rename `kv/kvtls` to `crypto/tls`. #39 * [CHANGE] spanlogger: Take interface implementation for extracting tenant ID. #59 * [CHANGE] The `status_code` label on gRPC client metrics has changed from '200' and '500' to '2xx', '5xx', '4xx', 'cancel' or 'error'. #68 +* [CHANGE] Memberlist: changed probe interval from `1s` to `5s` and probe timeout from `500ms` to `2s`. #90 * [ENHANCEMENT] Add middleware package. #38 * [ENHANCEMENT] Add the ring package #45 * [ENHANCEMENT] Add limiter package. #41 diff --git a/kv/memberlist/memberlist_client.go b/kv/memberlist/memberlist_client.go index 5880bd394..d7ad176d0 100644 --- a/kv/memberlist/memberlist_client.go +++ b/kv/memberlist/memberlist_client.go @@ -400,6 +400,13 @@ func (m *KV) buildMemberlistConfig() (*memberlist.Config, error) { // Memberlist uses UDPBufferSize to figure out how many messages it can put into single "packet". // As we don't use UDP for sending packets, we can use higher value here. mlCfg.UDPBufferSize = 10 * 1024 * 1024 + + // For our use cases, we don't need a very fast detection of dead nodes. Since we use a TCP transport + // and we open a new TCP connection for each packet, we prefer to reduce the probe frequency and increase + // the timeout compared to defaults. + mlCfg.ProbeInterval = 5 * time.Second // A random node is probed once per interval. This setting is also the total timeout for the direct + indirect probes. + mlCfg.ProbeTimeout = 2 * time.Second // Timeout for the direct probe. + return mlCfg, nil }