diff --git a/CHANGELOG-3.6.md b/CHANGELOG-3.6.md index 5e07703993b..d3267857ff2 100644 --- a/CHANGELOG-3.6.md +++ b/CHANGELOG-3.6.md @@ -34,6 +34,7 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.0...v3.6.0). - Add [`etcd --log-format`](https://github.com/etcd-io/etcd/pull/13339) flag to support log format. - Fix [non mutating requests pass through quotaKVServer when NOSPACE](https://github.com/etcd-io/etcd/pull/13435) - Fix [exclude the same alarm type activated by multiple peers](https://github.com/etcd-io/etcd/pull/13467). +- Fix [Provide a better liveness probe for when etcd runs as a Kubernetes pod](https://github.com/etcd-io/etcd/pull/13399) ### tools/benchmark diff --git a/server/etcdserver/api/etcdhttp/metrics.go b/server/etcdserver/api/etcdhttp/metrics.go index 659ab806708..fedf2a9e33d 100644 --- a/server/etcdserver/api/etcdhttp/metrics.go +++ b/server/etcdserver/api/etcdhttp/metrics.go @@ -40,14 +40,16 @@ const ( // HandleMetricsHealth registers metrics and health handlers. func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) { mux.Handle(PathMetrics, promhttp.Handler()) - mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet) Health { return checkV2Health(lg, srv, excludedAlarms) })) + mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { return checkV2Health(lg, srv, excludedAlarms) })) } // HandleMetricsHealthForV3 registers metrics and health handlers. it checks health by using v3 range request // and its corresponding timeout. func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) { mux.Handle(PathMetrics, promhttp.Handler()) - mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet) Health { return checkV3Health(lg, srv, excludedAlarms) })) + mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { + return checkV3Health(lg, srv, excludedAlarms, serializable) + })) } // HandlePrometheus registers prometheus handler on '/metrics'. @@ -56,7 +58,7 @@ func HandlePrometheus(mux *http.ServeMux) { } // NewHealthHandler handles '/health' requests. -func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet) Health) http.HandlerFunc { +func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet, Serializable bool) Health) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { w.Header().Set("Allow", http.MethodGet) @@ -65,7 +67,12 @@ func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet) Health return } excludedAlarms := getExcludedAlarms(r) - h := hfunc(excludedAlarms) + // Passing the query parameter "serializable=true" ensures that the + // health of the local etcd is checked vs the health of the cluster. + // This is useful for probes attempting to validate the liveness of + // the etcd process vs readiness of the cluster to serve requests. + serializableFlag := getSerializableFlag(r) + h := hfunc(excludedAlarms, serializableFlag) defer func() { if h.Health == "true" { healthSuccess.Inc() @@ -128,9 +135,13 @@ func getExcludedAlarms(r *http.Request) (alarms AlarmSet) { return alarms } +func getSerializableFlag(r *http.Request) bool { + return r.URL.Query().Get("serializable") == "true" +} + // TODO: etcdserver.ErrNoLeader in health API -func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health { +func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet, serializable bool) Health { h := Health{} h.Health = "true" as := srv.Alarms() @@ -156,7 +167,7 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSe } } - if uint64(srv.Leader()) == raft.None { + if !serializable && (uint64(srv.Leader()) == raft.None) { h.Health = "false" h.Reason = "RAFT NO LEADER" lg.Warn("serving /health false; no leader") @@ -166,7 +177,7 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSe } func checkV2Health(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) (h Health) { - if h = checkHealth(lg, srv, excludedAlarms); h.Health != "true" { + if h = checkHealth(lg, srv, excludedAlarms, false); h.Health != "true" { return } ctx, cancel := context.WithTimeout(context.Background(), time.Second) @@ -182,12 +193,12 @@ func checkV2Health(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms Alarm return } -func checkV3Health(lg *zap.Logger, srv *etcdserver.EtcdServer, excludedAlarms AlarmSet) (h Health) { - if h = checkHealth(lg, srv, excludedAlarms); h.Health != "true" { +func checkV3Health(lg *zap.Logger, srv *etcdserver.EtcdServer, excludedAlarms AlarmSet, serializable bool) (h Health) { + if h = checkHealth(lg, srv, excludedAlarms, serializable); h.Health != "true" { return } ctx, cancel := context.WithTimeout(context.Background(), srv.Cfg.ReqTimeout()) - _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1}) + _, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable}) cancel() if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied { h.Health = "false" diff --git a/server/proxy/grpcproxy/health.go b/server/proxy/grpcproxy/health.go index 1d6f7a2d8b9..882af4b46a8 100644 --- a/server/proxy/grpcproxy/health.go +++ b/server/proxy/grpcproxy/health.go @@ -31,7 +31,7 @@ func HandleHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) { if lg == nil { lg = zap.NewNop() } - mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet) etcdhttp.Health { return checkHealth(c) })) + mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkHealth(c) })) } // HandleProxyHealth registers health handler on '/proxy/health'. @@ -39,7 +39,7 @@ func HandleProxyHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) { if lg == nil { lg = zap.NewNop() } - mux.Handle(etcdhttp.PathProxyHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet) etcdhttp.Health { return checkProxyHealth(c) })) + mux.Handle(etcdhttp.PathProxyHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkProxyHealth(c) })) } func checkHealth(c *clientv3.Client) etcdhttp.Health {