Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve reliability of kube-proxy configmap updates (retry, block until pods are up) #3774

Merged
merged 8 commits into from
Mar 1, 2019
68 changes: 64 additions & 4 deletions pkg/minikube/bootstrapper/kubeadm/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
download "github.com/jimmidyson/go-download"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/minikube/pkg/minikube/assets"
"k8s.io/minikube/pkg/minikube/bootstrapper"
"k8s.io/minikube/pkg/minikube/config"
Expand Down Expand Up @@ -64,6 +65,24 @@ var SkipPreflights = []string{
"CRI",
}

type pod struct {
// Human friendly name
name string
key string
value string
}

// PodsByLayer are queries we run when health checking, sorted roughly by dependency layer
var PodsByLayer = []pod{
{"apiserver", "component", "kube-apiserver"},
{"proxy", "k8s-app", "kube-proxy"},
{"etcd", "component", "etcd"},
{"scheduler", "component", "kube-scheduler"},
{"controller", "component", "kube-controller-manager"},
{"addon-manager", "component", "kube-addon-manager"},
{"dns", "k8s-app", "kube-dns"},
}

// SkipAdditionalPreflights are additional preflights we skip depending on the runtime in use.
var SkipAdditionalPreflights = map[string][]string{}

Expand Down Expand Up @@ -175,12 +194,20 @@ func (k *KubeadmBootstrapper) StartCluster(k8s config.KubernetesConfig) error {
}
}

// NOTE: We have not yet asserted that we can access the apiserver. Now would be a great time to do so.
if err := waitForPods(false); err != nil {
return errors.Wrap(err, "wait")
}

console.OutStyle("permissions", "Configuring cluster permissions ...")
if err := util.RetryAfter(100, elevateKubeSystemPrivileges, time.Millisecond*500); err != nil {
return errors.Wrap(err, "timed out waiting to elevate kube-system RBAC privileges")
}

// Make sure elevating privileges didn't screw anything up
if err := waitForPods(true); err != nil {
return errors.Wrap(err, "wait")
}

return nil
}

Expand All @@ -204,6 +231,31 @@ func addAddons(files *[]assets.CopyableFile) error {
return nil
}

// waitForPods waits until the important Kubernetes pods are in running state
func waitForPods(quiet bool) error {
if !quiet {
console.OutStyle("waiting-pods", "Waiting for pods:")
}
client, err := util.GetClient()
if err != nil {
return errors.Wrap(err, "k8s client")
}

for _, p := range PodsByLayer {
if !quiet {
console.Out(" %s", p.name)
}
selector := labels.SelectorFromSet(labels.Set(map[string]string{p.key: p.value}))
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
return errors.Wrap(err, fmt.Sprintf("waiting for %s=%s", p.key, p.value))
}
}
if !quiet {
console.OutLn("")
}
return nil
}

// RestartCluster restarts the Kubernetes cluster configured by kubeadm
func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error {
version, err := ParseKubernetesVersion(k8s.KubernetesVersion)
Expand Down Expand Up @@ -232,12 +284,20 @@ func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error
}
}

// NOTE: Perhaps now would be a good time to check apiserver health?
console.OutStyle("waiting", "Waiting for kube-proxy to come back up ...")
if err := restartKubeProxy(k8s); err != nil {
if err := waitForPods(false); err != nil {
return errors.Wrap(err, "wait")
}

console.OutStyle("reconfiguring", "Updating kube-proxy configuration ...")
if err = util.RetryAfter(5, func() error { return updateKubeProxyConfigMap(k8s) }, 5*time.Second); err != nil {
return errors.Wrap(err, "restarting kube-proxy")
}

// Make sure the kube-proxy restart didn't screw anything up.
if err := waitForPods(true); err != nil {
return errors.Wrap(err, "wait")
}

return nil
}

Expand Down
34 changes: 26 additions & 8 deletions pkg/minikube/bootstrapper/kubeadm/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import (
clientv1 "k8s.io/api/core/v1"
rbacv1beta1 "k8s.io/api/rbac/v1beta1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
Expand Down Expand Up @@ -155,22 +155,23 @@ users:
`
)

func restartKubeProxy(k8s config.KubernetesConfig) error {
// updateKubeProxyConfigMap updates the IP & port kube-proxy listens on, and restarts it.
func updateKubeProxyConfigMap(k8s config.KubernetesConfig) error {
client, err := util.GetClient()
if err != nil {
return errors.Wrap(err, "getting k8s client")
}

selector := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": "kube-proxy"}))
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
return errors.Wrap(err, "waiting for kube-proxy to be up for configmap update")
return errors.Wrap(err, "kube-proxy not running")
}

cfgMap, err := client.CoreV1().ConfigMaps("kube-system").Get("kube-proxy", metav1.GetOptions{})
if err != nil {
return errors.Wrap(err, "getting kube-proxy configmap")
return &util.RetriableError{Err: errors.Wrap(err, "getting kube-proxy configmap")}
}

glog.Infof("kube-proxy config: %v", cfgMap.Data[kubeconfigConf])
t := template.Must(template.New("kubeProxyTmpl").Parse(kubeProxyConfigmapTmpl))
opts := struct {
AdvertiseAddress string
Expand All @@ -188,10 +189,21 @@ func restartKubeProxy(k8s config.KubernetesConfig) error {
if cfgMap.Data == nil {
cfgMap.Data = map[string]string{}
}
cfgMap.Data[kubeconfigConf] = strings.TrimSuffix(kubeconfig.String(), "\n")

updated := strings.TrimSuffix(kubeconfig.String(), "\n")
glog.Infof("updated kube-proxy config: %s", updated)

// An optimization, but also one that's unlikely, as kubeadm writes the address as 'localhost'
if cfgMap.Data[kubeconfigConf] == updated {
glog.Infof("kube-proxy config appears to require no change, not restarting kube-proxy")
return nil
}
cfgMap.Data[kubeconfigConf] = updated

// Make this step retriable, as it can fail with:
// "Operation cannot be fulfilled on configmaps "kube-proxy": the object has been modified; please apply your changes to the latest version and try again"
if _, err := client.CoreV1().ConfigMaps("kube-system").Update(cfgMap); err != nil {
return errors.Wrap(err, "updating configmap")
return &util.RetriableError{Err: errors.Wrap(err, "updating configmap")}
}

pods, err := client.CoreV1().Pods("kube-system").List(metav1.ListOptions{
Expand All @@ -201,10 +213,16 @@ func restartKubeProxy(k8s config.KubernetesConfig) error {
return errors.Wrap(err, "listing kube-proxy pods")
}
for _, pod := range pods.Items {
// Retriable, as known to fail with: pods "<name>" not found
if err := client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}); err != nil {
return errors.Wrapf(err, "deleting pod %+v", pod)
return &util.RetriableError{Err: errors.Wrapf(err, "deleting pod %+v", pod)}
}
}

// Wait for the scheduler to restart kube-proxy
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
return errors.Wrap(err, "kube-proxy not running")
}

return nil
}
48 changes: 25 additions & 23 deletions pkg/minikube/console/style.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,29 +41,31 @@ type style struct {
// styles is a map of style name to style struct
// For consistency, ensure that emojis added render with the same width across platforms.
var styles = map[string]style{
"happy": {Prefix: "πŸ˜„ ", LowPrefix: "o "},
"success": {Prefix: "βœ… "},
"failure": {Prefix: "❌ ", LowPrefix: "X "},
"conflict": {Prefix: "πŸ’₯ ", LowPrefix: "x "},
"fatal": {Prefix: "πŸ’£ ", LowPrefix: "! "},
"notice": {Prefix: "πŸ“Œ ", LowPrefix: "* "},
"ready": {Prefix: "πŸ„ ", LowPrefix: "= "},
"running": {Prefix: "πŸƒ ", LowPrefix: ": "},
"provisioning": {Prefix: "🌱 ", LowPrefix: "> "},
"restarting": {Prefix: "πŸ”„ ", LowPrefix: ": "},
"stopping": {Prefix: "βœ‹ ", LowPrefix: ": "},
"stopped": {Prefix: "πŸ›‘ "},
"warning": {Prefix: "⚠️ ", LowPrefix: "! "},
"waiting": {Prefix: "βŒ› ", LowPrefix: ": "},
"usage": {Prefix: "πŸ’‘ "},
"launch": {Prefix: "πŸš€ "},
"sad": {Prefix: "😿 ", LowPrefix: "* "},
"thumbs-up": {Prefix: "πŸ‘ "},
"option": {Prefix: " β–ͺ "}, // Indented bullet
"command": {Prefix: " β–ͺ "}, // Indented bullet
"log-entry": {Prefix: " "}, // Indent
"crushed": {Prefix: "πŸ’” "},
"url": {Prefix: "πŸ‘‰ "},
"happy": {Prefix: "πŸ˜„ ", LowPrefix: "o "},
"success": {Prefix: "βœ… "},
"failure": {Prefix: "❌ ", LowPrefix: "X "},
"conflict": {Prefix: "πŸ’₯ ", LowPrefix: "x "},
"fatal": {Prefix: "πŸ’£ ", LowPrefix: "! "},
"notice": {Prefix: "πŸ“Œ ", LowPrefix: "* "},
"ready": {Prefix: "πŸ„ ", LowPrefix: "= "},
"running": {Prefix: "πŸƒ ", LowPrefix: ": "},
"provisioning": {Prefix: "🌱 ", LowPrefix: "> "},
"restarting": {Prefix: "πŸ”„ ", LowPrefix: ": "},
"reconfiguring": {Prefix: "πŸ“― ", LowPrefix: ": "},
"stopping": {Prefix: "βœ‹ ", LowPrefix: ": "},
"stopped": {Prefix: "πŸ›‘ "},
"warning": {Prefix: "⚠️ ", LowPrefix: "! "},
"waiting": {Prefix: "βŒ› ", LowPrefix: ": "},
"waiting-pods": {Prefix: "βŒ› ", LowPrefix: ": ", OmitNewline: true},
"usage": {Prefix: "πŸ’‘ "},
"launch": {Prefix: "πŸš€ "},
"sad": {Prefix: "😿 ", LowPrefix: "* "},
"thumbs-up": {Prefix: "πŸ‘ "},
"option": {Prefix: " β–ͺ "}, // Indented bullet
"command": {Prefix: " β–ͺ "}, // Indented bullet
"log-entry": {Prefix: " "}, // Indent
"crushed": {Prefix: "πŸ’” "},
"url": {Prefix: "πŸ‘‰ "},

// Specialized purpose styles
"iso-download": {Prefix: "πŸ’Ώ ", LowPrefix: "@ "},
Expand Down
6 changes: 4 additions & 2 deletions pkg/util/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
"github.com/golang/glog"
"github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
Expand All @@ -39,8 +39,10 @@ import (
)

var (
// ReasonableMutateTime is how long to wait for basic object mutations, such as deletions, to show up
ReasonableMutateTime = time.Minute * 1
ReasonableStartTime = time.Minute * 5
// ReasonableStartTime is how long to wait for pods to start, considering dependency chains & slow networks.
ReasonableStartTime = time.Minute * 10
)

type PodStore struct {
Expand Down