Skip to content

Commit

Permalink
Merge pull request #3774 from tstromberg/wait-longer
Browse files Browse the repository at this point in the history
Improve reliability of kube-proxy configmap updates (retry, block until pods are up)
  • Loading branch information
tstromberg authored Mar 1, 2019
2 parents dffdd82 + b9675da commit 42863bf
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 37 deletions.
68 changes: 64 additions & 4 deletions pkg/minikube/bootstrapper/kubeadm/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
download "github.com/jimmidyson/go-download"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/minikube/pkg/minikube/assets"
"k8s.io/minikube/pkg/minikube/bootstrapper"
"k8s.io/minikube/pkg/minikube/config"
Expand Down Expand Up @@ -64,6 +65,24 @@ var SkipPreflights = []string{
"CRI",
}

type pod struct {
// Human friendly name
name string
key string
value string
}

// PodsByLayer are queries we run when health checking, sorted roughly by dependency layer
var PodsByLayer = []pod{
{"apiserver", "component", "kube-apiserver"},
{"proxy", "k8s-app", "kube-proxy"},
{"etcd", "component", "etcd"},
{"scheduler", "component", "kube-scheduler"},
{"controller", "component", "kube-controller-manager"},
{"addon-manager", "component", "kube-addon-manager"},
{"dns", "k8s-app", "kube-dns"},
}

// SkipAdditionalPreflights are additional preflights we skip depending on the runtime in use.
var SkipAdditionalPreflights = map[string][]string{}

Expand Down Expand Up @@ -175,12 +194,20 @@ func (k *KubeadmBootstrapper) StartCluster(k8s config.KubernetesConfig) error {
}
}

// NOTE: We have not yet asserted that we can access the apiserver. Now would be a great time to do so.
if err := waitForPods(false); err != nil {
return errors.Wrap(err, "wait")
}

console.OutStyle("permissions", "Configuring cluster permissions ...")
if err := util.RetryAfter(100, elevateKubeSystemPrivileges, time.Millisecond*500); err != nil {
return errors.Wrap(err, "timed out waiting to elevate kube-system RBAC privileges")
}

// Make sure elevating privileges didn't screw anything up
if err := waitForPods(true); err != nil {
return errors.Wrap(err, "wait")
}

return nil
}

Expand All @@ -204,6 +231,31 @@ func addAddons(files *[]assets.CopyableFile) error {
return nil
}

// waitForPods waits until the important Kubernetes pods are in running state
func waitForPods(quiet bool) error {
if !quiet {
console.OutStyle("waiting-pods", "Waiting for pods:")
}
client, err := util.GetClient()
if err != nil {
return errors.Wrap(err, "k8s client")
}

for _, p := range PodsByLayer {
if !quiet {
console.Out(" %s", p.name)
}
selector := labels.SelectorFromSet(labels.Set(map[string]string{p.key: p.value}))
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
return errors.Wrap(err, fmt.Sprintf("waiting for %s=%s", p.key, p.value))
}
}
if !quiet {
console.OutLn("")
}
return nil
}

// RestartCluster restarts the Kubernetes cluster configured by kubeadm
func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error {
version, err := ParseKubernetesVersion(k8s.KubernetesVersion)
Expand Down Expand Up @@ -232,12 +284,20 @@ func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error
}
}

// NOTE: Perhaps now would be a good time to check apiserver health?
console.OutStyle("waiting", "Waiting for kube-proxy to come back up ...")
if err := restartKubeProxy(k8s); err != nil {
if err := waitForPods(false); err != nil {
return errors.Wrap(err, "wait")
}

console.OutStyle("reconfiguring", "Updating kube-proxy configuration ...")
if err = util.RetryAfter(5, func() error { return updateKubeProxyConfigMap(k8s) }, 5*time.Second); err != nil {
return errors.Wrap(err, "restarting kube-proxy")
}

// Make sure the kube-proxy restart didn't screw anything up.
if err := waitForPods(true); err != nil {
return errors.Wrap(err, "wait")
}

return nil
}

Expand Down
34 changes: 26 additions & 8 deletions pkg/minikube/bootstrapper/kubeadm/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import (
clientv1 "k8s.io/api/core/v1"
rbacv1beta1 "k8s.io/api/rbac/v1beta1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
Expand Down Expand Up @@ -155,22 +155,23 @@ users:
`
)

func restartKubeProxy(k8s config.KubernetesConfig) error {
// updateKubeProxyConfigMap updates the IP & port kube-proxy listens on, and restarts it.
func updateKubeProxyConfigMap(k8s config.KubernetesConfig) error {
client, err := util.GetClient()
if err != nil {
return errors.Wrap(err, "getting k8s client")
}

selector := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": "kube-proxy"}))
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
return errors.Wrap(err, "waiting for kube-proxy to be up for configmap update")
return errors.Wrap(err, "kube-proxy not running")
}

cfgMap, err := client.CoreV1().ConfigMaps("kube-system").Get("kube-proxy", metav1.GetOptions{})
if err != nil {
return errors.Wrap(err, "getting kube-proxy configmap")
return &util.RetriableError{Err: errors.Wrap(err, "getting kube-proxy configmap")}
}

glog.Infof("kube-proxy config: %v", cfgMap.Data[kubeconfigConf])
t := template.Must(template.New("kubeProxyTmpl").Parse(kubeProxyConfigmapTmpl))
opts := struct {
AdvertiseAddress string
Expand All @@ -188,10 +189,21 @@ func restartKubeProxy(k8s config.KubernetesConfig) error {
if cfgMap.Data == nil {
cfgMap.Data = map[string]string{}
}
cfgMap.Data[kubeconfigConf] = strings.TrimSuffix(kubeconfig.String(), "\n")

updated := strings.TrimSuffix(kubeconfig.String(), "\n")
glog.Infof("updated kube-proxy config: %s", updated)

// An optimization, but also one that's unlikely, as kubeadm writes the address as 'localhost'
if cfgMap.Data[kubeconfigConf] == updated {
glog.Infof("kube-proxy config appears to require no change, not restarting kube-proxy")
return nil
}
cfgMap.Data[kubeconfigConf] = updated

// Make this step retriable, as it can fail with:
// "Operation cannot be fulfilled on configmaps "kube-proxy": the object has been modified; please apply your changes to the latest version and try again"
if _, err := client.CoreV1().ConfigMaps("kube-system").Update(cfgMap); err != nil {
return errors.Wrap(err, "updating configmap")
return &util.RetriableError{Err: errors.Wrap(err, "updating configmap")}
}

pods, err := client.CoreV1().Pods("kube-system").List(metav1.ListOptions{
Expand All @@ -201,10 +213,16 @@ func restartKubeProxy(k8s config.KubernetesConfig) error {
return errors.Wrap(err, "listing kube-proxy pods")
}
for _, pod := range pods.Items {
// Retriable, as known to fail with: pods "<name>" not found
if err := client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}); err != nil {
return errors.Wrapf(err, "deleting pod %+v", pod)
return &util.RetriableError{Err: errors.Wrapf(err, "deleting pod %+v", pod)}
}
}

// Wait for the scheduler to restart kube-proxy
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
return errors.Wrap(err, "kube-proxy not running")
}

return nil
}
48 changes: 25 additions & 23 deletions pkg/minikube/console/style.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,29 +41,31 @@ type style struct {
// styles is a map of style name to style struct
// For consistency, ensure that emojis added render with the same width across platforms.
var styles = map[string]style{
"happy": {Prefix: "😄 ", LowPrefix: "o "},
"success": {Prefix: "✅ "},
"failure": {Prefix: "❌ ", LowPrefix: "X "},
"conflict": {Prefix: "💥 ", LowPrefix: "x "},
"fatal": {Prefix: "💣 ", LowPrefix: "! "},
"notice": {Prefix: "📌 ", LowPrefix: "* "},
"ready": {Prefix: "🏄 ", LowPrefix: "= "},
"running": {Prefix: "🏃 ", LowPrefix: ": "},
"provisioning": {Prefix: "🌱 ", LowPrefix: "> "},
"restarting": {Prefix: "🔄 ", LowPrefix: ": "},
"stopping": {Prefix: "✋ ", LowPrefix: ": "},
"stopped": {Prefix: "🛑 "},
"warning": {Prefix: "⚠️ ", LowPrefix: "! "},
"waiting": {Prefix: "⌛ ", LowPrefix: ": "},
"usage": {Prefix: "💡 "},
"launch": {Prefix: "🚀 "},
"sad": {Prefix: "😿 ", LowPrefix: "* "},
"thumbs-up": {Prefix: "👍 "},
"option": {Prefix: " ▪ "}, // Indented bullet
"command": {Prefix: " ▪ "}, // Indented bullet
"log-entry": {Prefix: " "}, // Indent
"crushed": {Prefix: "💔 "},
"url": {Prefix: "👉 "},
"happy": {Prefix: "😄 ", LowPrefix: "o "},
"success": {Prefix: "✅ "},
"failure": {Prefix: "❌ ", LowPrefix: "X "},
"conflict": {Prefix: "💥 ", LowPrefix: "x "},
"fatal": {Prefix: "💣 ", LowPrefix: "! "},
"notice": {Prefix: "📌 ", LowPrefix: "* "},
"ready": {Prefix: "🏄 ", LowPrefix: "= "},
"running": {Prefix: "🏃 ", LowPrefix: ": "},
"provisioning": {Prefix: "🌱 ", LowPrefix: "> "},
"restarting": {Prefix: "🔄 ", LowPrefix: ": "},
"reconfiguring": {Prefix: "📯 ", LowPrefix: ": "},
"stopping": {Prefix: "✋ ", LowPrefix: ": "},
"stopped": {Prefix: "🛑 "},
"warning": {Prefix: "⚠️ ", LowPrefix: "! "},
"waiting": {Prefix: "⌛ ", LowPrefix: ": "},
"waiting-pods": {Prefix: "⌛ ", LowPrefix: ": ", OmitNewline: true},
"usage": {Prefix: "💡 "},
"launch": {Prefix: "🚀 "},
"sad": {Prefix: "😿 ", LowPrefix: "* "},
"thumbs-up": {Prefix: "👍 "},
"option": {Prefix: " ▪ "}, // Indented bullet
"command": {Prefix: " ▪ "}, // Indented bullet
"log-entry": {Prefix: " "}, // Indent
"crushed": {Prefix: "💔 "},
"url": {Prefix: "👉 "},

// Specialized purpose styles
"iso-download": {Prefix: "💿 ", LowPrefix: "@ "},
Expand Down
6 changes: 4 additions & 2 deletions pkg/util/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
"github.com/golang/glog"
"github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
Expand All @@ -39,8 +39,10 @@ import (
)

var (
// ReasonableMutateTime is how long to wait for basic object mutations, such as deletions, to show up
ReasonableMutateTime = time.Minute * 1
ReasonableStartTime = time.Minute * 5
// ReasonableStartTime is how long to wait for pods to start, considering dependency chains & slow networks.
ReasonableStartTime = time.Minute * 10
)

type PodStore struct {
Expand Down

0 comments on commit 42863bf

Please sign in to comment.