-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix minReadySeconds for DC #14954
Fix minReadySeconds for DC #14954
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,20 +7,16 @@ import ( | |
"sync" | ||
"time" | ||
|
||
"github.com/golang/glog" | ||
|
||
kerrors "k8s.io/apimachinery/pkg/api/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/fields" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
utilerrors "k8s.io/apimachinery/pkg/util/errors" | ||
"k8s.io/apimachinery/pkg/util/sets" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/apimachinery/pkg/watch" | ||
"k8s.io/client-go/tools/cache" | ||
kapi "k8s.io/kubernetes/pkg/api" | ||
kapipod "k8s.io/kubernetes/pkg/api/pod" | ||
kcoreclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/internalversion" | ||
|
||
"github.com/openshift/origin/pkg/client" | ||
|
@@ -481,117 +477,53 @@ func newPodWatch(client kcoreclient.PodInterface, namespace, name, resourceVersi | |
// NewAcceptAvailablePods makes a new acceptAvailablePods from a real client. | ||
func NewAcceptAvailablePods( | ||
out io.Writer, | ||
kclient kcoreclient.PodsGetter, | ||
kclient kcoreclient.ReplicationControllersGetter, | ||
timeout time.Duration, | ||
interval time.Duration, | ||
minReadySeconds int32, | ||
) *acceptAvailablePods { | ||
|
||
return &acceptAvailablePods{ | ||
out: out, | ||
timeout: timeout, | ||
interval: interval, | ||
minReadySeconds: minReadySeconds, | ||
acceptedPods: sets.NewString(), | ||
getRcPodStore: func(rc *kapi.ReplicationController) (cache.Store, chan struct{}) { | ||
selector := labels.Set(rc.Spec.Selector).AsSelector() | ||
store := cache.NewStore(cache.MetaNamespaceKeyFunc) | ||
lw := &cache.ListWatch{ | ||
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { | ||
options.LabelSelector = selector.String() | ||
return kclient.Pods(rc.Namespace).List(options) | ||
}, | ||
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { | ||
options.LabelSelector = selector.String() | ||
return kclient.Pods(rc.Namespace).Watch(options) | ||
}, | ||
} | ||
stop := make(chan struct{}) | ||
cache.NewReflector(lw, &kapi.Pod{}, store, 10*time.Second).RunUntil(stop) | ||
return store, stop | ||
}, | ||
out: out, | ||
kclient: kclient, | ||
timeout: timeout, | ||
} | ||
} | ||
|
||
// acceptAvailablePods will accept a replication controller if all the pods | ||
// for the replication controller become available. | ||
// | ||
// acceptAvailablePods keeps track of the pods it has accepted for a | ||
// replication controller so that the acceptor can be reused across multiple | ||
// batches of updates to a single controller. For example, if during the first | ||
// acceptance call the replication controller has 3 pods, the acceptor will | ||
// validate those 3 pods. If the same acceptor instance is used again for the | ||
// same replication controller which now has 6 pods, only the latest 3 pods | ||
// will be considered for acceptance. The status of the original 3 pods becomes | ||
// irrelevant. | ||
// | ||
// Note that this struct is stateful and intended for use with a single | ||
// replication controller and should be discarded and recreated between | ||
// rollouts. | ||
type acceptAvailablePods struct { | ||
out io.Writer | ||
// getRcPodStore should return a Store containing all the pods for the | ||
// replication controller, and a channel to stop whatever process is | ||
// feeding the store. | ||
getRcPodStore func(*kapi.ReplicationController) (cache.Store, chan struct{}) | ||
// timeout is how long to wait for pod readiness. | ||
out io.Writer | ||
kclient kcoreclient.ReplicationControllersGetter | ||
// timeout is how long to wait for pods to become available from ready state. | ||
timeout time.Duration | ||
// interval is how often to check for pod readiness | ||
interval time.Duration | ||
// minReadySeconds is the minimum number of seconds for which a newly created | ||
// pod should be ready without any of its container crashing, for it to be | ||
// considered available. | ||
minReadySeconds int32 | ||
// acceptedPods keeps track of pods which have been previously accepted for | ||
// a replication controller. | ||
acceptedPods sets.String | ||
} | ||
|
||
// Accept all pods for a replication controller once they are available. | ||
func (c *acceptAvailablePods) Accept(rc *kapi.ReplicationController) error { | ||
// Make a pod store to poll and ensure it gets cleaned up. | ||
podStore, stopStore := c.getRcPodStore(rc) | ||
defer close(stopStore) | ||
allReplicasAvailable := func(r *kapi.ReplicationController) bool { | ||
return r.Status.AvailableReplicas == r.Spec.Replicas | ||
} | ||
|
||
// Start checking for pod updates. | ||
if c.acceptedPods.Len() > 0 { | ||
fmt.Fprintf(c.out, "--> Waiting up to %s for pods in rc %s to become ready (%d pods previously accepted)\n", c.timeout, rc.Name, c.acceptedPods.Len()) | ||
} else { | ||
fmt.Fprintf(c.out, "--> Waiting up to %s for pods in rc %s to become ready\n", c.timeout, rc.Name) | ||
} | ||
err := wait.Poll(c.interval, c.timeout, func() (done bool, err error) { | ||
// Check for pod readiness. | ||
unready := sets.NewString() | ||
for _, obj := range podStore.List() { | ||
pod := obj.(*kapi.Pod) | ||
// Skip previously accepted pods; we only want to verify newly observed | ||
// and unaccepted pods. | ||
if c.acceptedPods.Has(pod.Name) { | ||
continue | ||
} | ||
if kapipod.IsPodAvailable(pod, c.minReadySeconds, metav1.NewTime(time.Now())) { | ||
// If the pod is ready, track it as accepted. | ||
c.acceptedPods.Insert(pod.Name) | ||
} else { | ||
// Otherwise, track it as unready. | ||
unready.Insert(pod.Name) | ||
} | ||
} | ||
// Check to see if we're done. | ||
if unready.Len() == 0 { | ||
return true, nil | ||
if allReplicasAvailable(rc) { | ||
return nil | ||
} | ||
|
||
watcher, err := c.kclient.ReplicationControllers(rc.Namespace).Watch(metav1.SingleObject(metav1.ObjectMeta{Name: rc.Name, ResourceVersion: rc.ResourceVersion})) | ||
if err != nil { | ||
return fmt.Errorf("acceptAvailablePods failed to watch ReplicationController %s/%s: %v", rc.Namespace, rc.Name, err) | ||
} | ||
|
||
_, err = watch.Until(c.timeout, watcher, func(event watch.Event) (bool, error) { | ||
if t := event.Type; t != watch.Modified { | ||
return false, fmt.Errorf("acceptAvailablePods failed watching for ReplicationController %s/%s: received event %v", rc.Namespace, rc.Name, t) | ||
} | ||
// Otherwise, try again later. | ||
glog.V(4).Infof("Still waiting for %d pods to become ready for rc %s", unready.Len(), rc.Name) | ||
return false, nil | ||
newRc := event.Object.(*kapi.ReplicationController) | ||
return allReplicasAvailable(newRc), nil | ||
}) | ||
|
||
// Handle acceptance failure. | ||
if err != nil { | ||
if err == wait.ErrWaitTimeout { | ||
return fmt.Errorf("pods for rc %q took longer than %.f seconds to become ready", rc.Name, c.timeout.Seconds()) | ||
return fmt.Errorf("pods for rc '%s/%s' took longer than %.f seconds to become available", rc.Namespace, rc.Name, c.timeout.Seconds()) | ||
} | ||
return fmt.Errorf("pod readiness check failed for rc %q: %v", rc.Name, err) | ||
return err | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why removing the context from the error? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because in current context it feels misleading in what it's saying. Other things might have failed here, not just "pod readiness check". I am open to suggestions if we want to reformat the error here and not leave it on the caller. |
||
} | ||
return nil | ||
} |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Kargakis I know what you are about to say :), PTO and certification got into way of fixing watch.Until I'll get back to working on it.
Well, it used
WATCH
even in previous implementation.