From 926a3f50a6826f0881e64e9b2616ad2973ea3d45 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Mon, 16 Oct 2017 16:36:18 -0500 Subject: [PATCH] UPSTREAM: 53167: Do not GC exited containers in running pods --- .../k8s.io/kubernetes/pkg/kubelet/kubelet.go | 1 + .../kubernetes/pkg/kubelet/kubelet_pods.go | 10 ++++ .../kuberuntime/fake_kuberuntime_manager.go | 17 +++++- .../pkg/kubelet/kuberuntime/kuberuntime_gc.go | 30 +++++----- .../kuberuntime/kuberuntime_gc_test.go | 59 +++++++++++-------- .../kuberuntime/kuberuntime_manager.go | 8 ++- 6 files changed, 83 insertions(+), 42 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go index 96d180faf09b..7a0bb8758e0d 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go @@ -621,6 +621,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub containerRefManager, machineInfo, klet.podManager, + klet, kubeDeps.OSInterface, klet, httpClient, diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go index f81cacead5d0..bc5bf9452439 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go @@ -743,6 +743,16 @@ func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool { return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses)) } +// IsPodTerminated returns trus if the pod with the provided UID is in a terminated state ("Failed" or "Succeeded") +// or if the pod has been deleted or removed +func (kl *Kubelet) IsPodTerminated(uid types.UID) bool { + pod, podFound := kl.podManager.GetPodByUID(uid) + if !podFound { + return true + } + return kl.podIsTerminated(pod) +} + // PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have // been reclaimed by the kubelet. Reclaiming resources is a prerequisite to deleting a pod from the API server. func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool { diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go index dca959a5e94e..072d79af7182 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go @@ -56,6 +56,21 @@ func (f *fakePodGetter) GetPodByUID(uid types.UID) (*v1.Pod, bool) { return pod, found } +type fakePodStateProvider struct { + runningPods map[types.UID]struct{} +} + +func newFakePodStateProvider() *fakePodStateProvider { + return &fakePodStateProvider{ + runningPods: make(map[types.UID]struct{}), + } +} + +func (f *fakePodStateProvider) IsPodTerminated(uid types.UID) bool { + _, found := f.runningPods[uid] + return !found +} + func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) { recorder := &record.FakeRecorder{} kubeRuntimeManager := &kubeGenericRuntimeManager{ @@ -76,7 +91,7 @@ func NewFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS return nil, err } - kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodGetter(), kubeRuntimeManager) + kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, newFakePodGetter(), newFakePodStateProvider(), kubeRuntimeManager) kubeRuntimeManager.runtimeName = typedVersion.RuntimeName kubeRuntimeManager.imagePuller = images.NewImageManager( kubecontainer.FilterEventRecorder(recorder), diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go index fe8f1465945d..7b38f9774ab9 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -32,17 +32,19 @@ import ( // containerGC is the manager of garbage collection. type containerGC struct { - client internalapi.RuntimeService - manager *kubeGenericRuntimeManager - podGetter podGetter + client internalapi.RuntimeService + manager *kubeGenericRuntimeManager + podGetter podGetter + podStateProvider podStateProvider } // NewContainerGC creates a new containerGC. -func NewContainerGC(client internalapi.RuntimeService, podGetter podGetter, manager *kubeGenericRuntimeManager) *containerGC { +func NewContainerGC(client internalapi.RuntimeService, podGetter podGetter, podStateProvider podStateProvider, manager *kubeGenericRuntimeManager) *containerGC { return &containerGC{ - client: client, - manager: manager, - podGetter: podGetter, + client: client, + manager: manager, + podGetter: podGetter, + podStateProvider: podStateProvider, } } @@ -209,7 +211,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE } // evict all containers that are evictable -func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error { +func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error { // Separate containers by evict units. evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge) if err != nil { @@ -219,7 +221,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy // Remove deleted pod containers if all sources are ready. if allSourcesReady { for key, unit := range evictUnits { - if cgc.isPodDeleted(key.uid) || evictNonDeletedPods { + if cgc.isPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) { cgc.removeOldestN(unit, len(unit)) // Remove all. delete(evictUnits, key) } @@ -261,7 +263,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy // 2. contains no containers. // 3. belong to a non-existent (i.e., already removed) pod, or is not the // most recently created sandbox for the pod. -func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error { +func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error { containers, err := cgc.manager.getKubeletContainers(true) if err != nil { return err @@ -307,7 +309,7 @@ func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error { } for podUID, sandboxes := range sandboxesByPod { - if cgc.isPodDeleted(podUID) || evictNonDeletedPods { + if cgc.isPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) { // Remove all evictable sandboxes if the pod has been removed. // Note that the latest dead sandbox is also removed if there is // already an active one. @@ -367,14 +369,14 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error { // * removes oldest dead containers by enforcing gcPolicy.MaxContainers. // * gets evictable sandboxes which are not ready and contains no containers. // * removes evictable sandboxes. -func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error { +func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error { // Remove evictable containers - if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil { + if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil { return err } // Remove sandboxes with zero containers - if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil { + if err := cgc.evictSandboxes(evictTerminatedPods); err != nil { return err } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go index b32cd9428464..8053b7fcb510 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go @@ -67,7 +67,7 @@ func TestSandboxGC(t *testing.T) { containers []containerTemplate // templates of containers minAge time.Duration // sandboxMinGCAge remain []int // template indexes of remaining sandboxes - evictNonDeletedPods bool + evictTerminatedPods bool }{ { description: "notready sandboxes without containers for deleted pods should be garbage collected.", @@ -76,7 +76,7 @@ func TestSandboxGC(t *testing.T) { }, containers: []containerTemplate{}, remain: []int{}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "ready sandboxes without containers for deleted pods should not be garbage collected.", @@ -85,7 +85,7 @@ func TestSandboxGC(t *testing.T) { }, containers: []containerTemplate{}, remain: []int{0}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "sandboxes for existing pods should not be garbage collected.", @@ -95,17 +95,17 @@ func TestSandboxGC(t *testing.T) { }, containers: []containerTemplate{}, remain: []int{0, 1}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { - description: "non-running sandboxes for existing pods should be garbage collected if evictNonDeletedPods is set.", + description: "non-running sandboxes for existing pods should be garbage collected if evictTerminatedPods is set.", sandboxes: []sandboxTemplate{ makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0), makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0), }, containers: []containerTemplate{}, remain: []int{0}, - evictNonDeletedPods: true, + evictTerminatedPods: true, }, { description: "sandbox with containers should not be garbage collected.", @@ -116,7 +116,7 @@ func TestSandboxGC(t *testing.T) { {pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED}, }, remain: []int{0}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "multiple sandboxes should be handled properly.", @@ -136,7 +136,7 @@ func TestSandboxGC(t *testing.T) { {pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED}, }, remain: []int{0, 2}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, } { t.Logf("TestCase #%d: %+v", c, test) @@ -145,7 +145,7 @@ func TestSandboxGC(t *testing.T) { fakeRuntime.SetFakeSandboxes(fakeSandboxes) fakeRuntime.SetFakeContainers(fakeContainers) - err := m.containerGC.evictSandboxes(test.evictNonDeletedPods) + err := m.containerGC.evictSandboxes(test.evictTerminatedPods) assert.NoError(t, err) realRemain, err := fakeRuntime.ListPodSandbox(nil) assert.NoError(t, err) @@ -163,9 +163,13 @@ func TestContainerGC(t *testing.T) { assert.NoError(t, err) fakePodGetter := m.containerGC.podGetter.(*fakePodGetter) + podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) makeGCContainer := func(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate { container := makeTestContainer(containerName, "test-image") pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container}) + if podName == "running" { + podStateProvider.runningPods[pod.UID] = struct{}{} + } if podName != "deleted" { // initialize the pod getter, explicitly exclude deleted pod fakePodGetter.pods[pod.UID] = pod @@ -185,7 +189,7 @@ func TestContainerGC(t *testing.T) { containers []containerTemplate // templates of containers policy *kubecontainer.ContainerGCPolicy // container gc policy remain []int // template indexes of remaining containers - evictNonDeletedPods bool + evictTerminatedPods bool }{ { description: "all containers should be removed when max container limit is 0", @@ -194,7 +198,7 @@ func TestContainerGC(t *testing.T) { }, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0}, remain: []int{}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "max containers should be complied when no max per pod container limit is set", @@ -207,7 +211,7 @@ func TestContainerGC(t *testing.T) { }, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4}, remain: []int{0, 1, 2, 3}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "no containers should be removed if both max container and per pod container limits are not set", @@ -218,7 +222,7 @@ func TestContainerGC(t *testing.T) { }, policy: &kubecontainer.ContainerGCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1}, remain: []int{0, 1, 2}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "recently started containers should not be removed", @@ -228,7 +232,7 @@ func TestContainerGC(t *testing.T) { makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1, 2}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "oldest containers should be removed when per pod container limit exceeded", @@ -238,7 +242,7 @@ func TestContainerGC(t *testing.T) { makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "running containers should not be removed", @@ -248,7 +252,7 @@ func TestContainerGC(t *testing.T) { makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), }, remain: []int{0, 1, 2}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "no containers should be removed when limits are not exceeded", @@ -257,7 +261,7 @@ func TestContainerGC(t *testing.T) { makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "max container count should apply per (UID, container) pair", @@ -273,7 +277,7 @@ func TestContainerGC(t *testing.T) { makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1, 3, 4, 6, 7}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "max limit should apply and try to keep from every pod", @@ -290,7 +294,7 @@ func TestContainerGC(t *testing.T) { makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 2, 4, 6, 8}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { description: "oldest pods should be removed if limit exceeded", @@ -307,20 +311,20 @@ func TestContainerGC(t *testing.T) { makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 2, 4, 6, 8, 9}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, { - description: "all non-running containers should be removed when evictNonDeletedPods is set", + description: "all non-running containers should be removed when evictTerminatedPods is set", containers: []containerTemplate{ makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), - makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), + makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED), makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING), }, - remain: []int{5}, - evictNonDeletedPods: true, + remain: []int{4, 5}, + evictTerminatedPods: true, }, { description: "containers for deleted pods should be removed", @@ -333,7 +337,7 @@ func TestContainerGC(t *testing.T) { makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED), }, remain: []int{0, 1, 2}, - evictNonDeletedPods: false, + evictTerminatedPods: false, }, } { t.Logf("TestCase #%d: %+v", c, test) @@ -343,7 +347,7 @@ func TestContainerGC(t *testing.T) { if test.policy == nil { test.policy = &defaultGCPolicy } - err := m.containerGC.evictContainers(*test.policy, true, test.evictNonDeletedPods) + err := m.containerGC.evictContainers(*test.policy, true, test.evictTerminatedPods) assert.NoError(t, err) realRemain, err := fakeRuntime.ListContainers(nil) assert.NoError(t, err) @@ -362,10 +366,13 @@ func TestPodLogDirectoryGC(t *testing.T) { assert.NoError(t, err) fakeOS := m.osInterface.(*containertest.FakeOS) fakePodGetter := m.containerGC.podGetter.(*fakePodGetter) + podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider) // pod log directories without corresponding pods should be removed. fakePodGetter.pods["123"] = makeTestPod("foo1", "new", "123", nil) fakePodGetter.pods["456"] = makeTestPod("foo2", "new", "456", nil) + podStateProvider.runningPods["123"] = struct{}{} + podStateProvider.runningPods["456"] = struct{}{} files := []string{"123", "456", "789", "012"} removed := []string{filepath.Join(podLogsRootDirectory, "789"), filepath.Join(podLogsRootDirectory, "012")} diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 636f4b724caa..a047d0d5be05 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -69,6 +69,11 @@ type podGetter interface { GetPodByUID(kubetypes.UID) (*v1.Pod, bool) } +// podStateProvider can determine if a pod is deleted ir terminated +type podStateProvider interface { + IsPodTerminated(kubetypes.UID) bool +} + type kubeGenericRuntimeManager struct { runtimeName string recorder record.EventRecorder @@ -120,6 +125,7 @@ func NewKubeGenericRuntimeManager( containerRefManager *kubecontainer.RefManager, machineInfo *cadvisorapi.MachineInfo, podGetter podGetter, + podStateProvider podStateProvider, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, httpClient types.HttpGetter, @@ -182,7 +188,7 @@ func NewKubeGenericRuntimeManager( imagePullQPS, imagePullBurst) kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager) - kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podGetter, kubeRuntimeManager) + kubeRuntimeManager.containerGC = NewContainerGC(runtimeService, podGetter, podStateProvider, kubeRuntimeManager) kubeRuntimeManager.versionCache = cache.NewObjectCache( func() (interface{}, error) {