From a376379fae018d79881bbe12a9f0fb5ea446ed5d Mon Sep 17 00:00:00 2001 From: Qing Ju Date: Wed, 15 Jul 2020 19:53:53 -0700 Subject: [PATCH] Added e2e test for driver version upgrade --- .gitignore | 2 + pkg/driver/efs_watch_dog.go | 2 +- pkg/driver/efs_watch_dog_test.go | 2 +- test/e2e/README.md | 10 +- test/e2e/e2e.go | 227 +++++++++++++++++++++++++------ test/e2e/e2e_test.go | 1 + 6 files changed, 197 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index a1ead31eb..5eae3c0a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.swp bin/ vendor/ +/test/e2e/results/ +/test/.DS_Store diff --git a/pkg/driver/efs_watch_dog.go b/pkg/driver/efs_watch_dog.go index ad9f2b773..9d139bf3f 100644 --- a/pkg/driver/efs_watch_dog.go +++ b/pkg/driver/efs_watch_dog.go @@ -71,7 +71,7 @@ dns_name_suffix = sc2s.sgov.gov [mount-watchdog] enabled = true poll_interval_sec = 1 -unmount_grace_period_sec = 30 +unmount_grace_period_sec = 5 # Set client auth/access point certificate renewal rate. Minimum value is 1 minute. tls_cert_renewal_interval_min = 60 diff --git a/pkg/driver/efs_watch_dog_test.go b/pkg/driver/efs_watch_dog_test.go index 20c555d49..da31dc1e3 100644 --- a/pkg/driver/efs_watch_dog_test.go +++ b/pkg/driver/efs_watch_dog_test.go @@ -65,7 +65,7 @@ dns_name_suffix = sc2s.sgov.gov [mount-watchdog] enabled = true poll_interval_sec = 1 -unmount_grace_period_sec = 30 +unmount_grace_period_sec = 5 # Set client auth/access point certificate renewal rate. Minimum value is 1 minute. tls_cert_renewal_interval_min = 60 diff --git a/test/e2e/README.md b/test/e2e/README.md index 26a3d717b..e76f99d93 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -3,9 +3,15 @@ - kubernetes 1.14+ cluster whose workers (preferably 2 or more) can mount the Amazon EFS file system # Run + +## Run all CSI tests +```sh +go test ./test/e2e/ -v -kubeconfig=$HOME/.kube/config --region=us-west-2 --report-dir="./results" -ginkgo.focus="\[efs-csi\]" --cluster-name="cluster-name" +``` + +## Run single CSI test ```sh -go test -v -timeout 0 ./... 
-kubeconfig=$HOME/.kube/config -report-dir=$ARTIFACTS -ginkgo.focus="\[efs-csi\]" -ginkgo.skip="\[Disruptive\]" \ - -file-system-id=fs-c2a43e69 +go test ./test/e2e/ -v -kubeconfig=$HOME/.kube/config --region=us-west-2 --report-dir="./results" -ginkgo.focus="should continue reading/writing after the driver pod is upgraded from stable version" --cluster-name="cluster-name" ``` # Update dependencies diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 9acb83de7..d2202de6e 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -5,7 +5,9 @@ import ( "time" "github.com/onsi/ginkgo" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -13,6 +15,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" clientset "k8s.io/client-go/kubernetes" + clientgoappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" @@ -182,7 +185,7 @@ var _ = ginkgo.Describe("[efs-csi] EFS CSI", func() { f := framework.NewDefaultFramework("efs") ginkgo.Context(testsuites.GetDriverNameWithFeatureTags(driver), func() { - ginkgo.It("should mount different paths on same volume on same node", func() { + ginkgo.It("[efs] [nonTls] should mount different paths on same volume on same node", func() { ginkgo.By(fmt.Sprintf("Creating efs pvc & pv with no subpath")) pvcRoot, pvRoot, err := createEFSPVCPV(f.ClientSet, f.Namespace.Name, f.Namespace.Name+"-root", "/") framework.ExpectNoError(err, "creating efs pvc & pv with no subpath") @@ -211,58 +214,176 @@ var _ = ginkgo.Describe("[efs-csi] EFS CSI", func() { framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name), "waiting for pod running") }) - ginkgo.It("should continue reading/writing without hanging after the driver pod is restarted", func() { + ginkgo.It("[efs] [nonTls] [restart] should continue reading/writing without hanging after the driver pod is restarted", func() { ginkgo.By(fmt.Sprintf("Creating efs pvc & pv")) + // non-tls pvc, pv, err := createEFSPVCPV(f.ClientSet, f.Namespace.Name, f.Namespace.Name, "") framework.ExpectNoError(err, "creating efs pvc & pv") defer func() { _ = f.ClientSet.CoreV1().PersistentVolumes().Delete(pv.Name, &metav1.DeleteOptions{}) }() - node, err := e2enode.GetRandomReadySchedulableNode(f.ClientSet) - framework.ExpectNoError(err, "getting random ready schedulable node") - command := fmt.Sprintf("touch /mnt/volume1/%s-%s && trap exit TERM; while true; do sleep 1; done", f.Namespace.Name, time.Now().Format(time.RFC3339)) + node, pod := launchPod(f, pv, pvc) - ginkgo.By(fmt.Sprintf("Creating pod on node %q to mount pvc %q and run %q", node.Name, pvc.Name, command)) - pod := e2epod.MakePod(f.Namespace.Name, nil, []*v1.PersistentVolumeClaim{pvc}, false, command) - pod, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) - framework.ExpectNoError(err, "creating pod") - framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name), "waiting for pod running") + deleteDriverPodOnNode(f, node) + + validatePodVolumeAccessible(f, pod) + }) + + ginkgo.It("[efs] [tls] [restart] should continue reading/writing without hanging after the driver pod is restarted", func() { + ginkgo.By(fmt.Sprintf("Creating efs pvc & pv")) + path := "" + pvName := f.Namespace.Name + // 
tls + pv := makeEFSPVWithTLS(pvName, path) + pv, pvc := createPvPvcWithPv(f, pv) + defer func() { f.ClientSet.CoreV1().PersistentVolumes().Delete(pv.Name, &metav1.DeleteOptions{}) }() + + node, pod := launchPod(f, pv, pvc) + + deleteDriverPodOnNode(f, node) + + validatePodVolumeAccessible(f, pod) + }) + + ginkgo.It("[efs] [github] [tls] [upgrade] should continue reading/writing after the driver pod is upgraded from stable version", func() { + ginkgo.By(fmt.Sprintf("Sleep to ensure the existing state files are removed by watch-dog so that older driver don't have to handle new state file format")) + time.Sleep(time.Second * 10) + + daemonSetsClient := f.ClientSet.AppsV1().DaemonSets("kube-system") + efsDriverUnderTest, err := daemonSetsClient.Get("efs-csi-node", metav1.GetOptions{}) + framework.ExpectNoError(err, "Getting the efs driver under test") - ginkgo.By(fmt.Sprintf("Getting driver pod on node %q", node.Name)) - labelSelector := labels.SelectorFromSet(labels.Set{"app": "efs-csi-node"}).String() - fieldSelector := fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String() - podList, err := f.ClientSet.CoreV1().Pods("kube-system").List( - metav1.ListOptions{ - LabelSelector: labelSelector, - FieldSelector: fieldSelector, - }) - framework.ExpectNoError(err, "getting driver pod") - framework.ExpectEqual(len(podList.Items), 1, "expected 1 efs csi node pod but got %d", len(podList.Items)) - driverPod := podList.Items[0] - - ginkgo.By(fmt.Sprintf("Deleting driver pod %q on node %q", driverPod.Name, node.Name)) - err = e2epod.DeletePodWithWaitByName(f.ClientSet, driverPod.Name, "kube-system") - framework.ExpectNoError(err, "deleting driver pod") - - ginkgo.By(fmt.Sprintf("Execing a write via the pod on node %q", node.Name)) - command = fmt.Sprintf("touch /mnt/volume1/%s-%s", f.Namespace.Name, time.Now().Format(time.RFC3339)) - done := make(chan bool) - go func() { - defer ginkgo.GinkgoRecover() - utils.VerifyExecInPodSucceed(f, pod, command) - done <- true + defer func() { + ginkgo.By("Restoring last applied EFS CSI driver") + updateDriverTo(daemonSetsClient, efsDriverUnderTest) }() - select { - case <-done: - framework.Logf("verified exec in pod succeeded") - case <-time.After(30 * time.Second): - framework.Failf("timed out verifying exec in pod succeeded") - } + + ginkgo.By("Deploying latest stable EFS CSI driver") + framework.RunKubectlOrDie("apply", "-k", "github.com/kubernetes-sigs/aws-efs-csi-driver/deploy/kubernetes/overlays/stable") + + ginkgo.By(fmt.Sprintf("Creating efs pvc & pv")) + path := "" + pvName := f.Namespace.Name + // tls + pv := makeEFSPVWithTLS(pvName, path) + pv, pvc := createPvPvcWithPv(f, pv) + defer func() { f.ClientSet.CoreV1().PersistentVolumes().Delete(pv.Name, &metav1.DeleteOptions{}) }() + + _, pod := launchPod(f, pv, pvc) + + ginkgo.By("Upgrading to the EFS CSI driver under test") + updateDriverTo(daemonSetsClient, efsDriverUnderTest) + + validatePodVolumeAccessible(f, pod) + }) + + ginkgo.It("[efs] [github] [nonTls] [upgrade] should continue reading/writing after the driver pod is upgraded from stable version", func() { + ginkgo.By(fmt.Sprintf("Sleep to ensure the existing state files are removed by watch-dog so that older driver don't have to handle new state file format")) + time.Sleep(time.Second * 10) + + daemonSetsClient := f.ClientSet.AppsV1().DaemonSets("kube-system") + efsDriverUnderTest, err := daemonSetsClient.Get("efs-csi-node", metav1.GetOptions{}) + framework.ExpectNoError(err, "Getting the efs driver under test") + + defer func() { + 
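+				// Deferred cleanup: re-apply the DaemonSet captured above so the rest of the suite runs against the driver build under test, even if this spec fails.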
ginkgo.By("Restoring last applied EFS CSI driver") + updateDriverTo(daemonSetsClient, efsDriverUnderTest) + }() + + ginkgo.By("Deploying latest stable EFS CSI driver") + framework.RunKubectlOrDie("apply", "-k", "github.com/kubernetes-sigs/aws-efs-csi-driver/deploy/kubernetes/overlays/stable") + + ginkgo.By(fmt.Sprintf("Creating efs pvc & pv")) + path := "" + pvName := f.Namespace.Name + // non-tls + pv := makeEFSPvWithoutTLS(pvName, path) + pv, pvc := createPvPvcWithPv(f, pv) + defer func() { f.ClientSet.CoreV1().PersistentVolumes().Delete(pv.Name, &metav1.DeleteOptions{}) }() + + _, pod := launchPod(f, pv, pvc) + + ginkgo.By("Upgrading to the EFS CSI driver under test") + updateDriverTo(daemonSetsClient, efsDriverUnderTest) + + validatePodVolumeAccessible(f, pod) }) + }) }) +func deleteDriverPodOnNode(f *framework.Framework, node *v1.Node) { + ginkgo.By(fmt.Sprintf("Getting driver pod on node %q", node.Name)) + labelSelector := labels.SelectorFromSet(labels.Set{"app": "efs-csi-node"}).String() + fieldSelector := fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String() + podList, err := f.ClientSet.CoreV1().Pods("kube-system").List( + metav1.ListOptions{ + LabelSelector: labelSelector, + FieldSelector: fieldSelector, + }) + framework.ExpectNoError(err, "getting driver pod") + framework.ExpectEqual(len(podList.Items), 1, "expected 1 efs csi node pod but got %d", len(podList.Items)) + driverPod := podList.Items[0] + + ginkgo.By(fmt.Sprintf("Deleting driver pod %q on node %q", driverPod.Name, node.Name)) + err = e2epod.DeletePodWithWaitByName(f.ClientSet, driverPod.Name, "kube-system") + framework.ExpectNoError(err, "deleting driver pod") +} + +func createPvPvcWithPv(f *framework.Framework, pv *v1.PersistentVolume) (*v1.PersistentVolume, *v1.PersistentVolumeClaim) { + pvc, pv := makePVCWithPV(pv, f.Namespace.Name, f.Namespace.Name) + pvc, pv, err := createEFSPVPVC(f.ClientSet, f.Namespace.Name, pvc, pv) + framework.ExpectNoError(err, "creating efs pvc & pv") + + return pv, pvc +} + +func validatePodVolumeAccessible(f *framework.Framework, pod *v1.Pod) { + ginkgo.By(fmt.Sprintf("Execing a write to the pod %q in namespace %q", pod.Name, pod.Namespace)) + command := fmt.Sprintf("touch /mnt/volume1/%s-%s", f.Namespace.Name, time.Now().Format(time.RFC3339)) + done := make(chan bool) + go func() { + defer ginkgo.GinkgoRecover() + utils.VerifyExecInPodSucceed(f, pod, command) + done <- true + }() + select { + case <-done: + framework.Logf("verified exec in pod succeeded") + case <-time.After(30 * time.Second): + framework.Failf("timed out verifying exec in pod succeeded") + } +} + +func launchPod(f *framework.Framework, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim) (*v1.Node, *v1.Pod) { + node, err := e2enode.GetRandomReadySchedulableNode(f.ClientSet) + framework.ExpectNoError(err, "getting random ready schedulable node") + command := fmt.Sprintf("touch /mnt/volume1/%s-%s && trap exit TERM; while true; do sleep 1; done", f.Namespace.Name, time.Now().Format(time.RFC3339)) + + isPrivileged := false + var nodeSelector map[string]string + pod := e2epod.MakePod(f.Namespace.Name, nodeSelector, []*v1.PersistentVolumeClaim{pvc}, isPrivileged, command) + ginkgo.By(fmt.Sprintf("Creating pod on node %q to mount pvc %q at namespace %q, pv %q and run %q", node.Name, pvc.Name, f.Namespace.Name, pv.Name, command)) + pod, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) + framework.ExpectNoError(err, "creating pod") + 
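+	// Wait for the pod to reach Running before returning so callers can exec into it right away.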
framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, pod.Name, f.Namespace.Name), "waiting for pod running") + + return node, pod +} + +func updateDriverTo(daemonSetClient clientgoappsv1.DaemonSetInterface, driverUpgradeTo *appsv1.DaemonSet) { + ginkgo.By("Updating to the EFS CSI driver") + currentEfsDriver, err := daemonSetClient.Get("efs-csi-node", metav1.GetOptions{}) + driverUpgradeTo.ResourceVersion = currentEfsDriver.ResourceVersion + _, err = daemonSetClient.Update(driverUpgradeTo) + framework.ExpectNoError(err, "Updating to the EFS CSI driver") +} + func createEFSPVCPV(c clientset.Interface, namespace, name, path string) (*v1.PersistentVolumeClaim, *v1.PersistentVolume, error) { pvc, pv := makeEFSPVCPV(namespace, name, path) + return createEFSPVPVC(c, namespace, pvc, pv) +} + +func createEFSPVPVC(c clientset.Interface, namespace string, pvc *v1.PersistentVolumeClaim, pv *v1.PersistentVolume) (*v1.PersistentVolumeClaim, *v1.PersistentVolume, error) { pvc, err := c.CoreV1().PersistentVolumeClaims(namespace).Create(pvc) if err != nil { return nil, nil, err @@ -275,8 +396,12 @@ func createEFSPVCPV(c clientset.Interface, namespace, name, path string) (*v1.Pe } func makeEFSPVCPV(namespace, name, path string) (*v1.PersistentVolumeClaim, *v1.PersistentVolume) { + pv := makeEFSPvWithoutTLS(name, path) + return makePVCWithPV(pv, namespace, name) +} + +func makePVCWithPV(pv *v1.PersistentVolume, namespace, name string) (*v1.PersistentVolumeClaim, *v1.PersistentVolume) { pvc := makeEFSPVC(namespace, name) - pv := makeEFSPV(name, path) pvc.Spec.VolumeName = pv.Name pv.Spec.ClaimRef = &v1.ObjectReference{ Namespace: pvc.Namespace, @@ -285,6 +410,20 @@ func makeEFSPVCPV(namespace, name, path string) (*v1.PersistentVolumeClaim, *v1. return pvc, pv } +func makeEFSPvWithoutTLS(name, path string) *v1.PersistentVolume { + var mountOptions []string + var volumeAttributes map[string]string + // TODO add "encryptInTransit" : "false" once https://github.com/kubernetes-sigs/aws-efs-csi-driver/pull/205 is merged. 
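+	// For now the nil map leaves VolumeAttributes unset on the PV; the commented-out line below shows the attribute to add once that PR lands.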
+ // volumeAttributes := map[string]string{"encryptInTransit" : "false"} + return makeEFSPV(name, path, mountOptions, volumeAttributes) +} + +func makeEFSPVWithTLS(name, path string) *v1.PersistentVolume { + mountOptions := []string{"tls"} + var volumeAttributes map[string]string + return makeEFSPV(name, path, mountOptions, volumeAttributes) +} + func makeEFSPVC(namespace, name string) *v1.PersistentVolumeClaim { storageClassName := "" return &v1.PersistentVolumeClaim{ @@ -304,7 +443,7 @@ func makeEFSPVC(namespace, name string) *v1.PersistentVolumeClaim { } } -func makeEFSPV(name, path string) *v1.PersistentVolume { +func makeEFSPV(name, path string, mountOptions []string, volumeAttributes map[string]string) *v1.PersistentVolume { volumeHandle := FileSystemId if path != "" { volumeHandle += ":" + path @@ -318,10 +457,12 @@ func makeEFSPV(name, path string) *v1.PersistentVolume { Capacity: v1.ResourceList{ v1.ResourceStorage: resource.MustParse("1Gi"), }, + MountOptions: mountOptions, PersistentVolumeSource: v1.PersistentVolumeSource{ CSI: &v1.CSIPersistentVolumeSource{ - Driver: "efs.csi.aws.com", - VolumeHandle: volumeHandle, + Driver: "efs.csi.aws.com", + VolumeHandle: volumeHandle, + VolumeAttributes: volumeAttributes, }, }, AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}, diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index df4da54ae..fa196e422 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -55,6 +55,7 @@ func init() { flag.StringVar(&ClusterName, "cluster-name", "", "the cluster name") flag.StringVar(&Region, "region", "us-west-2", "the region") + flag.StringVar(&FileSystemId, "file-system-id", "", "the file system id") flag.Parse() }
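With the `--file-system-id` flag registered in `e2e_test.go`, a run against a specific file system might look like the following sketch (the file system ID and cluster name are placeholders):

```sh
go test ./test/e2e/ -v -kubeconfig=$HOME/.kube/config --region=us-west-2 --cluster-name="cluster-name" --file-system-id=fs-12345678 --report-dir="./results" -ginkgo.focus="\[efs-csi\]"
```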