diff --git a/pkg/cmd/tests/options.go b/pkg/cmd/tests/options.go index 6ba22a59d05..af673ddd666 100644 --- a/pkg/cmd/tests/options.go +++ b/pkg/cmd/tests/options.go @@ -13,6 +13,7 @@ import ( "github.com/scylladb/scylla-operator/test/e2e/framework" "github.com/spf13/cobra" apierrors "k8s.io/apimachinery/pkg/util/errors" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/rest" ) @@ -42,26 +43,26 @@ var supportedBroadcastAddressTypes = []scyllav1.BroadcastAddressType{ type TestFrameworkOptions struct { genericclioptions.ClientConfigSet - ArtifactsDir string - DeleteTestingNSPolicyUntyped string - DeleteTestingNSPolicy framework.DeleteTestingNSPolicyType - IngressController *IngressControllerOptions - ScyllaClusterOptionsUntyped *ScyllaClusterOptions - scyllaClusterOptions *framework.ScyllaClusterOptions - ObjectStorageBucket string - GCSServiceAccountKeyPath string - S3CredentialsFilePath string - objectStorageType framework.ObjectStorageType - gcsServiceAccountKey []byte - s3CredentialsFile []byte + ArtifactsDir string + CleanupPolicyUntyped string + CleanupPolicy framework.CleanupPolicyType + IngressController *IngressControllerOptions + ScyllaClusterOptionsUntyped *ScyllaClusterOptions + scyllaClusterOptions *framework.ScyllaClusterOptions + ObjectStorageBucket string + GCSServiceAccountKeyPath string + S3CredentialsFilePath string + objectStorageType framework.ObjectStorageType + gcsServiceAccountKey []byte + s3CredentialsFile []byte } func NewTestFrameworkOptions(streams genericclioptions.IOStreams, userAgent string) *TestFrameworkOptions { return &TestFrameworkOptions{ - ClientConfigSet: genericclioptions.NewClientConfigSet(userAgent), - ArtifactsDir: "", - DeleteTestingNSPolicyUntyped: string(framework.DeleteTestingNSPolicyAlways), - IngressController: &IngressControllerOptions{}, + ClientConfigSet: genericclioptions.NewClientConfigSet(userAgent), + ArtifactsDir: "", + CleanupPolicyUntyped: string(framework.CleanupPolicyAlways), + IngressController: &IngressControllerOptions{}, ScyllaClusterOptionsUntyped: &ScyllaClusterOptions{ NodeServiceType: string(scyllav1.NodeServiceTypeHeadless), NodesBroadcastAddressType: string(scyllav1.BroadcastAddressTypePodIP), @@ -81,11 +82,20 @@ func (o *TestFrameworkOptions) AddFlags(cmd *cobra.Command) { o.ClientConfigSet.AddFlags(cmd) cmd.PersistentFlags().StringVarP(&o.ArtifactsDir, "artifacts-dir", "", o.ArtifactsDir, "A directory for storing test artifacts. No data is collected until set.") - cmd.PersistentFlags().StringVarP(&o.DeleteTestingNSPolicyUntyped, "delete-namespace-policy", "", o.DeleteTestingNSPolicyUntyped, fmt.Sprintf("Namespace deletion policy. Allowed values are [%s].", strings.Join( + cmd.PersistentFlags().StringVarP(&o.CleanupPolicyUntyped, "delete-namespace-policy", "", o.CleanupPolicyUntyped, fmt.Sprintf("Namespace deletion policy. Allowed values are [%s].", strings.Join( []string{ - string(framework.DeleteTestingNSPolicyAlways), - string(framework.DeleteTestingNSPolicyNever), - string(framework.DeleteTestingNSPolicyOnSuccess), + string(framework.CleanupPolicyAlways), + string(framework.CleanupPolicyNever), + string(framework.CleanupPolicyOnSuccess), + }, + ", ", + ))) + utilruntime.Must(cmd.PersistentFlags().MarkDeprecated("delete-namespace-policy", "--delete-namespace-policy is deprecated - please use --cleanup-policy instead")) + cmd.PersistentFlags().StringVarP(&o.CleanupPolicyUntyped, "cleanup-policy", "", o.CleanupPolicyUntyped, fmt.Sprintf("Cleanup policy. Allowed values are [%s].", strings.Join( + []string{ + string(framework.CleanupPolicyAlways), + string(framework.CleanupPolicyNever), + string(framework.CleanupPolicyOnSuccess), }, ", ", ))) @@ -118,10 +128,10 @@ func (o *TestFrameworkOptions) Validate(args []string) error { errors = append(errors, err) } - switch p := framework.DeleteTestingNSPolicyType(o.DeleteTestingNSPolicyUntyped); p { - case framework.DeleteTestingNSPolicyAlways, - framework.DeleteTestingNSPolicyOnSuccess, - framework.DeleteTestingNSPolicyNever: + switch p := framework.CleanupPolicyType(o.CleanupPolicyUntyped); p { + case framework.CleanupPolicyAlways, + framework.CleanupPolicyOnSuccess, + framework.CleanupPolicyNever: default: errors = append(errors, fmt.Errorf("invalid DeleteTestingNSPolicy: %q", p)) } @@ -174,7 +184,7 @@ func (o *TestFrameworkOptions) Complete(args []string) error { return err } - o.DeleteTestingNSPolicy = framework.DeleteTestingNSPolicyType(o.DeleteTestingNSPolicyUntyped) + o.CleanupPolicy = framework.CleanupPolicyType(o.CleanupPolicyUntyped) // Trim spaces so we can reason later if the dir is set or not o.ArtifactsDir = strings.TrimSpace(o.ArtifactsDir) @@ -216,13 +226,13 @@ func (o *TestFrameworkOptions) Complete(args []string) error { RestConfigs: slices.ConvertSlice(o.ClientConfigs, func(cc genericclioptions.ClientConfig) *rest.Config { return cc.RestConfig }), - ArtifactsDir: o.ArtifactsDir, - DeleteTestingNSPolicy: o.DeleteTestingNSPolicy, - ScyllaClusterOptions: o.scyllaClusterOptions, - ObjectStorageType: o.objectStorageType, - ObjectStorageBucket: o.ObjectStorageBucket, - GCSServiceAccountKey: o.gcsServiceAccountKey, - S3CredentialsFile: o.s3CredentialsFile, + ArtifactsDir: o.ArtifactsDir, + CleanupPolicy: o.CleanupPolicy, + ScyllaClusterOptions: o.scyllaClusterOptions, + ObjectStorageType: o.objectStorageType, + ObjectStorageBucket: o.ObjectStorageBucket, + GCSServiceAccountKey: o.gcsServiceAccountKey, + S3CredentialsFile: o.s3CredentialsFile, } if o.IngressController != nil { diff --git a/pkg/naming/names.go b/pkg/naming/names.go index d00bbe80cfc..423740ec8ac 100644 --- a/pkg/naming/names.go +++ b/pkg/naming/names.go @@ -13,15 +13,13 @@ import ( ) func ManualRef(namespace, name string) string { + if len(namespace) == 0 { + return name + } return fmt.Sprintf("%s/%s", namespace, name) } func ObjRef(obj metav1.Object) string { - namespace := obj.GetNamespace() - if len(namespace) == 0 { - return obj.GetName() - } - return ManualRef(obj.GetNamespace(), obj.GetName()) } diff --git a/test/e2e/framework/cleanup.go b/test/e2e/framework/cleanup.go new file mode 100644 index 00000000000..e1235c64791 --- /dev/null +++ b/test/e2e/framework/cleanup.go @@ -0,0 +1,228 @@ +package framework + +import ( + "context" + "fmt" + "path/filepath" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + "github.com/scylladb/scylla-operator/pkg/gather/collect" + "github.com/scylladb/scylla-operator/pkg/naming" + "github.com/scylladb/scylla-operator/pkg/pointer" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + cacheddiscovery "k8s.io/client-go/discovery/cached/memory" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" +) + +type CleanupInterface interface { + CollectToLog(ctx context.Context) + Collect(ctx context.Context, artifactsDir string, ginkgoNamespace string) + Cleanup(ctx context.Context) +} + +type NamespaceCleaner struct { + Client kubernetes.Interface + DynamicClient dynamic.Interface + NS *corev1.Namespace +} + +var _ CleanupInterface = &NamespaceCleaner{} + +func (nc *NamespaceCleaner) CollectToLog(ctx context.Context) { + // Log events if the test failed. + if g.CurrentSpecReport().Failed() { + By(fmt.Sprintf("Collecting events from namespace %q.", nc.NS.Name)) + DumpEventsInNamespace(ctx, nc.Client, nc.NS.Name) + } +} + +func (nc *NamespaceCleaner) Collect(ctx context.Context, artifactsDir string, _ string) { + By(fmt.Sprintf("Collecting dumps from namespace %q.", nc.NS.Name)) + + err := DumpNamespace(ctx, cacheddiscovery.NewMemCacheClient(nc.Client.Discovery()), nc.DynamicClient, nc.Client.CoreV1(), artifactsDir, nc.NS.Name) + o.Expect(err).NotTo(o.HaveOccurred()) +} + +func (nc *NamespaceCleaner) Cleanup(ctx context.Context) { + By("Destroying namespace %q.", nc.NS.Name) + err := nc.Client.CoreV1().Namespaces().Delete( + ctx, + nc.NS.Name, + metav1.DeleteOptions{ + GracePeriodSeconds: pointer.Ptr[int64](0), + PropagationPolicy: pointer.Ptr(metav1.DeletePropagationForeground), + Preconditions: &metav1.Preconditions{ + UID: &nc.NS.UID, + }, + }, + ) + o.Expect(err).NotTo(o.HaveOccurred()) + + // We have deleted only the namespace object, but it can still be there with deletionTimestamp set. + By("Waiting for namespace %q to be removed.", nc.NS.Name) + err = WaitForObjectDeletion(ctx, nc.DynamicClient, corev1.SchemeGroupVersion.WithResource("namespaces"), "", nc.NS.Name, &nc.NS.UID) + o.Expect(err).NotTo(o.HaveOccurred()) + klog.InfoS("Namespace removed.", "Namespace", nc.NS.Name) +} + +type RestoreStrategy string + +const ( + RestoreStrategyRecreate RestoreStrategy = "Recreate" + RestoreStrategyUpdate RestoreStrategy = "Update" +) + +type RestoringCleaner struct { + client kubernetes.Interface + dynamicClient dynamic.Interface + resourceInfo collect.ResourceInfo + object *unstructured.Unstructured + strategy RestoreStrategy +} + +var _ CleanupInterface = &RestoringCleaner{} + +func NewRestoringCleaner(ctx context.Context, client kubernetes.Interface, dynamicClient dynamic.Interface, resourceInfo collect.ResourceInfo, namespace string, name string, strategy RestoreStrategy) *RestoringCleaner { + g.By(fmt.Sprintf("Snapshotting object %s %q", resourceInfo.Resource, naming.ManualRef(namespace, name))) + + if resourceInfo.Scope.Name() == meta.RESTScopeNameNamespace { + o.Expect(namespace).NotTo(o.BeEmpty()) + } + + obj, err := dynamicClient.Resource(resourceInfo.Resource).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + klog.InfoS("No existing object found", "GVR", resourceInfo.Resource, "Instance", naming.ManualRef(namespace, name)) + obj = &unstructured.Unstructured{ + Object: map[string]interface{}{}, + } + obj.SetNamespace(namespace) + obj.SetName(name) + obj.SetUID("") + } else { + o.Expect(err).NotTo(o.HaveOccurred()) + klog.InfoS("Snapshotted object", "GVR", resourceInfo.Resource, "Instance", naming.ManualRef(namespace, name), "UID", obj.GetUID()) + } + + return &RestoringCleaner{ + client: client, + dynamicClient: dynamicClient, + resourceInfo: resourceInfo, + object: obj, + strategy: strategy, + } +} + +func (rc *RestoringCleaner) getCleansedObject() *unstructured.Unstructured { + obj := rc.object.DeepCopy() + obj.SetResourceVersion("") + obj.SetUID("") + obj.SetCreationTimestamp(metav1.Time{}) + obj.SetDeletionTimestamp(nil) + return obj +} + +func (rc *RestoringCleaner) CollectToLog(ctx context.Context) {} + +func (rc *RestoringCleaner) Collect(ctx context.Context, clusterArtifactsDir string, ginkgoNamespace string) { + artifactsDir := clusterArtifactsDir + if len(artifactsDir) != 0 && rc.resourceInfo.Scope.Name() == meta.RESTScopeNameRoot { + // We have to prevent global object dumps being overwritten with each "It" block. + artifactsDir = filepath.Join(artifactsDir, "cluster-scoped-per-ns", ginkgoNamespace) + } + + By(fmt.Sprintf("Collecting global %s %q for namespace %q.", rc.resourceInfo.Resource, naming.ObjRef(rc.object), ginkgoNamespace)) + + err := DumpResource( + ctx, + rc.client.Discovery(), + rc.dynamicClient, + rc.client.CoreV1(), + artifactsDir, + &rc.resourceInfo, + rc.object.GetNamespace(), + rc.object.GetName(), + ) + if apierrors.IsNotFound(err) { + klog.V(2).InfoS("Skipping object collection because it no longer exists", "Ref", naming.ObjRef(rc.object), "Resource", rc.resourceInfo.Resource) + } else { + o.Expect(err).NotTo(o.HaveOccurred()) + } +} + +func (rc *RestoringCleaner) DeleteObject(ctx context.Context, ignoreNotFound bool) { + By("Deleting object %s %q.", rc.resourceInfo.Resource, naming.ObjRef(rc.object)) + err := rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Delete( + ctx, + rc.object.GetName(), + metav1.DeleteOptions{ + GracePeriodSeconds: pointer.Ptr[int64](0), + PropagationPolicy: pointer.Ptr(metav1.DeletePropagationForeground), + }, + ) + if apierrors.IsNotFound(err) && ignoreNotFound { + return + } + o.Expect(err).NotTo(o.HaveOccurred()) + + // We have deleted only the object, but it can still be there with deletionTimestamp set. + By("Waiting for object %s %q to be removed.", rc.resourceInfo.Resource, naming.ObjRef(rc.object)) + err = WaitForObjectDeletion(ctx, rc.dynamicClient, rc.resourceInfo.Resource, rc.object.GetNamespace(), rc.object.GetName(), nil) + o.Expect(err).NotTo(o.HaveOccurred()) + By("Object %s %q has been removed.", rc.resourceInfo.Resource, naming.ObjRef(rc.object)) +} + +func (rc *RestoringCleaner) RecreateObject(ctx context.Context) { + rc.DeleteObject(ctx, true) + + _, err := rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Create(ctx, rc.getCleansedObject(), metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) +} + +func (rc *RestoringCleaner) ReplaceObject(ctx context.Context) { + var err error + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + var freshObj *unstructured.Unstructured + freshObj, err = rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Get(ctx, rc.object.GetName(), metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + _, err = rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Create(ctx, rc.getCleansedObject(), metav1.CreateOptions{}) + return err + } + + obj := rc.getCleansedObject() + obj.SetResourceVersion(freshObj.GetResourceVersion()) + + o.Expect(err).NotTo(o.HaveOccurred()) + _, err = rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(obj.GetNamespace()).Update(ctx, obj, metav1.UpdateOptions{}) + return err + }) + o.Expect(err).NotTo(o.HaveOccurred()) +} + +func (rc *RestoringCleaner) RestoreObject(ctx context.Context) { + By("Restoring original object %s %q.", rc.resourceInfo.Resource, naming.ObjRef(rc.object)) + switch rc.strategy { + case RestoreStrategyRecreate: + rc.RecreateObject(ctx) + case RestoreStrategyUpdate: + rc.ReplaceObject(ctx) + default: + g.Fail(fmt.Sprintf("unexpected strategy %q", rc.strategy)) + } +} + +func (rc *RestoringCleaner) Cleanup(ctx context.Context) { + if len(rc.object.GetUID()) == 0 { + rc.DeleteObject(ctx, true) + return + } + + rc.RestoreObject(ctx) +} diff --git a/test/e2e/framework/cluster.go b/test/e2e/framework/cluster.go index 18fb07901e7..3b1c8883d8b 100644 --- a/test/e2e/framework/cluster.go +++ b/test/e2e/framework/cluster.go @@ -4,19 +4,10 @@ package framework import ( "context" - "fmt" - "os" - "path" - g "github.com/onsi/ginkgo/v2" - o "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - cacheddiscovery "k8s.io/client-go/discovery/cached/memory" - "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" - "k8s.io/klog/v2" ) type ClusterInterface interface { @@ -30,18 +21,20 @@ type createNamespaceFunc func(ctx context.Context, adminClient kubernetes.Interf type Cluster struct { AdminClient - name string + name string + artifactsDir string - createNamespace createNamespaceFunc - defaultNamespace *corev1.Namespace - defaultClient Client - namespacesToDelete []*corev1.Namespace + createNamespace createNamespaceFunc + defaultNamespace *corev1.Namespace + defaultClient Client + + cleaners []CleanupInterface } var _ AdminClientInterface = &Cluster{} var _ ClusterInterface = &Cluster{} -func NewCluster(name string, restConfig *restclient.Config, createNamespace createNamespaceFunc) *Cluster { +func NewCluster(name string, artifactsDir string, restConfig *restclient.Config, createNamespace createNamespaceFunc) *Cluster { adminClientConfig := restclient.CopyConfig(restConfig) return &Cluster{ @@ -49,14 +42,15 @@ func NewCluster(name string, restConfig *restclient.Config, createNamespace crea Config: adminClientConfig, }, - name: name, + name: name, + artifactsDir: artifactsDir, createNamespace: createNamespace, defaultNamespace: nil, defaultClient: Client{ Config: nil, }, - namespacesToDelete: []*corev1.Namespace{}, + cleaners: nil, } } @@ -68,89 +62,39 @@ func (c *Cluster) DefaultNamespaceIfAny() (*corev1.Namespace, Client, bool) { return c.defaultNamespace, c.defaultClient, true } +func (c *Cluster) AddCleaners(cleaners ...CleanupInterface) { + c.cleaners = append(c.cleaners, cleaners...) +} + +func (c *Cluster) GetArtifactsDir() string { + return c.artifactsDir +} + func (c *Cluster) CreateUserNamespace(ctx context.Context) (*corev1.Namespace, Client) { ns, nsClient := c.createNamespace(ctx, c.KubeAdminClient(), c.AdminClientConfig()) - c.namespacesToDelete = append(c.namespacesToDelete, ns) + c.AddCleaners(&NamespaceCleaner{ + Client: c.KubeAdminClient(), + DynamicClient: c.DynamicAdminClient(), + NS: ns, + }) return ns, nsClient } -func (c *Cluster) Cleanup(ctx context.Context) { - for _, ns := range c.namespacesToDelete { - collectAndDeleteNamespace(ctx, c.KubeAdminClient(), c.DynamicAdminClient(), ns) - } - - c.namespacesToDelete = []*corev1.Namespace{} -} - -func collectAndDeleteNamespace(ctx context.Context, adminClient kubernetes.Interface, dynamicAdminClient dynamic.Interface, ns *corev1.Namespace) { - defer func() { - keepNamespace := false - switch TestContext.DeleteTestingNSPolicy { - case DeleteTestingNSPolicyNever: - keepNamespace = true - case DeleteTestingNSPolicyOnSuccess: - if g.CurrentSpecReport().Failed() { - keepNamespace = true - } - case DeleteTestingNSPolicyAlways: - default: +func (c *Cluster) Collect(ctx context.Context, ginkgoNamespace string) { + for _, cleaner := range c.cleaners { + cleaner.CollectToLog(ctx) + if len(c.artifactsDir) != 0 { + cleaner.Collect(ctx, c.artifactsDir, ginkgoNamespace) } - - if keepNamespace { - By("Keeping namespace %q for debugging", ns.Name) - return - } - - deleteNamespace(ctx, adminClient, dynamicAdminClient, ns) - - }() - - // Print events if the test failed. - if g.CurrentSpecReport().Failed() { - By(fmt.Sprintf("Collecting events from namespace %q.", ns.Name)) - DumpEventsInNamespace(ctx, adminClient, ns.Name) } +} - // CI can't keep namespaces alive because it could get out of resources for the other tests - // so we need to collect the namespaced dump before destroying the namespace. - // Collecting artifacts even for successful runs helps to verify if it went - // as expected and the amount of data is bearable. - if len(TestContext.ArtifactsDir) != 0 { - By(fmt.Sprintf("Collecting dumps from namespace %q.", ns.Name)) - - d := path.Join(TestContext.ArtifactsDir, "e2e") - err := os.Mkdir(d, 0777) - if err != nil && !os.IsExist(err) { - o.Expect(err).NotTo(o.HaveOccurred()) - } - - err = DumpNamespace(ctx, cacheddiscovery.NewMemCacheClient(adminClient.Discovery()), dynamicAdminClient, adminClient.CoreV1(), d, ns.Name) - o.Expect(err).NotTo(o.HaveOccurred()) +func (c *Cluster) Cleanup(ctx context.Context) { + for _, cleaner := range c.cleaners { + cleaner.Cleanup(ctx) } -} -func deleteNamespace(ctx context.Context, adminClient kubernetes.Interface, dynamicAdminClient dynamic.Interface, ns *corev1.Namespace) { - By("Destroying namespace %q.", ns.Name) - var gracePeriod int64 = 0 - var propagation = metav1.DeletePropagationForeground - err := adminClient.CoreV1().Namespaces().Delete( - ctx, - ns.Name, - metav1.DeleteOptions{ - GracePeriodSeconds: &gracePeriod, - PropagationPolicy: &propagation, - Preconditions: &metav1.Preconditions{ - UID: &ns.UID, - }, - }, - ) - o.Expect(err).NotTo(o.HaveOccurred()) - - // We have deleted only the namespace object but it is still there with deletionTimestamp set. - By("Waiting for namespace %q to be removed.", ns.Name) - err = WaitForObjectDeletion(ctx, dynamicAdminClient, corev1.SchemeGroupVersion.WithResource("namespaces"), "", ns.Name, &ns.UID) - o.Expect(err).NotTo(o.HaveOccurred()) - klog.InfoS("Namespace removed.", "Namespace", ns.Name) + c.cleaners = c.cleaners[:0] } diff --git a/test/e2e/framework/dump.go b/test/e2e/framework/dump.go index b58b655f6c5..89de50c2e49 100644 --- a/test/e2e/framework/dump.go +++ b/test/e2e/framework/dump.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/scylladb/scylla-operator/pkg/gather/collect" + "github.com/scylladb/scylla-operator/pkg/naming" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime/schema" @@ -13,7 +14,7 @@ import ( corev1client "k8s.io/client-go/kubernetes/typed/core/v1" ) -func DumpNamespace(ctx context.Context, discoveryClient discovery.DiscoveryInterface, dynamicClient dynamic.Interface, corev1Client corev1client.CoreV1Interface, artifactsDir, namespace string) error { +func DumpResource(ctx context.Context, discoveryClient discovery.DiscoveryInterface, dynamicClient dynamic.Interface, corev1Client corev1client.CoreV1Interface, artifactsDir string, resourceInfo *collect.ResourceInfo, namespace string, name string) error { collector := collect.NewCollector( artifactsDir, []collect.ResourcePrinterInterface{ @@ -30,6 +31,24 @@ func DumpNamespace(ctx context.Context, discoveryClient discovery.DiscoveryInter ) err := collector.CollectResource( ctx, + resourceInfo, + namespace, + name, + ) + if err != nil { + return fmt.Errorf("can't collect object %q (%s): %w", naming.ManualRef(namespace, name), resourceInfo.Resource.String(), err) + } + + return nil +} + +func DumpNamespace(ctx context.Context, discoveryClient discovery.DiscoveryInterface, dynamicClient dynamic.Interface, corev1Client corev1client.CoreV1Interface, artifactsDir string, name string) error { + return DumpResource( + ctx, + discoveryClient, + dynamicClient, + corev1Client, + artifactsDir, &collect.ResourceInfo{ Resource: schema.GroupVersionResource{ Group: "", @@ -39,11 +58,6 @@ func DumpNamespace(ctx context.Context, discoveryClient discovery.DiscoveryInter Scope: meta.RESTScopeRoot, }, corev1.NamespaceAll, - namespace, + name, ) - if err != nil { - return fmt.Errorf("can't collect namespace %q: %w", namespace, err) - } - - return nil } diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 5c7d589f4cf..a1dfc818abb 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -5,6 +5,8 @@ package framework import ( "context" "fmt" + "os" + "path" "strconv" "time" @@ -33,7 +35,8 @@ const ( type Framework struct { FullClient - name string + name string + e2eArtifactsDir string clusters []*Cluster } @@ -42,28 +45,51 @@ var _ FullClientInterface = &Framework{} var _ ClusterInterface = &Framework{} func NewFramework(namePrefix string) *Framework { + var err error + f := &Framework{ - name: names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", namePrefix)), - FullClient: FullClient{}, + name: names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", namePrefix)), + e2eArtifactsDir: "", + FullClient: FullClient{}, + clusters: make([]*Cluster, 0, len(TestContext.RestConfigs)), + } + + if len(TestContext.ArtifactsDir) != 0 { + f.e2eArtifactsDir = path.Join(TestContext.ArtifactsDir, "e2e") + err = os.Mkdir(f.e2eArtifactsDir, 0777) + if !os.IsExist(err) { + o.Expect(err).NotTo(o.HaveOccurred()) + } } - clusters := make([]*Cluster, 0, len(TestContext.RestConfigs)) + o.Expect(TestContext.RestConfigs).NotTo(o.BeEmpty()) for i, restConfig := range TestContext.RestConfigs { clusterName := fmt.Sprintf("%s-%d", f.name, i) + + clusterE2EArtifactsDir := "" + if len(f.e2eArtifactsDir) != 0 { + clusterE2EArtifactsDir = path.Join(f.e2eArtifactsDir, fmt.Sprintf("cluster-%d", i)) + err = os.Mkdir(clusterE2EArtifactsDir, 0777) + if !os.IsExist(err) { + o.Expect(err).NotTo(o.HaveOccurred()) + } + } + c := NewCluster( clusterName, + clusterE2EArtifactsDir, restConfig, func(ctx context.Context, adminClient kubernetes.Interface, adminClientConfig *restclient.Config) (*corev1.Namespace, Client) { return CreateUserNamespace(ctx, clusterName, f.CommonLabels(), adminClient, adminClientConfig) }, ) - clusters = append(clusters, c) + f.clusters = append(f.clusters, c) } - f.clusters = clusters f.FullClient.AdminClient.Config = f.defaultCluster().AdminClientConfig() g.BeforeEach(f.beforeEach) + g.JustAfterEach(f.justAfterEach) g.AfterEach(f.afterEach) return f @@ -82,6 +108,10 @@ func (f *Framework) DefaultNamespaceIfAny() (*corev1.Namespace, Client, bool) { return f.defaultCluster().DefaultNamespaceIfAny() } +func (f *Framework) GetDefaultArtifactsDir() string { + return f.defaultCluster().GetArtifactsDir() +} + func (f *Framework) GetIngressAddress(hostname string) string { if TestContext.IngressController == nil || len(TestContext.IngressController.Address) == 0 { return hostname @@ -130,6 +160,10 @@ func (f *Framework) GetDefaultZonalScyllaClusterWithThreeRacks() *scyllav1.Scyll return sc } +func (f *Framework) AddCleaners(cleaners ...CleanupInterface) { + f.defaultCluster().AddCleaners(cleaners...) +} + func (f *Framework) CreateUserNamespace(ctx context.Context) (*corev1.Namespace, Client) { return f.defaultCluster().CreateUserNamespace(ctx) } @@ -162,6 +196,7 @@ func (f *Framework) GetS3CredentialsFile() []byte { } func (f *Framework) defaultCluster() *Cluster { + o.Expect(f.clusters).NotTo(o.BeEmpty()) return f.clusters[0] } @@ -172,6 +207,13 @@ func (f *Framework) beforeEach(ctx context.Context) { f.FullClient.Client = nsClient } +func (f *Framework) justAfterEach(ctx context.Context) { + ginkgoNamespace := f.defaultCluster().defaultNamespace.Name + for _, c := range f.clusters { + c.Collect(ctx, ginkgoNamespace) + } +} + func (f *Framework) afterEach(ctx context.Context) { nilClient := Client{ Config: nil, @@ -181,8 +223,23 @@ func (f *Framework) afterEach(ctx context.Context) { f.defaultCluster().defaultClient = nilClient f.FullClient.Client = nilClient - for _, c := range f.clusters { - c.Cleanup(ctx) + shouldCleanup := true + switch TestContext.CleanupPolicy { + case CleanupPolicyNever: + shouldCleanup = false + case CleanupPolicyOnSuccess: + if g.CurrentSpecReport().Failed() { + shouldCleanup = false + } + case CleanupPolicyAlways: + default: + g.Fail(fmt.Sprintf("unexpected cleanup policy %q", TestContext.CleanupPolicy)) + } + + if shouldCleanup { + for _, c := range f.clusters { + c.Cleanup(ctx) + } } } diff --git a/test/e2e/framework/testcontext.go b/test/e2e/framework/testcontext.go index 70584a8716a..effd0435de9 100644 --- a/test/e2e/framework/testcontext.go +++ b/test/e2e/framework/testcontext.go @@ -9,12 +9,12 @@ import ( var TestContext *TestContextType -type DeleteTestingNSPolicyType string +type CleanupPolicyType string var ( - DeleteTestingNSPolicyAlways DeleteTestingNSPolicyType = "Always" - DeleteTestingNSPolicyOnSuccess DeleteTestingNSPolicyType = "OnSuccess" - DeleteTestingNSPolicyNever DeleteTestingNSPolicyType = "Never" + CleanupPolicyAlways CleanupPolicyType = "Always" + CleanupPolicyOnSuccess CleanupPolicyType = "OnSuccess" + CleanupPolicyNever CleanupPolicyType = "Never" ) type IngressController struct { @@ -43,13 +43,13 @@ const ( ) type TestContextType struct { - RestConfigs []*restclient.Config - ArtifactsDir string - DeleteTestingNSPolicy DeleteTestingNSPolicyType - IngressController *IngressController - ScyllaClusterOptions *ScyllaClusterOptions - ObjectStorageType ObjectStorageType - ObjectStorageBucket string - GCSServiceAccountKey []byte - S3CredentialsFile []byte + RestConfigs []*restclient.Config + ArtifactsDir string + CleanupPolicy CleanupPolicyType + IngressController *IngressController + ScyllaClusterOptions *ScyllaClusterOptions + ObjectStorageType ObjectStorageType + ObjectStorageBucket string + GCSServiceAccountKey []byte + S3CredentialsFile []byte } diff --git a/test/e2e/set/nodeconfig/config.go b/test/e2e/set/nodeconfig/config.go index 27a4a24ed2d..746c990f0f1 100644 --- a/test/e2e/set/nodeconfig/config.go +++ b/test/e2e/set/nodeconfig/config.go @@ -2,10 +2,35 @@ package nodeconfig -import "time" +import ( + "time" + + "github.com/scylladb/scylla-operator/pkg/gather/collect" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/runtime/schema" +) const ( testTimeout = 15 * time.Minute nodeConfigRolloutTimeout = 5 * time.Minute apiCallTimeout = 5 * time.Second ) + +var ( + nodeConfigResourceInfo = collect.ResourceInfo{ + Resource: schema.GroupVersionResource{ + Group: "scylla.scylladb.com", + Version: "v1alpha1", + Resource: "nodeconfigs", + }, + Scope: meta.RESTScopeRoot, + } + resourceQuotaResourceInfo = collect.ResourceInfo{ + Resource: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "resourcequotas", + }, + Scope: meta.RESTScopeNamespace, + } +) diff --git a/test/e2e/set/nodeconfig/nodeconfig_disksetup.go b/test/e2e/set/nodeconfig/nodeconfig_disksetup.go index e6aa439f5c5..5d81b27f776 100644 --- a/test/e2e/set/nodeconfig/nodeconfig_disksetup.go +++ b/test/e2e/set/nodeconfig/nodeconfig_disksetup.go @@ -5,7 +5,6 @@ package nodeconfig import ( "context" "fmt" - "os" "path" "strings" "time" @@ -14,18 +13,15 @@ import ( o "github.com/onsi/gomega" scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" "github.com/scylladb/scylla-operator/pkg/controllerhelpers" - "github.com/scylladb/scylla-operator/pkg/naming" "github.com/scylladb/scylla-operator/pkg/pointer" scyllafixture "github.com/scylladb/scylla-operator/test/e2e/fixture/scylla" "github.com/scylladb/scylla-operator/test/e2e/framework" "github.com/scylladb/scylla-operator/test/e2e/utils" "github.com/scylladb/scylla-operator/test/e2e/utils/image" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/rand" - cacheddiscovery "k8s.io/client-go/discovery/cached/memory" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/util/retry" @@ -37,61 +33,18 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { ncTemplate := scyllafixture.NodeConfig.ReadOrFail() var matchingNodes []*corev1.Node - preconditionSuccessful := false g.JustBeforeEach(func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // Make sure the NodeConfig is not present. - framework.By("Making sure NodeConfig %q, doesn't exist", naming.ObjRef(ncTemplate)) - _, err := f.ScyllaAdminClient().ScyllaV1alpha1().NodeConfigs().Get(ctx, ncTemplate.Name, metav1.GetOptions{}) - if err == nil { - framework.Failf("NodeConfig %q can't be present before running this test", naming.ObjRef(ncTemplate)) - } else if !apierrors.IsNotFound(err) { - framework.Failf("Can't get NodeConfig %q: %v", naming.ObjRef(ncTemplate), err) - } - - preconditionSuccessful = true - g.By("Verifying there is at least one scylla node") + var err error matchingNodes, err = utils.GetMatchingNodesForNodeConfig(ctx, f.KubeAdminClient().CoreV1(), ncTemplate) o.Expect(err).NotTo(o.HaveOccurred()) o.Expect(matchingNodes).NotTo(o.HaveLen(0)) framework.Infof("There are %d scylla nodes", len(matchingNodes)) }) - g.JustAfterEach(func() { - if !preconditionSuccessful { - return - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - if len(framework.TestContext.ArtifactsDir) > 0 { - framework.By("Collecting NodeConfig namespace dump") - - dir := path.Join(framework.TestContext.ArtifactsDir, "nodeconfig-related", f.Namespace()) - err := os.MkdirAll(dir, 0777) - if err != nil && !os.IsExist(err) { - o.Expect(err).NotTo(o.HaveOccurred()) - } - - err = framework.DumpNamespace(ctx, cacheddiscovery.NewMemCacheClient(f.KubeAdminClient().Discovery()), f.DynamicAdminClient(), f.KubeAdminClient().CoreV1(), dir, naming.ScyllaOperatorNodeTuningNamespace) - o.Expect(err).NotTo(o.HaveOccurred()) - } - - framework.By("Deleting NodeConfig %q, if it exists", naming.ObjRef(ncTemplate)) - err := f.ScyllaAdminClient().ScyllaV1alpha1().NodeConfigs().Delete(context.Background(), ncTemplate.Name, metav1.DeleteOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - framework.Failf("Can't delete NodeConfig %q: %v", naming.ObjRef(ncTemplate), err) - } - if !apierrors.IsNotFound(err) { - err = framework.WaitForObjectDeletion(context.Background(), f.DynamicAdminClient(), scyllav1alpha1.GroupVersion.WithResource("nodeconfigs"), ncTemplate.Namespace, ncTemplate.Name, nil) - o.Expect(err).NotTo(o.HaveOccurred()) - } - }) - g.DescribeTable("should make RAID0 array out of loop devices, format it to XFS, and mount at desired location", func(numberOfDevices int) { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) defer cancel() @@ -205,6 +158,18 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { }, } + rc := framework.NewRestoringCleaner( + ctx, + f.KubeAdminClient(), + f.DynamicAdminClient(), + nodeConfigResourceInfo, + nc.Namespace, + nc.Name, + framework.RestoreStrategyRecreate, + ) + f.AddCleaners(rc) + rc.DeleteObject(ctx, true) + g.By("Creating a NodeConfig") nc, err = f.ScyllaAdminClient().ScyllaV1alpha1().NodeConfigs().Create(ctx, nc, metav1.CreateOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) diff --git a/test/e2e/set/nodeconfig/nodeconfig_optimizations.go b/test/e2e/set/nodeconfig/nodeconfig_optimizations.go index 8a1cc6a5e98..39a19531b73 100644 --- a/test/e2e/set/nodeconfig/nodeconfig_optimizations.go +++ b/test/e2e/set/nodeconfig/nodeconfig_optimizations.go @@ -6,12 +6,12 @@ import ( "context" "encoding/json" "fmt" + "path/filepath" "sort" "time" g "github.com/onsi/ginkgo/v2" o "github.com/onsi/gomega" - scyllav1alpha1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1alpha1" "github.com/scylladb/scylla-operator/pkg/controllerhelpers" "github.com/scylladb/scylla-operator/pkg/internalapi" "github.com/scylladb/scylla-operator/pkg/naming" @@ -37,60 +37,34 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { ncTemplate := scyllafixture.NodeConfig.ReadOrFail() var matchingNodes []*corev1.Node - preconditionSuccessful := false g.JustBeforeEach(func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // Make sure the NodeConfig is not present. - framework.By("Making sure NodeConfig %q, doesn't exist", naming.ObjRef(ncTemplate)) - _, err := f.ScyllaAdminClient().ScyllaV1alpha1().NodeConfigs().Get(ctx, ncTemplate.Name, metav1.GetOptions{}) - if err == nil { - framework.Failf("NodeConfig %q can't be present before running this test", naming.ObjRef(ncTemplate)) - } else if !apierrors.IsNotFound(err) { - framework.Failf("Can't get NodeConfig %q: %v", naming.ObjRef(ncTemplate), err) - } - - preconditionSuccessful = true - g.By("Verifying there is at least one scylla node") + var err error matchingNodes, err = utils.GetMatchingNodesForNodeConfig(ctx, f.KubeAdminClient().CoreV1(), ncTemplate) o.Expect(err).NotTo(o.HaveOccurred()) o.Expect(matchingNodes).NotTo(o.HaveLen(0)) framework.Infof("There are %d scylla nodes", len(matchingNodes)) }) - g.JustAfterEach(func() { - if !preconditionSuccessful { - return - } - - framework.By("Deleting NodeConfig %q, if it exists", naming.ObjRef(ncTemplate)) - err := f.ScyllaAdminClient().ScyllaV1alpha1().NodeConfigs().Delete(context.Background(), ncTemplate.Name, metav1.DeleteOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - framework.Failf("Can't delete NodeConfig %q: %v", naming.ObjRef(ncTemplate), err) - } - if !apierrors.IsNotFound(err) { - err = framework.WaitForObjectDeletion(context.Background(), f.DynamicAdminClient(), scyllav1alpha1.GroupVersion.WithResource("nodeconfigs"), ncTemplate.Namespace, ncTemplate.Name, nil) - o.Expect(err).NotTo(o.HaveOccurred()) - } - - framework.By("Deleting ResourceQuota %q, if it exists", naming.ObjRef(ncTemplate)) - err = f.KubeAdminClient().CoreV1().ResourceQuotas(naming.ScyllaOperatorNodeTuningNamespace).Delete(context.Background(), resourceQuotaName, metav1.DeleteOptions{}) - if err != nil && !apierrors.IsNotFound(err) { - framework.Failf("Can't delete ResourceQuota %q: %v", naming.ManualRef(naming.ScyllaOperatorNodeTuningNamespace, resourceQuotaName), err) - } - if !apierrors.IsNotFound(err) { - err = framework.WaitForObjectDeletion(context.Background(), f.DynamicAdminClient(), corev1.SchemeGroupVersion.WithResource(corev1.ResourceQuotas.String()), naming.ScyllaOperatorNodeTuningNamespace, resourceQuotaName, nil) - o.Expect(err).NotTo(o.HaveOccurred()) - } - }) - g.It("should create tuning resources and tune nodes", func() { ctx, cancel := context.WithTimeout(context.Background(), testTimeout) defer cancel() nc := ncTemplate.DeepCopy() + rc := framework.NewRestoringCleaner( + ctx, + f.KubeAdminClient(), + f.DynamicAdminClient(), + nodeConfigResourceInfo, + nc.Namespace, + nc.Name, + framework.RestoreStrategyRecreate, + ) + f.AddCleaners(rc) + rc.DeleteObject(ctx, true) g.By("Creating a NodeConfig") nc, err := f.ScyllaAdminClient().ScyllaV1alpha1().NodeConfigs().Create(ctx, nc, metav1.CreateOptions{}) @@ -147,8 +121,6 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - nc := ncTemplate.DeepCopy() - g.By("Blocking node tuning") // We have to make sure the namespace exists. _, err := f.KubeAdminClient().CoreV1().Namespaces().Create( @@ -164,22 +136,50 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { o.Expect(err).NotTo(o.HaveOccurred()) } - rq, err := f.KubeAdminClient().CoreV1().ResourceQuotas(naming.ScyllaOperatorNodeTuningNamespace).Create( - ctx, - &corev1.ResourceQuota{ - ObjectMeta: metav1.ObjectMeta{ - Name: resourceQuotaName, - }, - Spec: corev1.ResourceQuotaSpec{ - Hard: corev1.ResourceList{ - corev1.ResourcePods: resource.MustParse("0"), - }, + rq := &corev1.ResourceQuota{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceQuotaName, + Namespace: naming.ScyllaOperatorNodeTuningNamespace, + }, + Spec: corev1.ResourceQuotaSpec{ + Hard: corev1.ResourceList{ + corev1.ResourcePods: resource.MustParse("0"), }, }, + } + rqRC := framework.NewRestoringCleaner( + ctx, + f.KubeAdminClient(), + f.DynamicAdminClient(), + resourceQuotaResourceInfo, + rq.Namespace, + rq.Name, + framework.RestoreStrategyUpdate, + ) + f.AddCleaners(rqRC) + rqRC.DeleteObject(ctx, true) + + rq, err = f.KubeAdminClient().CoreV1().ResourceQuotas(naming.ScyllaOperatorNodeTuningNamespace).Create( + ctx, + rq, metav1.CreateOptions{}, ) o.Expect(err).NotTo(o.HaveOccurred()) + nc := ncTemplate.DeepCopy() + + ncRC := framework.NewRestoringCleaner( + ctx, + f.KubeAdminClient(), + f.DynamicAdminClient(), + nodeConfigResourceInfo, + nc.Namespace, + nc.Name, + framework.RestoreStrategyRecreate, + ) + f.AddCleaners(ncRC) + ncRC.DeleteObject(ctx, true) + g.By("Creating a NodeConfig") nc, err = f.ScyllaAdminClient().ScyllaV1alpha1().NodeConfigs().Create(ctx, nc, metav1.CreateOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) @@ -255,12 +255,12 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { o.Expect(utils.IsScyllaClusterRolledOut(sc)).To(o.BeFalse()) framework.By("Unblocking tuning") - err = f.KubeAdminClient().CoreV1().ResourceQuotas(rq.Namespace).Delete( - ctx, - rq.Name, - metav1.DeleteOptions{}, - ) - o.Expect(err).NotTo(o.HaveOccurred()) + intermittentArtifactsDir := "" + if len(f.GetDefaultArtifactsDir()) != 0 { + intermittentArtifactsDir = filepath.Join(f.GetDefaultArtifactsDir(), "intermittent") + } + rqRC.Collect(ctx, intermittentArtifactsDir, f.Namespace()) + rqRC.DeleteObject(ctx, false) pod, err = f.KubeClient().CoreV1().Pods(f.Namespace()).Get( ctx,