Align E2E tests to ScyllaDB 6.0 changes.
The scaling E2E test was aligned to make sure it doesn't break the minimal required quorum during scaling changes.
The existing test scaled below the keyspace RF, which is no longer possible: ScyllaDB rejects a decommission when there is a keyspace with an RF higher than the node count.
The test step checking decommission of a drained node was moved earlier to avoid the same quorum breakage.
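For context (not part of the diff), a minimal sketch of the constraint the scaling test now has to respect; the helper name and the keyspace map are illustrative only:

```go
package main

import "fmt"

// minSafeMembers returns the lowest member count a cluster can be scaled down
// to without leaving any keyspace with an RF higher than the node count,
// which ScyllaDB 6.0 rejects at decommission time.
func minSafeMembers(keyspaceRFs map[string]int) int {
	minMembers := 1
	for _, rf := range keyspaceRFs {
		if rf > minMembers {
			minMembers = rf
		}
	}
	return minMembers
}

func main() {
	// With test data written at RF=3, scaling below 3 members is no longer
	// possible, hence the new 3 -> 5 -> 4 -> 3 -> 5 scaling sequence.
	fmt.Println(minSafeMembers(map[string]int{"e2e_ks": 3})) // prints 3
}
```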

The Alternator E2E test required a change to the name of the table the
password is read from: the table was renamed in 6.0 (system_auth.roles became system.roles).
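For reference, a self-contained sketch of the credential lookup after the rename; the function name and error wrapping are illustrative, while the query itself matches the diff below:

```go
package example

import (
	"context"
	"fmt"

	"github.com/gocql/gocql"
)

// lookupSaltedHash reads a role's salted_hash from system.roles, where
// ScyllaDB 6.0 keeps it; releases before 6.0 used system_auth.roles instead.
func lookupSaltedHash(ctx context.Context, session *gocql.Session, role string) (string, error) {
	var saltedHash string
	err := session.Query(
		`SELECT salted_hash FROM system.roles WHERE role = ?`,
		role,
	).WithContext(ctx).Scan(&saltedHash)
	if err != nil {
		return "", fmt.Errorf("can't get salted hash for role %q: %w", role, err)
	}
	return saltedHash, nil
}
```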

The restore E2E test was parametrized to make sure we exercise the procedure both with the default ScyllaDB version and with 2024.1, where a workaround explained in the documentation is required.
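The parametrization uses Ginkgo's table-driven tests with optional per-entry hooks; a generic, self-contained sketch of that pattern follows (the config type and hook names are illustrative, the real entry type and entries are in the diff):

```go
package example

import (
	"context"

	g "github.com/onsi/ginkgo/v2"
)

// clusterConfig stands in for the real ScyllaCluster spec used by the test.
type clusterConfig struct {
	version    string
	scyllaArgs string
}

// entry mirrors the shape of the table entries added in the diff: an optional
// version override plus optional hooks run before cluster creation and after
// the schema restore.
type entry struct {
	version               string
	preCreateHook         func(*clusterConfig)
	postSchemaRestoreHook func(context.Context, *clusterConfig)
}

var _ = g.DescribeTable("restore procedure", func(e entry) {
	cfg := &clusterConfig{version: "default"}
	if e.version != "" {
		cfg.version = e.version
	}
	if e.preCreateHook != nil {
		e.preCreateHook(cfg)
	}
	// ... create the cluster, run the backup, restore the schema ...
	if e.postSchemaRestoreHook != nil {
		e.postSchemaRestoreHook(context.Background(), cfg)
	}
	// ... restore the tables and verify the data ...
},
	g.Entry("using default ScyllaDB version", entry{}),
	g.Entry("using the 2024.1 workaround", entry{
		version: "2024.1.5",
		preCreateHook: func(c *clusterConfig) {
			// The documented workaround starts the target cluster with
			// consistent_cluster_management disabled and re-enables it after
			// the schema restore (see the postSchemaRestoreHook in the diff).
			c.scyllaArgs = "--consistent-cluster-management=false"
		},
	}),
)
```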
zimnx committed Jun 28, 2024
1 parent 6f3ff65 commit f91a115
Showing 3 changed files with 120 additions and 102 deletions.
test/e2e/set/scyllacluster/scyllacluster_alternator.go (2 changes: 1 addition & 1 deletion)
@@ -128,7 +128,7 @@ authorizer: CassandraAuthorizer
}

q := cqlSession.Query(
`SELECT salted_hash FROM system_auth.roles WHERE role = ?`,
`SELECT salted_hash FROM system.roles WHERE role = ?`,
awsCredentials.AccessKeyID,
).WithContext(ctx)
err = q.Scan(&awsCredentials.SecretAccessKey)
test/e2e/set/scyllacluster/scyllacluster_scaling.go (142 changes: 63 additions & 79 deletions)
@@ -28,9 +28,9 @@ var _ = g.Describe("ScyllaCluster", func() {
defer cancel()

sc := f.GetDefaultScyllaCluster()
sc.Spec.Datacenter.Racks[0].Members = 1
sc.Spec.Datacenter.Racks[0].Members = 3

framework.By("Creating a ScyllaCluster with 1 member")
framework.By("Creating a ScyllaCluster with 3 members")
sc, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred())

@@ -45,22 +45,22 @@ var _ = g.Describe("ScyllaCluster", func() {

hosts, hostIDs, err := utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(hosts).To(o.HaveLen(1))
o.Expect(hostIDs).To(o.HaveLen(1))
diRF1 := insertAndVerifyCQLData(ctx, hosts)
defer diRF1.Close()
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
diRF3 := insertAndVerifyCQLData(ctx, hosts)
defer diRF3.Close()

framework.By("Scaling the ScyllaCluster to 3 replicas")
framework.By("Scaling the ScyllaCluster to 5 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`),
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 5}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(5))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx2, waitCtx2Cancel := utils.ContextForRollout(ctx, sc)
@@ -75,63 +75,17 @@ var _ = g.Describe("ScyllaCluster", func() {
oldHostIDs := hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(1))
o.Expect(oldHostIDs).To(o.HaveLen(1))
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.ContainElements(oldHostIDs))

verifyCQLData(ctx, diRF1)

// Statistically, some data should land on the 3rd node that will give us a chance to ensure
// it was stream correctly when downscaling.
diRF2 := insertAndVerifyCQLData(ctx, hosts[0:2])
defer diRF2.Close()

diRF3 := insertAndVerifyCQLData(ctx, hosts)
defer diRF3.Close()

framework.By("Scaling the ScyllaCluster down to 2 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 2}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(2))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx3, waitCtx3Cancel := utils.ContextForRollout(ctx, sc)
defer waitCtx3Cancel()
sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx3, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), sc)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), sc)

oldHosts = hosts
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(3))
o.Expect(oldHostIDs).To(o.HaveLen(3))
o.Expect(hosts).To(o.HaveLen(2))
o.Expect(hostIDs).To(o.HaveLen(2))
o.Expect(oldHostIDs).To(o.ContainElements(hostIDs))

verifyCQLData(ctx, diRF1)
o.Expect(hosts).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.ContainElements(oldHostIDs))

// The 2 nodes out of 3 we used earlier may not be the ones that got left. Although discovery will still
// make sure the missing one is picked up, let's avoid having a down node in the pool and refresh it.
err = diRF2.SetClientEndpoints(hosts)
o.Expect(err).NotTo(o.HaveOccurred())
verifyCQLData(ctx, diRF2)
verifyCQLData(ctx, diRF3)

podName := naming.StatefulSetNameForRack(sc.Spec.Datacenter.Racks[0], sc) + "-1"
podName := naming.StatefulSetNameForRack(sc.Spec.Datacenter.Racks[0], sc) + "-4"
svcName := podName
framework.By("Marking ScyllaCluster node #2 (%s) for maintenance", podName)
framework.By("Marking ScyllaCluster node #4 (%s) for maintenance", podName)
svc := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
@@ -150,7 +104,7 @@ var _ = g.Describe("ScyllaCluster", func() {
)
o.Expect(err).NotTo(o.HaveOccurred())

framework.By("Manually draining ScyllaCluster node #2 (%s)", podName)
framework.By("Manually draining ScyllaCluster node #4 (%s)", podName)
ec := &corev1.EphemeralContainer{
TargetContainerName: naming.ScyllaContainerName,
EphemeralContainerCommon: corev1.EphemeralContainerCommon{
@@ -168,16 +122,48 @@ var _ = g.Describe("ScyllaCluster", func() {
o.Expect(ephemeralContainerState.State.Terminated).NotTo(o.BeNil())
o.Expect(ephemeralContainerState.State.Terminated.ExitCode).To(o.BeEquivalentTo(0))

framework.By("Scaling the ScyllaCluster down to 1 replicas")
framework.By("Scaling the ScyllaCluster down to 4 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 4}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(4))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx3, waitCtx3Cancel := utils.ContextForRollout(ctx, sc)
defer waitCtx3Cancel()
sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx3, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), sc)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), sc)

oldHosts = hosts
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(5))
o.Expect(oldHostIDs).To(o.HaveLen(5))
o.Expect(hosts).To(o.HaveLen(4))
o.Expect(hostIDs).To(o.HaveLen(4))
o.Expect(oldHostIDs).To(o.ContainElements(hostIDs))

verifyCQLData(ctx, diRF3)

framework.By("Scaling the ScyllaCluster down to 3 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 1}]`),
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(1))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx5, waitCtx5Cancel := utils.ContextForRollout(ctx, sc)
@@ -192,24 +178,24 @@ var _ = g.Describe("ScyllaCluster", func() {
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(2))
o.Expect(oldHostIDs).To(o.HaveLen(2))
o.Expect(hosts).To(o.HaveLen(1))
o.Expect(hostIDs).To(o.HaveLen(1))
o.Expect(oldHosts).To(o.HaveLen(4))
o.Expect(oldHostIDs).To(o.HaveLen(4))
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
o.Expect(oldHostIDs).To(o.ContainElements(hostIDs))

verifyCQLData(ctx, diRF1)
verifyCQLData(ctx, diRF3)

framework.By("Scaling the ScyllaCluster back to 3 replicas to make sure there isn't an old (decommissioned) storage in place")
framework.By("Scaling the ScyllaCluster back to 5 replicas to make sure there isn't an old (decommissioned) storage in place")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`),
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 5}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(5))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx6, waitCtx6Cancel := utils.ContextForRollout(ctx, sc)
@@ -224,14 +210,12 @@ var _ = g.Describe("ScyllaCluster", func() {
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(1))
o.Expect(oldHostIDs).To(o.HaveLen(1))
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
o.Expect(oldHosts).To(o.HaveLen(3))
o.Expect(oldHostIDs).To(o.HaveLen(3))
o.Expect(hosts).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.ContainElements(oldHostIDs))

verifyCQLData(ctx, diRF1)
verifyCQLData(ctx, diRF2)
verifyCQLData(ctx, diRF3)
})
})
test/e2e/set/scyllacluster/scyllamanager_object_storage.go (78 changes: 56 additions & 22 deletions)
@@ -32,13 +32,27 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage

f := framework.NewFramework("scyllacluster")

g.It("should register cluster, sync backup tasks and support manual restore procedure", func() {
type entry struct {
scyllaRepository string
scyllaVersion string
preTargetClusterCreateHook func(cluster *scyllav1.ScyllaCluster)
postSchemaRestoreHook func(context.Context, *framework.Framework, *scyllav1.ScyllaCluster)
}

g.DescribeTable("should register cluster, sync backup tasks and support manual restore procedure", func(e entry) {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
defer cancel()

sourceSC := f.GetDefaultScyllaCluster()
sourceSC.Spec.Datacenter.Racks[0].Members = 1

if len(e.scyllaRepository) != 0 {
sourceSC.Spec.Repository = e.scyllaRepository
}
if len(e.scyllaVersion) != 0 {
sourceSC.Spec.Version = e.scyllaVersion
}

objectStorageType := f.GetObjectStorageType()
switch objectStorageType {
case framework.ObjectStorageTypeGCS:
@@ -266,8 +280,12 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage

targetSC := f.GetDefaultScyllaCluster()
targetSC.Spec.Datacenter.Racks[0].Members = sourceSC.Spec.Datacenter.Racks[0].Members
// Restoring schema with ScyllaDB OS 5.4.X or ScyllaDB Enterprise 2024.1.X and consistent_cluster_management isn’t supported.
targetSC.Spec.ScyllaArgs = "--consistent-cluster-management=false"
targetSC.Spec.Repository = sourceSC.Spec.Repository
targetSC.Spec.Version = sourceSC.Spec.Version

if e.preTargetClusterCreateHook != nil {
e.preTargetClusterCreateHook(targetSC)
}

switch objectStorageType {
case framework.ObjectStorageTypeGCS:
@@ -382,24 +400,9 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage
verifyScyllaCluster(ctx, f.KubeClient(), targetSC)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC)

framework.By("Enabling raft in target cluster")
_, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
targetSC.Name,
types.JSONPatchType,
[]byte(`[{"op":"replace","path":"/spec/scyllaArgs","value":"--consistent-cluster-management=true"}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())

framework.By("Waiting for the target ScyllaCluster to roll out")
waitCtx10, waitCtx10Cancel := utils.ContextForRollout(ctx, targetSC)
defer waitCtx10Cancel()
targetSC, err = controllerhelpers.WaitForScyllaClusterState(waitCtx10, f.ScyllaClient().ScyllaV1().ScyllaClusters(targetSC.Namespace), targetSC.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), targetSC)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC)
if e.postSchemaRestoreHook != nil {
e.postSchemaRestoreHook(ctx, f, targetSC)
}

framework.By("Creating a tables restore task")
stdout, stderr, err = utils.ExecWithOptions(f.AdminClientConfig(), f.KubeAdminClient().CoreV1(), utils.ExecOptions{
@@ -438,7 +441,38 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage
o.Expect(err).NotTo(o.HaveOccurred())

verifyCQLData(ctx, di)
})
},
g.Entry("using default ScyllaDB version", entry{}),
// Restoring schema with ScyllaDB OS 5.4.X or ScyllaDB Enterprise 2024.1.X and consistent_cluster_management isn’t supported.
// This test validates a workaround explained in the docs.
g.Entry("using workaround for consistent_cluster_management for ScyllaDB Enterprise 2024.1.X", entry{
scyllaRepository: "docker.io/scylladb/scylla-enterprise",
scyllaVersion: "2024.1.5",
preTargetClusterCreateHook: func(targetCluster *scyllav1.ScyllaCluster) {
targetCluster.Spec.ScyllaArgs = "--consistent-cluster-management=false"
},
postSchemaRestoreHook: func(ctx context.Context, f *framework.Framework, targetSC *scyllav1.ScyllaCluster) {
framework.By("Enabling raft in target cluster")
_, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
targetSC.Name,
types.JSONPatchType,
[]byte(`[{"op":"replace","path":"/spec/scyllaArgs","value":"--consistent-cluster-management=true"}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())

framework.By("Waiting for the target ScyllaCluster to roll out")
waitCtx, waitCtxCancel := utils.ContextForRollout(ctx, targetSC)
defer waitCtxCancel()
targetSC, err = controllerhelpers.WaitForScyllaClusterState(waitCtx, f.ScyllaClient().ScyllaV1().ScyllaClusters(targetSC.Namespace), targetSC.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), targetSC)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC)
},
}),
)

g.It("should discover cluster and sync errors for invalid tasks and invalid updates to existing tasks", func() {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
