Align E2E tests to ScyllaDB 6.0 changes.
The scaling E2E test was aligned to make sure it doesn't break the minimal required quorum during scaling changes.
The existing test scaled below the keyspace RF, which is no longer possible: ScyllaDB rejects a decommission when there is a keyspace with an RF higher than the node count.
The test step checking decommission of a drained node was moved earlier to avoid the same quorum breakage.
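For context (not part of the diff), a minimal sketch of the constraint the scaling test now has to respect; the helper name and the keyspace map are illustrative only:

```go
package main

import "fmt"

// minSafeMembers returns the lowest member count a cluster can be scaled down
// to without leaving any keyspace with an RF higher than the node count,
// which ScyllaDB 6.0 rejects at decommission time.
func minSafeMembers(keyspaceRFs map[string]int) int {
	minMembers := 1
	for _, rf := range keyspaceRFs {
		if rf > minMembers {
			minMembers = rf
		}
	}
	return minMembers
}

func main() {
	// With test data written at RF=3, scaling below 3 members is no longer
	// possible, hence the new 3 -> 5 -> 4 -> 3 -> 5 scaling sequence.
	fmt.Println(minSafeMembers(map[string]int{"e2e_ks": 3})) // prints 3
}
```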

The Alternator E2E test required a change to the name of the table the
password is read from: the table was renamed in 6.0 (system_auth.roles became system.roles).
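For reference, a self-contained sketch of the credential lookup after the rename; the function name and error wrapping are illustrative, while the query itself matches the diff below:

```go
package example

import (
	"context"
	"fmt"

	"github.com/gocql/gocql"
)

// lookupSaltedHash reads a role's salted_hash from system.roles, where
// ScyllaDB 6.0 keeps it; releases before 6.0 used system_auth.roles instead.
func lookupSaltedHash(ctx context.Context, session *gocql.Session, role string) (string, error) {
	var saltedHash string
	err := session.Query(
		`SELECT salted_hash FROM system.roles WHERE role = ?`,
		role,
	).WithContext(ctx).Scan(&saltedHash)
	if err != nil {
		return "", fmt.Errorf("can't get salted hash for role %q: %w", role, err)
	}
	return saltedHash, nil
}
```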

The restore E2E test was parametrized to make sure we exercise the procedure both with the default ScyllaDB version and with 2024.1, where a workaround explained in the documentation is required.
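The parametrization uses Ginkgo's table-driven tests with optional per-entry hooks; a generic, self-contained sketch of that pattern follows (the config type and hook names are illustrative, the real entry type and entries are in the diff):

```go
package example

import (
	"context"

	g "github.com/onsi/ginkgo/v2"
)

// clusterConfig stands in for the real ScyllaCluster spec used by the test.
type clusterConfig struct {
	version    string
	scyllaArgs string
}

// entry mirrors the shape of the table entries added in the diff: an optional
// version override plus optional hooks run before cluster creation and after
// the schema restore.
type entry struct {
	version               string
	preCreateHook         func(*clusterConfig)
	postSchemaRestoreHook func(context.Context, *clusterConfig)
}

var _ = g.DescribeTable("restore procedure", func(e entry) {
	cfg := &clusterConfig{version: "default"}
	if e.version != "" {
		cfg.version = e.version
	}
	if e.preCreateHook != nil {
		e.preCreateHook(cfg)
	}
	// ... create the cluster, run the backup, restore the schema ...
	if e.postSchemaRestoreHook != nil {
		e.postSchemaRestoreHook(context.Background(), cfg)
	}
	// ... restore the tables and verify the data ...
},
	g.Entry("using default ScyllaDB version", entry{}),
	g.Entry("using the 2024.1 workaround", entry{
		version: "2024.1.5",
		preCreateHook: func(c *clusterConfig) {
			// The documented workaround starts the target cluster with
			// consistent_cluster_management disabled and re-enables it after
			// the schema restore (see the postSchemaRestoreHook in the diff).
			c.scyllaArgs = "--consistent-cluster-management=false"
		},
	}),
)
```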
zimnx committed Jun 28, 2024
1 parent 6f3ff65 commit f91a115
Showing 3 changed files with 120 additions and 102 deletions.
test/e2e/set/scyllacluster/scyllacluster_alternator.go (2 changes: 1 addition & 1 deletion)
@@ -128,7 +128,7 @@ authorizer: CassandraAuthorizer
}

q := cqlSession.Query(
`SELECT salted_hash FROM system_auth.roles WHERE role = ?`,
`SELECT salted_hash FROM system.roles WHERE role = ?`,
awsCredentials.AccessKeyID,
).WithContext(ctx)
err = q.Scan(&awsCredentials.SecretAccessKey)
test/e2e/set/scyllacluster/scyllacluster_scaling.go (142 changes: 63 additions & 79 deletions)
@@ -28,9 +28,9 @@ var _ = g.Describe("ScyllaCluster", func() {
defer cancel()

sc := f.GetDefaultScyllaCluster()
sc.Spec.Datacenter.Racks[0].Members = 1
sc.Spec.Datacenter.Racks[0].Members = 3

framework.By("Creating a ScyllaCluster with 1 member")
framework.By("Creating a ScyllaCluster with 3 members")
sc, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred())

@@ -45,22 +45,22 @@ var _ = g.Describe("ScyllaCluster", func() {

hosts, hostIDs, err := utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(hosts).To(o.HaveLen(1))
o.Expect(hostIDs).To(o.HaveLen(1))
diRF1 := insertAndVerifyCQLData(ctx, hosts)
defer diRF1.Close()
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
diRF3 := insertAndVerifyCQLData(ctx, hosts)
defer diRF3.Close()

framework.By("Scaling the ScyllaCluster to 3 replicas")
framework.By("Scaling the ScyllaCluster to 5 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`),
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 5}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(5))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx2, waitCtx2Cancel := utils.ContextForRollout(ctx, sc)
@@ -75,63 +75,17 @@ var _ = g.Describe("ScyllaCluster", func() {
oldHostIDs := hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(1))
o.Expect(oldHostIDs).To(o.HaveLen(1))
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.ContainElements(oldHostIDs))

verifyCQLData(ctx, diRF1)

// Statistically, some data should land on the 3rd node that will give us a chance to ensure
// it was stream correctly when downscaling.
diRF2 := insertAndVerifyCQLData(ctx, hosts[0:2])
defer diRF2.Close()

diRF3 := insertAndVerifyCQLData(ctx, hosts)
defer diRF3.Close()

framework.By("Scaling the ScyllaCluster down to 2 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 2}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(2))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx3, waitCtx3Cancel := utils.ContextForRollout(ctx, sc)
defer waitCtx3Cancel()
sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx3, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), sc)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), sc)

oldHosts = hosts
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(3))
o.Expect(oldHostIDs).To(o.HaveLen(3))
o.Expect(hosts).To(o.HaveLen(2))
o.Expect(hostIDs).To(o.HaveLen(2))
o.Expect(oldHostIDs).To(o.ContainElements(hostIDs))

verifyCQLData(ctx, diRF1)
o.Expect(hosts).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.ContainElements(oldHostIDs))

// The 2 nodes out of 3 we used earlier may not be the ones that got left. Although discovery will still
// make sure the missing one is picked up, let's avoid having a down node in the pool and refresh it.
err = diRF2.SetClientEndpoints(hosts)
o.Expect(err).NotTo(o.HaveOccurred())
verifyCQLData(ctx, diRF2)
verifyCQLData(ctx, diRF3)

podName := naming.StatefulSetNameForRack(sc.Spec.Datacenter.Racks[0], sc) + "-1"
podName := naming.StatefulSetNameForRack(sc.Spec.Datacenter.Racks[0], sc) + "-4"
svcName := podName
framework.By("Marking ScyllaCluster node #2 (%s) for maintenance", podName)
framework.By("Marking ScyllaCluster node #4 (%s) for maintenance", podName)
svc := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
@@ -150,7 +104,7 @@ var _ = g.Describe("ScyllaCluster", func() {
)
o.Expect(err).NotTo(o.HaveOccurred())

framework.By("Manually draining ScyllaCluster node #2 (%s)", podName)
framework.By("Manually draining ScyllaCluster node #4 (%s)", podName)
ec := &corev1.EphemeralContainer{
TargetContainerName: naming.ScyllaContainerName,
EphemeralContainerCommon: corev1.EphemeralContainerCommon{
@@ -168,16 +122,48 @@ var _ = g.Describe("ScyllaCluster", func() {
o.Expect(ephemeralContainerState.State.Terminated).NotTo(o.BeNil())
o.Expect(ephemeralContainerState.State.Terminated.ExitCode).To(o.BeEquivalentTo(0))

framework.By("Scaling the ScyllaCluster down to 1 replicas")
framework.By("Scaling the ScyllaCluster down to 4 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 4}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(4))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx3, waitCtx3Cancel := utils.ContextForRollout(ctx, sc)
defer waitCtx3Cancel()
sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx3, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), sc)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), sc)

oldHosts = hosts
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(5))
o.Expect(oldHostIDs).To(o.HaveLen(5))
o.Expect(hosts).To(o.HaveLen(4))
o.Expect(hostIDs).To(o.HaveLen(4))
o.Expect(oldHostIDs).To(o.ContainElements(hostIDs))

verifyCQLData(ctx, diRF3)

framework.By("Scaling the ScyllaCluster down to 3 replicas")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 1}]`),
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(1))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx5, waitCtx5Cancel := utils.ContextForRollout(ctx, sc)
@@ -192,24 +178,24 @@ var _ = g.Describe("ScyllaCluster", func() {
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(2))
o.Expect(oldHostIDs).To(o.HaveLen(2))
o.Expect(hosts).To(o.HaveLen(1))
o.Expect(hostIDs).To(o.HaveLen(1))
o.Expect(oldHosts).To(o.HaveLen(4))
o.Expect(oldHostIDs).To(o.HaveLen(4))
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
o.Expect(oldHostIDs).To(o.ContainElements(hostIDs))

verifyCQLData(ctx, diRF1)
verifyCQLData(ctx, diRF3)

framework.By("Scaling the ScyllaCluster back to 3 replicas to make sure there isn't an old (decommissioned) storage in place")
framework.By("Scaling the ScyllaCluster back to 5 replicas to make sure there isn't an old (decommissioned) storage in place")
sc, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
sc.Name,
types.JSONPatchType,
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 3}]`),
[]byte(`[{"op": "replace", "path": "/spec/datacenter/racks/0/members", "value": 5}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(3))
o.Expect(sc.Spec.Datacenter.Racks[0].Members).To(o.BeEquivalentTo(5))

framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
waitCtx6, waitCtx6Cancel := utils.ContextForRollout(ctx, sc)
@@ -224,14 +210,12 @@ var _ = g.Describe("ScyllaCluster", func() {
oldHostIDs = hostIDs
hosts, hostIDs, err = utils.GetBroadcastRPCAddressesAndUUIDs(ctx, f.KubeClient().CoreV1(), sc)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(oldHosts).To(o.HaveLen(1))
o.Expect(oldHostIDs).To(o.HaveLen(1))
o.Expect(hosts).To(o.HaveLen(3))
o.Expect(hostIDs).To(o.HaveLen(3))
o.Expect(oldHosts).To(o.HaveLen(3))
o.Expect(oldHostIDs).To(o.HaveLen(3))
o.Expect(hosts).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.HaveLen(5))
o.Expect(hostIDs).To(o.ContainElements(oldHostIDs))

verifyCQLData(ctx, diRF1)
verifyCQLData(ctx, diRF2)
verifyCQLData(ctx, diRF3)
})
})
test/e2e/set/scyllacluster/scyllamanager_object_storage.go (78 changes: 56 additions & 22 deletions)
@@ -32,13 +32,27 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage

f := framework.NewFramework("scyllacluster")

g.It("should register cluster, sync backup tasks and support manual restore procedure", func() {
type entry struct {
scyllaRepository string
scyllaVersion string
preTargetClusterCreateHook func(cluster *scyllav1.ScyllaCluster)
postSchemaRestoreHook func(context.Context, *framework.Framework, *scyllav1.ScyllaCluster)
}

g.DescribeTable("should register cluster, sync backup tasks and support manual restore procedure", func(e entry) {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
defer cancel()

sourceSC := f.GetDefaultScyllaCluster()
sourceSC.Spec.Datacenter.Racks[0].Members = 1

if len(e.scyllaRepository) != 0 {
sourceSC.Spec.Repository = e.scyllaRepository
}
if len(e.scyllaVersion) != 0 {
sourceSC.Spec.Version = e.scyllaVersion
}

objectStorageType := f.GetObjectStorageType()
switch objectStorageType {
case framework.ObjectStorageTypeGCS:
@@ -266,8 +280,12 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage

targetSC := f.GetDefaultScyllaCluster()
targetSC.Spec.Datacenter.Racks[0].Members = sourceSC.Spec.Datacenter.Racks[0].Members
// Restoring schema with ScyllaDB OS 5.4.X or ScyllaDB Enterprise 2024.1.X and consistent_cluster_management isn’t supported.
targetSC.Spec.ScyllaArgs = "--consistent-cluster-management=false"
targetSC.Spec.Repository = sourceSC.Spec.Repository
targetSC.Spec.Version = sourceSC.Spec.Version

if e.preTargetClusterCreateHook != nil {
e.preTargetClusterCreateHook(targetSC)
}

switch objectStorageType {
case framework.ObjectStorageTypeGCS:
@@ -382,24 +400,9 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage
verifyScyllaCluster(ctx, f.KubeClient(), targetSC)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC)

framework.By("Enabling raft in target cluster")
_, err = f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
targetSC.Name,
types.JSONPatchType,
[]byte(`[{"op":"replace","path":"/spec/scyllaArgs","value":"--consistent-cluster-management=true"}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())

framework.By("Waiting for the target ScyllaCluster to roll out")
waitCtx10, waitCtx10Cancel := utils.ContextForRollout(ctx, targetSC)
defer waitCtx10Cancel()
targetSC, err = controllerhelpers.WaitForScyllaClusterState(waitCtx10, f.ScyllaClient().ScyllaV1().ScyllaClusters(targetSC.Namespace), targetSC.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), targetSC)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC)
if e.postSchemaRestoreHook != nil {
e.postSchemaRestoreHook(ctx, f, targetSC)
}

framework.By("Creating a tables restore task")
stdout, stderr, err = utils.ExecWithOptions(f.AdminClientConfig(), f.KubeAdminClient().CoreV1(), utils.ExecOptions{
@@ -438,7 +441,38 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage
o.Expect(err).NotTo(o.HaveOccurred())

verifyCQLData(ctx, di)
})
},
g.Entry("using default ScyllaDB version", entry{}),
// Restoring schema with ScyllaDB OS 5.4.X or ScyllaDB Enterprise 2024.1.X and consistent_cluster_management isn’t supported.
// This test validates a workaround explained in the docs.
g.Entry("using workaround for consistent_cluster_management for ScyllaDB Enterprise 2024.1.X", entry{
scyllaRepository: "docker.io/scylladb/scylla-enterprise",
scyllaVersion: "2024.1.5",
preTargetClusterCreateHook: func(targetCluster *scyllav1.ScyllaCluster) {
targetCluster.Spec.ScyllaArgs = "--consistent-cluster-management=false"
},
postSchemaRestoreHook: func(ctx context.Context, f *framework.Framework, targetSC *scyllav1.ScyllaCluster) {
framework.By("Enabling raft in target cluster")
_, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Patch(
ctx,
targetSC.Name,
types.JSONPatchType,
[]byte(`[{"op":"replace","path":"/spec/scyllaArgs","value":"--consistent-cluster-management=true"}]`),
metav1.PatchOptions{},
)
o.Expect(err).NotTo(o.HaveOccurred())

framework.By("Waiting for the target ScyllaCluster to roll out")
waitCtx, waitCtxCancel := utils.ContextForRollout(ctx, targetSC)
defer waitCtxCancel()
targetSC, err = controllerhelpers.WaitForScyllaClusterState(waitCtx, f.ScyllaClient().ScyllaV1().ScyllaClusters(targetSC.Namespace), targetSC.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
o.Expect(err).NotTo(o.HaveOccurred())

verifyScyllaCluster(ctx, f.KubeClient(), targetSC)
waitForFullQuorum(ctx, f.KubeClient().CoreV1(), targetSC)
},
}),
)

g.It("should discover cluster and sync errors for invalid tasks and invalid updates to existing tasks", func() {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
