Skip to content

Commit

Permalink
Flaky: TestGameServerCreationAfterDeletingOneExtensionsPod
Browse files Browse the repository at this point in the history
Thanks to extended logging I noticed that in some cases a `Failed`
status Pod could just hang around for a while, even though there were
the correct number of Pods up and running -- which would break this
test.

This changes things such that we only care about counting running Pods,
and ignoring everything else.

Closes #3599
  • Loading branch information
markmandel committed Mar 12, 2024
1 parent 66cd0e2 commit 38fbef7
Showing 1 changed file with 13 additions and 17 deletions.
30 changes: 13 additions & 17 deletions test/e2e/extensions/high_availability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestGameServerCreationAfterDeletingOneExtensionsPod(t *testing.T) {

assert.NoError(t, waitForAgonesExtensionsRunning(ctx))

list, err := getAgoneseExtensionsPods(ctx)
list, err := getAgonesExtensionsPods(ctx)
logger.Infof("Length of pod list is %v", len(list.Items))
if len(list.Items) > 2 {
logger.WithField("podCount", len(list.Items)).Info("Logging events for the Deployment due to pod count > 2 before deleting extensions pod")
Expand All @@ -52,7 +52,7 @@ func TestGameServerCreationAfterDeletingOneExtensionsPod(t *testing.T) {
}
}
require.NoError(t, err, "Could not get list of Extension pods")
assert.Greater(t, len(list.Items), 1, "Cluster has no Extensions pod or has only 1 extensions pod")
require.Greater(t, len(list.Items), 1, "Cluster has no Extensions pod or has only 1 extensions pod")

logger.Infof("Removing one of the Extensions Pods: %v", list.Items[1].ObjectMeta.Name)
deleteAgonesExtensionsPod(ctx, t, true)
Expand All @@ -65,7 +65,7 @@ func TestGameServerCreationAfterDeletingOneExtensionsPod(t *testing.T) {
require.NoError(t, err, "Could not get a GameServer ready")
logger.WithField("gsKey", readyGs.ObjectMeta.Name).Info("GameServer Ready")

assert.NoError(t, framework.AgonesClient.AgonesV1().GameServers(defaultNs).Delete(ctx, readyGs.ObjectMeta.Name, metav1.DeleteOptions{})) // nolint: errcheck
require.NoError(t, framework.AgonesClient.AgonesV1().GameServers(defaultNs).Delete(ctx, readyGs.ObjectMeta.Name, metav1.DeleteOptions{}))
}

func TestGameServerCreationRightAfterDeletingOneExtensionsPod(t *testing.T) {
Expand All @@ -74,7 +74,7 @@ func TestGameServerCreationRightAfterDeletingOneExtensionsPod(t *testing.T) {

assert.NoError(t, waitForAgonesExtensionsRunning(ctx))

list, err := getAgoneseExtensionsPods(ctx)
list, err := getAgonesExtensionsPods(ctx)
logger.Infof("Length of pod list is %v", len(list.Items))
for i := range list.Items {
logger.Infof("Name of extensions pod %v: %v", i, list.Items[i].ObjectMeta.Name)
Expand All @@ -97,7 +97,7 @@ func TestGameServerCreationRightAfterDeletingOneExtensionsPod(t *testing.T) {
// deleteAgonesExtensionsPod deletes one of the extensions pod for the Agones extensions,
// faking a extensions pod crash.
func deleteAgonesExtensionsPod(ctx context.Context, t *testing.T, waitForExtensions bool) {
list, err := getAgoneseExtensionsPods(ctx)
list, err := getAgonesExtensionsPods(ctx)
require.NoError(t, err, "Could not get list of Extension pods")

policy := metav1.DeletePropagationBackground
Expand All @@ -115,29 +115,25 @@ func deleteAgonesExtensionsPod(ctx context.Context, t *testing.T, waitForExtensi

func waitForAgonesExtensionsRunning(ctx context.Context) error {
return wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
list, err := getAgoneseExtensionsPods(ctx)
list, err := getAgonesExtensionsPods(ctx)
if err != nil {
return true, err
}

if len(list.Items) != 2 {
return false, nil
}

for i := range list.Items {
for _, c := range list.Items[i].Status.ContainerStatuses {
if c.State.Running == nil {
return false, nil
}
// count the number of Running instances
count := 0
for _, item := range list.Items {
if item.Status.Phase == corev1.PodRunning {
count++
}
}

return true, nil
return count == 2, nil
})
}

// getAgonesExtensionsPods returns all the Agones extensions pods
func getAgoneseExtensionsPods(ctx context.Context) (*corev1.PodList, error) {
func getAgonesExtensionsPods(ctx context.Context) (*corev1.PodList, error) {
opts := metav1.ListOptions{LabelSelector: labels.Set{"agones.dev/role": "extensions"}.String()}
pods, err := framework.KubeClient.CoreV1().Pods("agones-system").List(ctx, opts)
if err != nil {
Expand Down

0 comments on commit 38fbef7

Please sign in to comment.